From a7ebe090291a218ad781f7239e5ca649ccd6f35b Mon Sep 17 00:00:00 2001 From: Vyacheslav Shults Date: Wed, 6 May 2020 09:54:03 +0300 Subject: [PATCH 01/19] Replace all None by nn.Identity() in all models reset_classifier when False-values num_classes is given. Make small code refactoring --- timm/models/densenet.py | 15 +++-- timm/models/dla.py | 13 ++-- timm/models/dpn.py | 18 ++--- timm/models/efficientnet.py | 34 +++++----- timm/models/gluon_resnet.py | 14 ++-- timm/models/gluon_xception.py | 20 +++--- timm/models/inception_resnet_v2.py | 11 +-- timm/models/inception_v3.py | 5 +- timm/models/inception_v4.py | 14 ++-- timm/models/mobilenetv3.py | 18 ++--- timm/models/nasnet.py | 105 +++++++++++++++-------------- timm/models/pnasnet.py | 9 +-- timm/models/res2net.py | 10 ++- timm/models/resnet.py | 12 ++-- timm/models/selecsls.py | 11 ++- timm/models/senet.py | 12 ++-- timm/models/tresnet.py | 14 ++-- timm/models/xception.py | 11 +-- 18 files changed, 179 insertions(+), 167 deletions(-) diff --git a/timm/models/densenet.py b/timm/models/densenet.py index 4235c0f7..c8be8683 100644 --- a/timm/models/densenet.py +++ b/timm/models/densenet.py @@ -2,17 +2,17 @@ This file is a copy of https://github.com/pytorch/vision 'densenet.py' (BSD-3-Clause) with fixed kwargs passthrough and addition of dynamic global avg/max pool. """ +import re from collections import OrderedDict import torch import torch.nn as nn import torch.nn.functional as F -from .registry import register_model +from timm.data import IMAGENET_DEFAULT_MEAN, IMAGENET_DEFAULT_STD from .helpers import load_pretrained from .layers import SelectAdaptivePool2d -from timm.data import IMAGENET_DEFAULT_MEAN, IMAGENET_DEFAULT_STD -import re +from .registry import register_model __all__ = ['DenseNet'] @@ -85,6 +85,7 @@ class DenseNet(nn.Module): drop_rate (float) - dropout rate after each dense layer num_classes (int) - number of classification classes """ + def __init__(self, growth_rate=32, block_config=(6, 12, 24, 16), num_init_features=64, bn_size=4, drop_rate=0, num_classes=1000, in_chans=3, global_pool='avg'): @@ -127,8 +128,11 @@ class DenseNet(nn.Module): def reset_classifier(self, num_classes, global_pool='avg'): self.num_classes = num_classes self.global_pool = SelectAdaptivePool2d(pool_type=global_pool) - self.classifier = nn.Linear( - self.num_features * self.global_pool.feat_mult(), num_classes) if num_classes else None + if num_classes: + num_features = self.num_features * self.global_pool.feat_mult() + self.classifier = nn.Linear(num_features, num_classes) + else: + self.classifier = nn.Identity() def forward_features(self, x): x = self.features(x) @@ -157,7 +161,6 @@ def _filter_pretrained(state_dict): return state_dict - @register_model def densenet121(pretrained=False, num_classes=1000, in_chans=3, **kwargs): r"""Densenet-121 model from diff --git a/timm/models/dla.py b/timm/models/dla.py index a9e81d16..f6820ab9 100644 --- a/timm/models/dla.py +++ b/timm/models/dla.py @@ -11,11 +11,10 @@ import torch import torch.nn as nn import torch.nn.functional as F -from .registry import register_model +from timm.data import IMAGENET_DEFAULT_MEAN, IMAGENET_DEFAULT_STD from .helpers import load_pretrained from .layers import SelectAdaptivePool2d -from timm.data import IMAGENET_DEFAULT_MEAN, IMAGENET_DEFAULT_STD - +from .registry import register_model __all__ = ['DLA'] @@ -51,6 +50,7 @@ default_cfgs = { class DlaBasic(nn.Module): """DLA Basic""" + def __init__(self, inplanes, planes, stride=1, dilation=1, **_): super(DlaBasic, 
self).__init__() self.conv1 = nn.Conv2d( @@ -170,7 +170,7 @@ class DlaBottle2neck(nn.Module): sp = bn(sp) sp = self.relu(sp) spo.append(sp) - if self.scale > 1 : + if self.scale > 1: spo.append(self.pool(spx[-1]) if self.is_first else spx[-1]) out = torch.cat(spo, 1) @@ -304,9 +304,10 @@ class DLA(nn.Module): self.num_classes = num_classes self.global_pool = SelectAdaptivePool2d(pool_type=global_pool) if num_classes: - self.fc = nn.Conv2d(self.num_features * self.global_pool.feat_mult(), num_classes, 1, bias=True) + num_features = self.num_features * self.global_pool.feat_mult() + self.fc = nn.Conv2d(num_features, num_classes, kernel_size=1, bias=True) else: - self.fc = None + self.fc = nn.Identity() def forward_features(self, x): x = self.base_layer(x) diff --git a/timm/models/dpn.py b/timm/models/dpn.py index fd58e516..9c4fafc8 100644 --- a/timm/models/dpn.py +++ b/timm/models/dpn.py @@ -9,16 +9,16 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function +from collections import OrderedDict + import torch import torch.nn as nn import torch.nn.functional as F -from collections import OrderedDict -from .registry import register_model +from timm.data import IMAGENET_DPN_MEAN, IMAGENET_DPN_STD from .helpers import load_pretrained from .layers import SelectAdaptivePool2d -from timm.data import IMAGENET_DPN_MEAN, IMAGENET_DPN_STD - +from .registry import register_model __all__ = ['DPN'] @@ -218,8 +218,8 @@ class DPN(nn.Module): # Using 1x1 conv for the FC layer to allow the extra pooling scheme self.global_pool = SelectAdaptivePool2d(pool_type=global_pool) - self.classifier = nn.Conv2d( - self.num_features * self.global_pool.feat_mult(), num_classes, kernel_size=1, bias=True) + num_features = self.num_features * self.global_pool.feat_mult() + self.classifier = nn.Conv2d(num_features, num_classes, kernel_size=1, bias=True) def get_classifier(self): return self.classifier @@ -228,10 +228,10 @@ class DPN(nn.Module): self.num_classes = num_classes self.global_pool = SelectAdaptivePool2d(pool_type=global_pool) if num_classes: - self.classifier = nn.Conv2d( - self.num_features * self.global_pool.feat_mult(), num_classes, kernel_size=1, bias=True) + num_features = self.num_features * self.global_pool.feat_mult() + self.classifier = nn.Conv2d(num_features, num_classes, kernel_size=1, bias=True) else: - self.classifier = None + self.classifier = nn.Identity() def forward_features(self, x): return self.features(x) diff --git a/timm/models/efficientnet.py b/timm/models/efficientnet.py index 92460438..21fbee19 100644 --- a/timm/models/efficientnet.py +++ b/timm/models/efficientnet.py @@ -24,14 +24,12 @@ An implementation of EfficienNet that covers variety of related models with effi Hacked together by Ross Wightman """ +from timm.data import IMAGENET_DEFAULT_MEAN, IMAGENET_DEFAULT_STD, IMAGENET_INCEPTION_MEAN, IMAGENET_INCEPTION_STD from .efficientnet_builder import * from .feature_hooks import FeatureHooks -from .registry import register_model from .helpers import load_pretrained, adapt_model_from_file from .layers import SelectAdaptivePool2d -from timm.models.layers import create_conv2d -from timm.data import IMAGENET_DEFAULT_MEAN, IMAGENET_DEFAULT_STD, IMAGENET_INCEPTION_MEAN, IMAGENET_INCEPTION_STD - +from .registry import register_model __all__ = ['EfficientNet'] @@ -373,8 +371,11 @@ class EfficientNet(nn.Module): def reset_classifier(self, num_classes, global_pool='avg'): self.num_classes = num_classes self.global_pool = 
SelectAdaptivePool2d(pool_type=global_pool) - self.classifier = nn.Linear( - self.num_features * self.global_pool.feat_mult(), num_classes) if num_classes else None + if num_classes: + num_features = self.num_features * self.global_pool.feat_mult() + self.classifier = nn.Linear(num_features, num_classes) + else: + self.classifier = nn.Identity() def forward_features(self, x): x = self.conv_stem(x) @@ -785,13 +786,13 @@ def _gen_efficientnet_condconv( Ref impl: https://github.com/tensorflow/tpu/tree/master/models/official/efficientnet/condconv """ arch_def = [ - ['ds_r1_k3_s1_e1_c16_se0.25'], - ['ir_r2_k3_s2_e6_c24_se0.25'], - ['ir_r2_k5_s2_e6_c40_se0.25'], - ['ir_r3_k3_s2_e6_c80_se0.25'], - ['ir_r3_k5_s1_e6_c112_se0.25_cc4'], - ['ir_r4_k5_s2_e6_c192_se0.25_cc4'], - ['ir_r1_k3_s1_e6_c320_se0.25_cc4'], + ['ds_r1_k3_s1_e1_c16_se0.25'], + ['ir_r2_k3_s2_e6_c24_se0.25'], + ['ir_r2_k5_s2_e6_c40_se0.25'], + ['ir_r3_k3_s2_e6_c80_se0.25'], + ['ir_r3_k5_s1_e6_c112_se0.25_cc4'], + ['ir_r4_k5_s2_e6_c192_se0.25_cc4'], + ['ir_r1_k3_s1_e6_c320_se0.25_cc4'], ] # NOTE unlike official impl, this one uses `cc` option where x is the base number of experts for each stage and # the expert_multiplier increases that on a per-model basis as with depth/channel multipliers @@ -1187,6 +1188,7 @@ def efficientnet_cc_b0_8e(pretrained=False, **kwargs): pretrained=pretrained, **kwargs) return model + @register_model def efficientnet_cc_b1_8e(pretrained=False, **kwargs): """ EfficientNet-CondConv-B1 w/ 8 Experts """ @@ -1242,8 +1244,6 @@ def efficientnet_lite4(pretrained=False, **kwargs): return model - - @register_model def efficientnet_b1_pruned(pretrained=False, **kwargs): """ EfficientNet-B1 Pruned. The pruning has been obtained using https://arxiv.org/pdf/2002.08258.pdf """ @@ -1275,8 +1275,6 @@ def efficientnet_b3_pruned(pretrained=False, **kwargs): return model - - @register_model def tf_efficientnet_b0(pretrained=False, **kwargs): """ EfficientNet-B0. Tensorflow compatible variant """ @@ -1619,6 +1617,7 @@ def tf_efficientnet_cc_b0_8e(pretrained=False, **kwargs): pretrained=pretrained, **kwargs) return model + @register_model def tf_efficientnet_cc_b1_8e(pretrained=False, **kwargs): """ EfficientNet-CondConv-B1 w/ 8 Experts. 
Tensorflow compatible variant """ @@ -1764,4 +1763,3 @@ def tf_mixnet_l(pretrained=False, **kwargs): model = _gen_mixnet_m( 'tf_mixnet_l', channel_multiplier=1.3, pretrained=pretrained, **kwargs) return model - diff --git a/timm/models/gluon_resnet.py b/timm/models/gluon_resnet.py index 6ccc4c53..a0bc4bb2 100644 --- a/timm/models/gluon_resnet.py +++ b/timm/models/gluon_resnet.py @@ -3,17 +3,11 @@ This file evolved from https://github.com/pytorch/vision 'resnet.py' with (SE)-R and ports of Gluon variations (https://github.com/dmlc/gluon-cv/blob/master/gluoncv/model_zoo/resnet.py) by Ross Wightman """ -import math -import torch -import torch.nn as nn -import torch.nn.functional as F - -from .registry import register_model +from timm.data import IMAGENET_DEFAULT_MEAN, IMAGENET_DEFAULT_STD from .helpers import load_pretrained from .layers import SEModule -from timm.data import IMAGENET_DEFAULT_MEAN, IMAGENET_DEFAULT_STD - +from .registry import register_model from .resnet import ResNet, Bottleneck, BasicBlock @@ -202,8 +196,8 @@ def gluon_resnet50_v1e(pretrained=False, num_classes=1000, in_chans=3, **kwargs) model = ResNet(Bottleneck, [3, 4, 6, 3], num_classes=num_classes, in_chans=in_chans, stem_width=64, stem_type='deep', avg_down=True, **kwargs) model.default_cfg = default_cfg - #if pretrained: - # load_pretrained(model, default_cfg, num_classes, in_chans) + if pretrained: + load_pretrained(model, default_cfg, num_classes, in_chans) return model diff --git a/timm/models/gluon_xception.py b/timm/models/gluon_xception.py index 2fc8e699..0a536b5f 100644 --- a/timm/models/gluon_xception.py +++ b/timm/models/gluon_xception.py @@ -6,15 +6,15 @@ Original PyTorch DeepLab impl: https://github.com/jfzhang95/pytorch-deeplab-xcep Hacked together by Ross Wightman """ -import torch +from collections import OrderedDict + import torch.nn as nn import torch.nn.functional as F -from collections import OrderedDict -from .registry import register_model +from timm.data import IMAGENET_DEFAULT_MEAN, IMAGENET_DEFAULT_STD from .helpers import load_pretrained from .layers import SelectAdaptivePool2d -from timm.data import IMAGENET_DEFAULT_MEAN, IMAGENET_DEFAULT_STD +from .registry import register_model __all__ = ['Xception65', 'Xception71'] @@ -47,7 +47,6 @@ default_cfgs = { } } - """ PADDING NOTES The original PyTorch and Gluon impl of these models dutifully reproduced the aligned padding added to Tensorflow models for Deeplab. 
This padding was compensating @@ -223,7 +222,7 @@ class Xception65(nn.Module): norm_layer=norm_layer, norm_kwargs=norm_kwargs, start_with_relu=True, grow_first=True, is_last=True) # Middle flow - self.mid = nn.Sequential(OrderedDict([('block%d' % i, Block( + self.mid = nn.Sequential(OrderedDict([('block%d' % i, Block( 728, 728, num_reps=3, stride=1, dilation=middle_block_dilation, norm_layer=norm_layer, norm_kwargs=norm_kwargs, start_with_relu=True, grow_first=True)) for i in range(4, 20)])) @@ -333,7 +332,7 @@ class Xception71(nn.Module): exit_block_dilations = (2, 4) else: raise NotImplementedError - + # Entry flow self.conv1 = nn.Conv2d(in_chans, 32, kernel_size=3, stride=2, padding=1, bias=False) self.bn1 = norm_layer(num_features=32, **norm_kwargs) @@ -394,7 +393,11 @@ class Xception71(nn.Module): def reset_classifier(self, num_classes, global_pool='avg'): self.num_classes = num_classes self.global_pool = SelectAdaptivePool2d(pool_type=global_pool) - self.fc = nn.Linear(self.num_features * self.global_pool.feat_mult(), num_classes) if num_classes else None + if num_classes: + num_features = self.num_features * self.global_pool.feat_mult() + self.fc = nn.Linear(num_features, num_classes) + else: + self.fc = nn.Identity() def forward_features(self, x): # Entry flow @@ -465,4 +468,3 @@ def gluon_xception71(pretrained=False, num_classes=1000, in_chans=3, **kwargs): if pretrained: load_pretrained(model, default_cfg, num_classes, in_chans) return model - diff --git a/timm/models/inception_resnet_v2.py b/timm/models/inception_resnet_v2.py index 13ad0e9d..34b14570 100644 --- a/timm/models/inception_resnet_v2.py +++ b/timm/models/inception_resnet_v2.py @@ -6,10 +6,10 @@ import torch import torch.nn as nn import torch.nn.functional as F -from .registry import register_model +from timm.data import IMAGENET_INCEPTION_MEAN, IMAGENET_INCEPTION_STD from .helpers import load_pretrained from .layers import SelectAdaptivePool2d -from timm.data import IMAGENET_INCEPTION_MEAN, IMAGENET_INCEPTION_STD +from .registry import register_model __all__ = ['InceptionResnetV2'] @@ -296,8 +296,11 @@ class InceptionResnetV2(nn.Module): def reset_classifier(self, num_classes, global_pool='avg'): self.global_pool = SelectAdaptivePool2d(pool_type=global_pool) self.num_classes = num_classes - self.classif = nn.Linear( - self.num_features * self.global_pool.feat_mult(), num_classes) if num_classes else None + if num_classes: + num_features = self.num_features * self.global_pool.feat_mult() + self.classif = nn.Linear(num_features, num_classes) + else: + self.classif = nn.Identity() def forward_features(self, x): x = self.conv2d_1a(x) diff --git a/timm/models/inception_v3.py b/timm/models/inception_v3.py index a0ea784f..64d6fe75 100644 --- a/timm/models/inception_v3.py +++ b/timm/models/inception_v3.py @@ -1,7 +1,8 @@ from torchvision.models import Inception3 -from .registry import register_model -from .helpers import load_pretrained + from timm.data import IMAGENET_DEFAULT_STD, IMAGENET_DEFAULT_MEAN, IMAGENET_INCEPTION_MEAN, IMAGENET_INCEPTION_STD +from .helpers import load_pretrained +from .registry import register_model __all__ = [] diff --git a/timm/models/inception_v4.py b/timm/models/inception_v4.py index 16080554..52b5ef47 100644 --- a/timm/models/inception_v4.py +++ b/timm/models/inception_v4.py @@ -6,10 +6,10 @@ import torch import torch.nn as nn import torch.nn.functional as F -from .registry import register_model +from timm.data import IMAGENET_INCEPTION_MEAN, IMAGENET_INCEPTION_STD from .helpers import 
load_pretrained from .layers import SelectAdaptivePool2d -from timm.data import IMAGENET_INCEPTION_MEAN, IMAGENET_INCEPTION_STD +from .registry import register_model __all__ = ['InceptionV4'] @@ -280,8 +280,11 @@ class InceptionV4(nn.Module): def reset_classifier(self, num_classes, global_pool='avg'): self.global_pool = SelectAdaptivePool2d(pool_type=global_pool) self.num_classes = num_classes - self.last_linear = nn.Linear( - self.num_features * self.global_pool.feat_mult(), num_classes) if num_classes else None + if num_classes: + num_features = self.num_features * self.global_pool.feat_mult() + self.last_linear = nn.Linear(num_features, num_classes) + else: + self.last_linear = nn.Identity() def forward_features(self, x): return self.features(x) @@ -303,6 +306,3 @@ def inception_v4(pretrained=False, num_classes=1000, in_chans=3, **kwargs): if pretrained: load_pretrained(model, default_cfg, num_classes, in_chans) return model - - - diff --git a/timm/models/mobilenetv3.py b/timm/models/mobilenetv3.py index 86ca9f7a..e38884b8 100644 --- a/timm/models/mobilenetv3.py +++ b/timm/models/mobilenetv3.py @@ -8,13 +8,13 @@ Paper: Searching for MobileNetV3 - https://arxiv.org/abs/1905.02244 Hacked together by Ross Wightman """ +from timm.data import IMAGENET_DEFAULT_MEAN, IMAGENET_DEFAULT_STD, IMAGENET_INCEPTION_MEAN, IMAGENET_INCEPTION_STD from .efficientnet_builder import * -from .registry import register_model +from .feature_hooks import FeatureHooks from .helpers import load_pretrained from .layers import SelectAdaptivePool2d, create_conv2d from .layers.activations import HardSwish, hard_sigmoid -from .feature_hooks import FeatureHooks -from timm.data import IMAGENET_DEFAULT_MEAN, IMAGENET_DEFAULT_STD, IMAGENET_INCEPTION_MEAN, IMAGENET_INCEPTION_STD +from .registry import register_model __all__ = ['MobileNetV3'] @@ -76,7 +76,7 @@ class MobileNetV3(nn.Module): channel_multiplier=1.0, pad_type='', act_layer=nn.ReLU, drop_rate=0., drop_path_rate=0., se_kwargs=None, norm_layer=nn.BatchNorm2d, norm_kwargs=None, global_pool='avg'): super(MobileNetV3, self).__init__() - + self.num_classes = num_classes self.num_features = num_features self.drop_rate = drop_rate @@ -96,7 +96,7 @@ class MobileNetV3(nn.Module): self.blocks = nn.Sequential(*builder(self._in_chs, block_args)) self.feature_info = builder.features self._in_chs = builder.in_chs - + # Head + Pooling self.global_pool = SelectAdaptivePool2d(pool_type=global_pool) self.conv_head = create_conv2d(self._in_chs, self.num_features, 1, padding=pad_type, bias=head_bias) @@ -120,8 +120,11 @@ class MobileNetV3(nn.Module): def reset_classifier(self, num_classes, global_pool='avg'): self.global_pool = SelectAdaptivePool2d(pool_type=global_pool) self.num_classes = num_classes - self.classifier = nn.Linear( - self.num_features * self.global_pool.feat_mult(), num_classes) if self.num_classes else None + if num_classes: + num_features = self.num_features * self.global_pool.feat_mult() + self.classifier = nn.Linear(num_features, num_classes) + else: + self.classifier = nn.Identity() def forward_features(self, x): x = self.conv_stem(x) @@ -397,7 +400,6 @@ def mobilenetv3_small_075(pretrained=False, **kwargs): @register_model def mobilenetv3_small_100(pretrained=False, **kwargs): - print(kwargs) """ MobileNet V3 """ model = _gen_mobilenet_v3('mobilenetv3_small_100', 1.0, pretrained=pretrained, **kwargs) return model diff --git a/timm/models/nasnet.py b/timm/models/nasnet.py index 8847b1de..21d20032 100644 --- a/timm/models/nasnet.py +++ b/timm/models/nasnet.py @@ 
-2,10 +2,9 @@ import torch import torch.nn as nn import torch.nn.functional as F -from .registry import register_model from .helpers import load_pretrained from .layers import SelectAdaptivePool2d - +from .registry import register_model __all__ = ['NASNetALarge'] @@ -187,17 +186,17 @@ class CellStem1(nn.Module): self.stem_size = stem_size self.conv_1x1 = nn.Sequential() self.conv_1x1.add_module('relu', nn.ReLU()) - self.conv_1x1.add_module('conv', nn.Conv2d(2*self.num_channels, self.num_channels, 1, stride=1, bias=False)) + self.conv_1x1.add_module('conv', nn.Conv2d(2 * self.num_channels, self.num_channels, 1, stride=1, bias=False)) self.conv_1x1.add_module('bn', nn.BatchNorm2d(self.num_channels, eps=0.001, momentum=0.1, affine=True)) self.relu = nn.ReLU() self.path_1 = nn.Sequential() self.path_1.add_module('avgpool', nn.AvgPool2d(1, stride=2, count_include_pad=False)) - self.path_1.add_module('conv', nn.Conv2d(self.stem_size, self.num_channels//2, 1, stride=1, bias=False)) + self.path_1.add_module('conv', nn.Conv2d(self.stem_size, self.num_channels // 2, 1, stride=1, bias=False)) self.path_2 = nn.ModuleList() self.path_2.add_module('pad', nn.ZeroPad2d((0, 1, 0, 1))) self.path_2.add_module('avgpool', nn.AvgPool2d(1, stride=2, count_include_pad=False)) - self.path_2.add_module('conv', nn.Conv2d(self.stem_size, self.num_channels//2, 1, stride=1, bias=False)) + self.path_2.add_module('conv', nn.Conv2d(self.stem_size, self.num_channels // 2, 1, stride=1, bias=False)) self.final_path_bn = nn.BatchNorm2d(self.num_channels, eps=0.001, momentum=0.1, affine=True) @@ -507,50 +506,50 @@ class NASNetALarge(nn.Module): self.cell_stem_0 = CellStem0(self.stem_size, num_channels=channels // (channel_multiplier ** 2)) self.cell_stem_1 = CellStem1(self.stem_size, num_channels=channels // channel_multiplier) - self.cell_0 = FirstCell(in_channels_left=channels, out_channels_left=channels//2, - in_channels_right=2*channels, out_channels_right=channels) - self.cell_1 = NormalCell(in_channels_left=2*channels, out_channels_left=channels, - in_channels_right=6*channels, out_channels_right=channels) - self.cell_2 = NormalCell(in_channels_left=6*channels, out_channels_left=channels, - in_channels_right=6*channels, out_channels_right=channels) - self.cell_3 = NormalCell(in_channels_left=6*channels, out_channels_left=channels, - in_channels_right=6*channels, out_channels_right=channels) - self.cell_4 = NormalCell(in_channels_left=6*channels, out_channels_left=channels, - in_channels_right=6*channels, out_channels_right=channels) - self.cell_5 = NormalCell(in_channels_left=6*channels, out_channels_left=channels, - in_channels_right=6*channels, out_channels_right=channels) - - self.reduction_cell_0 = ReductionCell0(in_channels_left=6*channels, out_channels_left=2*channels, - in_channels_right=6*channels, out_channels_right=2*channels) - - self.cell_6 = FirstCell(in_channels_left=6*channels, out_channels_left=channels, - in_channels_right=8*channels, out_channels_right=2*channels) - self.cell_7 = NormalCell(in_channels_left=8*channels, out_channels_left=2*channels, - in_channels_right=12*channels, out_channels_right=2*channels) - self.cell_8 = NormalCell(in_channels_left=12*channels, out_channels_left=2*channels, - in_channels_right=12*channels, out_channels_right=2*channels) - self.cell_9 = NormalCell(in_channels_left=12*channels, out_channels_left=2*channels, - in_channels_right=12*channels, out_channels_right=2*channels) - self.cell_10 = NormalCell(in_channels_left=12*channels, out_channels_left=2*channels, - 
in_channels_right=12*channels, out_channels_right=2*channels) - self.cell_11 = NormalCell(in_channels_left=12*channels, out_channels_left=2*channels, - in_channels_right=12*channels, out_channels_right=2*channels) - - self.reduction_cell_1 = ReductionCell1(in_channels_left=12*channels, out_channels_left=4*channels, - in_channels_right=12*channels, out_channels_right=4*channels) - - self.cell_12 = FirstCell(in_channels_left=12*channels, out_channels_left=2*channels, - in_channels_right=16*channels, out_channels_right=4*channels) - self.cell_13 = NormalCell(in_channels_left=16*channels, out_channels_left=4*channels, - in_channels_right=24*channels, out_channels_right=4*channels) - self.cell_14 = NormalCell(in_channels_left=24*channels, out_channels_left=4*channels, - in_channels_right=24*channels, out_channels_right=4*channels) - self.cell_15 = NormalCell(in_channels_left=24*channels, out_channels_left=4*channels, - in_channels_right=24*channels, out_channels_right=4*channels) - self.cell_16 = NormalCell(in_channels_left=24*channels, out_channels_left=4*channels, - in_channels_right=24*channels, out_channels_right=4*channels) - self.cell_17 = NormalCell(in_channels_left=24*channels, out_channels_left=4*channels, - in_channels_right=24*channels, out_channels_right=4*channels) + self.cell_0 = FirstCell(in_channels_left=channels, out_channels_left=channels // 2, + in_channels_right=2 * channels, out_channels_right=channels) + self.cell_1 = NormalCell(in_channels_left=2 * channels, out_channels_left=channels, + in_channels_right=6 * channels, out_channels_right=channels) + self.cell_2 = NormalCell(in_channels_left=6 * channels, out_channels_left=channels, + in_channels_right=6 * channels, out_channels_right=channels) + self.cell_3 = NormalCell(in_channels_left=6 * channels, out_channels_left=channels, + in_channels_right=6 * channels, out_channels_right=channels) + self.cell_4 = NormalCell(in_channels_left=6 * channels, out_channels_left=channels, + in_channels_right=6 * channels, out_channels_right=channels) + self.cell_5 = NormalCell(in_channels_left=6 * channels, out_channels_left=channels, + in_channels_right=6 * channels, out_channels_right=channels) + + self.reduction_cell_0 = ReductionCell0(in_channels_left=6 * channels, out_channels_left=2 * channels, + in_channels_right=6 * channels, out_channels_right=2 * channels) + + self.cell_6 = FirstCell(in_channels_left=6 * channels, out_channels_left=channels, + in_channels_right=8 * channels, out_channels_right=2 * channels) + self.cell_7 = NormalCell(in_channels_left=8 * channels, out_channels_left=2 * channels, + in_channels_right=12 * channels, out_channels_right=2 * channels) + self.cell_8 = NormalCell(in_channels_left=12 * channels, out_channels_left=2 * channels, + in_channels_right=12 * channels, out_channels_right=2 * channels) + self.cell_9 = NormalCell(in_channels_left=12 * channels, out_channels_left=2 * channels, + in_channels_right=12 * channels, out_channels_right=2 * channels) + self.cell_10 = NormalCell(in_channels_left=12 * channels, out_channels_left=2 * channels, + in_channels_right=12 * channels, out_channels_right=2 * channels) + self.cell_11 = NormalCell(in_channels_left=12 * channels, out_channels_left=2 * channels, + in_channels_right=12 * channels, out_channels_right=2 * channels) + + self.reduction_cell_1 = ReductionCell1(in_channels_left=12 * channels, out_channels_left=4 * channels, + in_channels_right=12 * channels, out_channels_right=4 * channels) + + self.cell_12 = FirstCell(in_channels_left=12 * channels, 
out_channels_left=2 * channels, + in_channels_right=16 * channels, out_channels_right=4 * channels) + self.cell_13 = NormalCell(in_channels_left=16 * channels, out_channels_left=4 * channels, + in_channels_right=24 * channels, out_channels_right=4 * channels) + self.cell_14 = NormalCell(in_channels_left=24 * channels, out_channels_left=4 * channels, + in_channels_right=24 * channels, out_channels_right=4 * channels) + self.cell_15 = NormalCell(in_channels_left=24 * channels, out_channels_left=4 * channels, + in_channels_right=24 * channels, out_channels_right=4 * channels) + self.cell_16 = NormalCell(in_channels_left=24 * channels, out_channels_left=4 * channels, + in_channels_right=24 * channels, out_channels_right=4 * channels) + self.cell_17 = NormalCell(in_channels_left=24 * channels, out_channels_left=4 * channels, + in_channels_right=24 * channels, out_channels_right=4 * channels) self.relu = nn.ReLU() self.global_pool = SelectAdaptivePool2d(pool_type=global_pool) @@ -562,9 +561,11 @@ class NASNetALarge(nn.Module): def reset_classifier(self, num_classes, global_pool='avg'): self.num_classes = num_classes self.global_pool = SelectAdaptivePool2d(pool_type=global_pool) - del self.last_linear - self.last_linear = nn.Linear( - self.num_features * self.global_pool.feat_mult(), num_classes) if num_classes else None + if num_classes: + num_features = self.num_features * self.global_pool.feat_mult() + self.last_linear = nn.Linear(num_features, num_classes) + else: + self.last_linear = nn.Identity() def forward_features(self, x): x_conv0 = self.conv0(x) diff --git a/timm/models/pnasnet.py b/timm/models/pnasnet.py index 64d83e3c..97c2f86d 100644 --- a/timm/models/pnasnet.py +++ b/timm/models/pnasnet.py @@ -6,15 +6,16 @@ """ from __future__ import print_function, division, absolute_import + from collections import OrderedDict import torch import torch.nn as nn import torch.nn.functional as F -from .registry import register_model from .helpers import load_pretrained from .layers import SelectAdaptivePool2d +from .registry import register_model __all__ = ['PNASNet5Large'] @@ -349,11 +350,11 @@ class PNASNet5Large(nn.Module): def reset_classifier(self, num_classes, global_pool='avg'): self.num_classes = num_classes self.global_pool = SelectAdaptivePool2d(pool_type=global_pool) - del self.last_linear if num_classes: - self.last_linear = nn.Linear(self.num_features * self.global_pool.feat_mult(), num_classes) + num_features = self.num_features * self.global_pool.feat_mult() + self.last_linear = nn.Linear(num_features, num_classes) else: - self.last_linear = None + self.last_linear = nn.Identity() def forward_features(self, x): x_conv_0 = self.conv_0(x) diff --git a/timm/models/res2net.py b/timm/models/res2net.py index 8655776c..3e3882fe 100644 --- a/timm/models/res2net.py +++ b/timm/models/res2net.py @@ -6,13 +6,11 @@ import math import torch import torch.nn as nn -import torch.nn.functional as F -from .resnet import ResNet -from .registry import register_model -from .helpers import load_pretrained -from .layers import SEModule from timm.data import IMAGENET_DEFAULT_MEAN, IMAGENET_DEFAULT_STD +from .helpers import load_pretrained +from .registry import register_model +from .resnet import ResNet __all__ = [] @@ -105,7 +103,7 @@ class Bottle2neck(nn.Module): sp = bn(sp) sp = self.relu(sp) spo.append(sp) - if self.scale > 1 : + if self.scale > 1: spo.append(self.pool(spx[-1]) if self.is_first else spx[-1]) out = torch.cat(spo, 1) diff --git a/timm/models/resnet.py b/timm/models/resnet.py index 
4e865705..430bbb49 100644 --- a/timm/models/resnet.py +++ b/timm/models/resnet.py @@ -10,10 +10,10 @@ import math import torch.nn as nn import torch.nn.functional as F -from .registry import register_model +from timm.data import IMAGENET_DEFAULT_MEAN, IMAGENET_DEFAULT_STD from .helpers import load_pretrained, adapt_model_from_file from .layers import SelectAdaptivePool2d, DropBlock2d, DropPath, AvgPool2dSame, create_attn, BlurPool2d -from timm.data import IMAGENET_DEFAULT_MEAN, IMAGENET_DEFAULT_STD +from .registry import register_model __all__ = ['ResNet', 'BasicBlock', 'Bottleneck'] # model_registry will add each entrypoint fn to this @@ -377,6 +377,7 @@ class ResNet(nn.Module): global_pool : str, default 'avg' Global pooling type. One of 'avg', 'max', 'avgmax', 'catavgmax' """ + def __init__(self, block, layers, num_classes=1000, in_chans=3, cardinality=1, base_width=64, stem_width=64, stem_type='', block_reduce_first=1, down_kernel_size=1, avg_down=False, output_stride=32, @@ -482,8 +483,11 @@ class ResNet(nn.Module): def reset_classifier(self, num_classes, global_pool='avg'): self.global_pool = SelectAdaptivePool2d(pool_type=global_pool) self.num_classes = num_classes - del self.fc - self.fc = nn.Linear(self.num_features * self.global_pool.feat_mult(), num_classes) if num_classes else None + if num_classes: + num_features = self.num_features * self.global_pool.feat_mult() + self.fc = nn.Linear(num_features, num_classes) + else: + self.fc = nn.Identity() def forward_features(self, x): x = self.conv1(x) diff --git a/timm/models/selecsls.py b/timm/models/selecsls.py index 2f369e99..7b7de369 100644 --- a/timm/models/selecsls.py +++ b/timm/models/selecsls.py @@ -9,16 +9,15 @@ https://arxiv.org/abs/1907.00837 Based on ResNet implementation in https://github.com/rwightman/pytorch-image-models and SelecSLS Net implementation in https://github.com/mehtadushy/SelecSLS-Pytorch """ -import math import torch import torch.nn as nn import torch.nn.functional as F -from .registry import register_model +from timm.data import IMAGENET_DEFAULT_MEAN, IMAGENET_DEFAULT_STD from .helpers import load_pretrained from .layers import SelectAdaptivePool2d -from timm.data import IMAGENET_DEFAULT_MEAN, IMAGENET_DEFAULT_STD +from .registry import register_model __all__ = ['SelecSLS'] # model_registry will add each entrypoint fn to this @@ -134,11 +133,11 @@ class SelecSLS(nn.Module): def reset_classifier(self, num_classes, global_pool='avg'): self.global_pool = SelectAdaptivePool2d(pool_type=global_pool) self.num_classes = num_classes - del self.fc if num_classes: - self.fc = nn.Linear(self.num_features * self.global_pool.feat_mult(), num_classes) + num_features = self.num_features * self.global_pool.feat_mult() + self.fc = nn.Linear(num_features, num_classes) else: - self.fc = None + self.fc = nn.Identity() def forward_features(self, x): x = self.stem(x) diff --git a/timm/models/senet.py b/timm/models/senet.py index efbf4657..8594d14d 100644 --- a/timm/models/senet.py +++ b/timm/models/senet.py @@ -8,16 +8,16 @@ Original model: https://github.com/hujie-frank/SENet ResNet code gently borrowed from https://github.com/pytorch/vision/blob/master/torchvision/models/resnet.py """ -from collections import OrderedDict import math +from collections import OrderedDict import torch.nn as nn import torch.nn.functional as F -from .registry import register_model +from timm.data import IMAGENET_DEFAULT_MEAN, IMAGENET_DEFAULT_STD from .helpers import load_pretrained from .layers import SelectAdaptivePool2d -from timm.data import 
IMAGENET_DEFAULT_MEAN, IMAGENET_DEFAULT_STD +from .registry import register_model __all__ = ['SENet'] @@ -369,11 +369,11 @@ class SENet(nn.Module): def reset_classifier(self, num_classes, global_pool='avg'): self.num_classes = num_classes self.avg_pool = SelectAdaptivePool2d(pool_type=global_pool) - del self.last_linear if num_classes: - self.last_linear = nn.Linear(self.num_features * self.avg_pool.feat_mult(), num_classes) + num_features = self.num_features * self.avg_pool.feat_mult() + self.last_linear = nn.Linear(num_features, num_classes) else: - self.last_linear = None + self.last_linear = nn.Identity() def forward_features(self, x): x = self.layer0(x) diff --git a/timm/models/tresnet.py b/timm/models/tresnet.py index 48b3e1de..a4a980b4 100644 --- a/timm/models/tresnet.py +++ b/timm/models/tresnet.py @@ -5,14 +5,16 @@ https://arxiv.org/pdf/2003.13630.pdf Original model: https://github.com/mrT23/TResNet """ +from collections import OrderedDict from functools import partial + import torch import torch.nn as nn import torch.nn.functional as F -from collections import OrderedDict + +from .helpers import load_pretrained from .layers import SpaceToDepthModule, AntiAliasDownsampleLayer, SelectAdaptivePool2d from .registry import register_model -from .helpers import load_pretrained try: from inplace_abn import InPlaceABN @@ -88,7 +90,7 @@ class FastSEModule(nn.Module): def IABN2Float(module: nn.Module) -> nn.Module: - "If `module` is IABN don't use half precision." + """If `module` is IABN don't use half precision.""" if isinstance(module, InPlaceABN): module.float() for child in module.children(): @@ -277,8 +279,10 @@ class TResNet(nn.Module): self.num_classes = num_classes self.head = None if num_classes: - self.head = nn.Sequential(OrderedDict([ - ('fc', nn.Linear(self.num_features * self.global_pool.feat_mult(), num_classes))])) + num_features = self.num_features * self.global_pool.feat_mult() + self.head = nn.Sequential(OrderedDict([('fc', nn.Linear(num_features, num_classes))])) + else: + self.head = nn.Sequential(OrderedDict([('fc', nn.Identity())])) def forward_features(self, x): return self.body(x) diff --git a/timm/models/xception.py b/timm/models/xception.py index cb98bbc9..467b42f6 100644 --- a/timm/models/xception.py +++ b/timm/models/xception.py @@ -21,15 +21,13 @@ normalize = transforms.Normalize(mean=[0.5, 0.5, 0.5], The resize parameter of the validation transform should be 333, and make sure to center crop at 299x299 """ -import math -import torch import torch.nn as nn import torch.nn.functional as F -from .registry import register_model from .helpers import load_pretrained from .layers import SelectAdaptivePool2d +from .registry import register_model __all__ = ['Xception'] @@ -180,8 +178,11 @@ class Xception(nn.Module): def reset_classifier(self, num_classes, global_pool='avg'): self.num_classes = num_classes self.global_pool = SelectAdaptivePool2d(pool_type=global_pool) - del self.fc - self.fc = nn.Linear(self.num_features * self.global_pool.feat_mult(), num_classes) if num_classes else None + if num_classes: + num_features = self.num_features * self.global_pool.feat_mult() + self.fc = nn.Linear(num_features, num_classes) + else: + self.fc = nn.Identity() def forward_features(self, x): x = self.conv1(x) From 022ed001f3d5bea501386f36c6c9b5f88fae27cb Mon Sep 17 00:00:00 2001 From: Ross Wightman Date: Thu, 7 May 2020 09:57:48 -0700 Subject: [PATCH 02/19] Update DenseNet to latest in Torchvision (torchscript compat, checkpointing, proper init). 
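
For orientation, the checkpointing referred to here is the standard torch.utils.checkpoint recompute-in-backward trick, which the reworked _DenseLayer below applies to its concatenate-plus-1x1-bottleneck step when memory_efficient=True. A rough, self-contained sketch of the idea, with illustrative names and wiring rather than the actual timm code:

    import torch
    import torch.nn as nn
    import torch.utils.checkpoint as cp

    class CheckpointedBottleneck(nn.Module):
        # Illustrative only: recompute the norm -> relu -> 1x1 conv bottleneck
        # during backward instead of caching its intermediate activations.
        def __init__(self, in_ch, out_ch, memory_efficient=True):
            super().__init__()
            self.norm = nn.BatchNorm2d(in_ch)
            self.relu = nn.ReLU(inplace=True)
            self.conv = nn.Conv2d(in_ch, out_ch, kernel_size=1, bias=False)
            self.memory_efficient = memory_efficient

        def _bottleneck(self, x):
            return self.conv(self.relu(self.norm(x)))

        def forward(self, x):
            if self.memory_efficient and x.requires_grad and not torch.jit.is_scripting():
                # trade extra recompute in backward for lower activation memory
                return cp.checkpoint(self._bottleneck, x)
            return self._bottleneck(x)

The real layer additionally accepts a List[Tensor] and concatenates it before the bottleneck, which is why the diff below also adds the torchscript overload machinery.
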
Start adding ehanced configurability, stem options... --- timm/models/densenet.py | 285 ++++++++++++++++++++++++++++++---------- 1 file changed, 216 insertions(+), 69 deletions(-) diff --git a/timm/models/densenet.py b/timm/models/densenet.py index c8be8683..4b774c4a 100644 --- a/timm/models/densenet.py +++ b/timm/models/densenet.py @@ -8,6 +8,8 @@ from collections import OrderedDict import torch import torch.nn as nn import torch.nn.functional as F +import torch.utils.checkpoint as cp +from torch.jit.annotations import List from timm.data import IMAGENET_DEFAULT_MEAN, IMAGENET_DEFAULT_STD from .helpers import load_pretrained @@ -28,53 +30,121 @@ def _cfg(url=''): default_cfgs = { 'densenet121': _cfg(url='https://download.pytorch.org/models/densenet121-a639ec97.pth'), + 'densenet121d': _cfg(url=''), + 'densenet121tn': _cfg(url=''), 'densenet169': _cfg(url='https://download.pytorch.org/models/densenet169-b2777c0a.pth'), 'densenet201': _cfg(url='https://download.pytorch.org/models/densenet201-c1103571.pth'), 'densenet161': _cfg(url='https://download.pytorch.org/models/densenet161-8d451a50.pth'), } -class _DenseLayer(nn.Sequential): - def __init__(self, num_input_features, growth_rate, bn_size, drop_rate): +class _DenseLayer(nn.Module): + def __init__(self, num_input_features, growth_rate, bn_size, act_layer=nn.ReLU, norm_layer=nn.BatchNorm2d, + drop_rate=0., memory_efficient=False): super(_DenseLayer, self).__init__() - self.add_module('norm1', nn.BatchNorm2d(num_input_features)), - self.add_module('relu1', nn.ReLU(inplace=True)), - self.add_module('conv1', nn.Conv2d(num_input_features, bn_size * - growth_rate, kernel_size=1, stride=1, bias=False)), - self.add_module('norm2', nn.BatchNorm2d(bn_size * growth_rate)), - self.add_module('relu2', nn.ReLU(inplace=True)), - self.add_module('conv2', nn.Conv2d(bn_size * growth_rate, growth_rate, - kernel_size=3, stride=1, padding=1, bias=False)), - self.drop_rate = drop_rate + self.add_module('norm1', norm_layer(num_input_features)), + self.add_module('relu1', act_layer(inplace=True)), + self.add_module('conv1', nn.Conv2d( + num_input_features, bn_size * growth_rate, kernel_size=1, stride=1, bias=False)), + self.add_module('norm2', norm_layer(bn_size * growth_rate)), + self.add_module('relu2', act_layer(inplace=True)), + self.add_module('conv2', nn.Conv2d( + bn_size * growth_rate, growth_rate, kernel_size=3, stride=1, padding=1, bias=False)), + self.drop_rate = float(drop_rate) + self.memory_efficient = memory_efficient + + def bn_function(self, inputs): + # type: (List[torch.Tensor]) -> torch.Tensor + concated_features = torch.cat(inputs, 1) + bottleneck_output = self.conv1(self.relu1(self.norm1(concated_features))) # noqa: T484 + return bottleneck_output + + # todo: rewrite when torchscript supports any + def any_requires_grad(self, input): + # type: (List[torch.Tensor]) -> bool + for tensor in input: + if tensor.requires_grad: + return True + return False + + @torch.jit.unused # noqa: T484 + def call_checkpoint_bottleneck(self, input): + # type: (List[torch.Tensor]) -> torch.Tensor + def closure(*inputs): + return self.bn_function(*inputs) + + return cp.checkpoint(closure, input) + + @torch.jit._overload_method # noqa: F811 + def forward(self, input): + # type: (List[torch.Tensor]) -> (torch.Tensor) + pass + + @torch.jit._overload_method # noqa: F811 + def forward(self, input): + # type: (torch.Tensor) -> (torch.Tensor) + pass + + # torchscript does not yet support *args, so we overload method + # allowing it to take either a List[Tensor] or 
single Tensor + def forward(self, input): # noqa: F811 + if isinstance(input, torch.Tensor): + prev_features = [input] + else: + prev_features = input - def forward(self, x): - new_features = super(_DenseLayer, self).forward(x) + if self.memory_efficient and self.any_requires_grad(prev_features): + if torch.jit.is_scripting(): + raise Exception("Memory Efficient not supported in JIT") + bottleneck_output = self.call_checkpoint_bottleneck(prev_features) + else: + bottleneck_output = self.bn_function(prev_features) + + new_features = self.conv2(self.relu2(self.norm2(bottleneck_output))) if self.drop_rate > 0: new_features = F.dropout(new_features, p=self.drop_rate, training=self.training) - return torch.cat([x, new_features], 1) + return new_features -class _DenseBlock(nn.Sequential): - def __init__(self, num_layers, num_input_features, bn_size, growth_rate, drop_rate): +class _DenseBlock(nn.ModuleDict): + _version = 2 + + def __init__(self, num_layers, num_input_features, bn_size, growth_rate, act_layer=nn.ReLU, + norm_layer=nn.BatchNorm2d, drop_rate=0., memory_efficient=False): super(_DenseBlock, self).__init__() for i in range(num_layers): - layer = _DenseLayer(num_input_features + i * growth_rate, growth_rate, bn_size, drop_rate) + layer = _DenseLayer( + num_input_features + i * growth_rate, + growth_rate=growth_rate, + bn_size=bn_size, + act_layer=act_layer, + norm_layer=norm_layer, + drop_rate=drop_rate, + memory_efficient=memory_efficient, + ) self.add_module('denselayer%d' % (i + 1), layer) + def forward(self, init_features): + features = [init_features] + for name, layer in self.items(): + new_features = layer(features) + features.append(new_features) + return torch.cat(features, 1) + class _Transition(nn.Sequential): - def __init__(self, num_input_features, num_output_features): + def __init__(self, num_input_features, num_output_features, act_layer=nn.ReLU, norm_layer=nn.BatchNorm2d): super(_Transition, self).__init__() - self.add_module('norm', nn.BatchNorm2d(num_input_features)) - self.add_module('relu', nn.ReLU(inplace=True)) - self.add_module('conv', nn.Conv2d(num_input_features, num_output_features, - kernel_size=1, stride=1, bias=False)) + self.add_module('norm', norm_layer(num_input_features)) + self.add_module('relu', act_layer(inplace=True)) + self.add_module('conv', nn.Conv2d( + num_input_features, num_output_features, kernel_size=1, stride=1, bias=False)) self.add_module('pool', nn.AvgPool2d(kernel_size=2, stride=2)) class DenseNet(nn.Module): r"""Densenet-BC model class, based on - `"Densely Connected Convolutional Networks" ` + `"Densely Connected Convolutional Networks" `_ Args: growth_rate (int) - how many filters to add each layer (`k` in paper) @@ -84,44 +154,87 @@ class DenseNet(nn.Module): (i.e. bn_size * k features in the bottleneck layer) drop_rate (float) - dropout rate after each dense layer num_classes (int) - number of classification classes + memory_efficient (bool) - If True, uses checkpointing. Much more memory efficient, + but slower. Default: *False*. 
See `"paper" `_ """ - def __init__(self, growth_rate=32, block_config=(6, 12, 24, 16), - num_init_features=64, bn_size=4, drop_rate=0, - num_classes=1000, in_chans=3, global_pool='avg'): + def __init__(self, growth_rate=32, block_config=(6, 12, 24, 16), num_init_features=64, + bn_size=4, stem_type='', num_classes=1000, in_chans=3, global_pool='avg', + act_layer=nn.ReLU, norm_layer=nn.BatchNorm2d, aa_layer=None, drop_rate=0, memory_efficient=False): self.num_classes = num_classes self.drop_rate = drop_rate + deep_stem = 'deep' in stem_type super(DenseNet, self).__init__() # First convolution - self.features = nn.Sequential(OrderedDict([ - ('conv0', nn.Conv2d(in_chans, num_init_features, kernel_size=7, stride=2, padding=3, bias=False)), - ('norm0', nn.BatchNorm2d(num_init_features)), - ('relu0', nn.ReLU(inplace=True)), - ('pool0', nn.MaxPool2d(kernel_size=3, stride=2, padding=1)), - ])) + if aa_layer is None: + max_pool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1) + else: + max_pool = nn.Sequential(*[ + nn.MaxPool2d(kernel_size=3, stride=1, padding=1), + aa_layer(channels=self.inplanes, stride=2)]) + if deep_stem: + stem_chs_1 = stem_chs_2 = num_init_features // 2 + if 'tiered' in stem_type: + stem_chs_1 = 3 * (num_init_features // 8) + stem_chs_2 = num_init_features if 'narrow' in stem_type else 6 * (num_init_features // 8) + self.features = nn.Sequential(OrderedDict([ + ('conv0', nn.Conv2d(in_chans, stem_chs_1, 3, stride=2, padding=1, bias=False)), + ('norm0', norm_layer(stem_chs_1)), + ('relu0', act_layer(inplace=True)), + ('conv1', nn.Conv2d(stem_chs_1, stem_chs_2, 3, stride=1, padding=1, bias=False)), + ('norm1', norm_layer(stem_chs_2)), + ('relu1', act_layer(inplace=True)), + ('conv2', nn.Conv2d(stem_chs_2, num_init_features, 3, stride=1, padding=1, bias=False)), + ('norm2', norm_layer(num_init_features)), + ('relu2', act_layer(inplace=True)), + ('pool0', max_pool), + ])) + else: + self.features = nn.Sequential(OrderedDict([ + ('conv0', nn.Conv2d(in_chans, num_init_features, kernel_size=7, stride=2, padding=3, bias=False)), + ('norm0', norm_layer(num_init_features)), + ('relu0', act_layer(inplace=True)), + ('pool0', max_pool), + ])) # Each denseblock num_features = num_init_features for i, num_layers in enumerate(block_config): - block = _DenseBlock(num_layers=num_layers, num_input_features=num_features, - bn_size=bn_size, growth_rate=growth_rate, drop_rate=drop_rate) + block = _DenseBlock( + num_layers=num_layers, + num_input_features=num_features, + bn_size=bn_size, + growth_rate=growth_rate, + drop_rate=drop_rate, + memory_efficient=memory_efficient + ) self.features.add_module('denseblock%d' % (i + 1), block) num_features = num_features + num_layers * growth_rate if i != len(block_config) - 1: - trans = _Transition( - num_input_features=num_features, num_output_features=num_features // 2) + trans = _Transition(num_input_features=num_features, num_output_features=num_features // 2) self.features.add_module('transition%d' % (i + 1), trans) num_features = num_features // 2 # Final batch norm - self.features.add_module('norm5', nn.BatchNorm2d(num_features)) + self.features.add_module('norm5', norm_layer(num_features)) + self.act = act_layer(inplace=True) # Linear layer self.num_features = num_features self.global_pool = SelectAdaptivePool2d(pool_type=global_pool) self.classifier = nn.Linear(self.num_features * self.global_pool.feat_mult(), num_classes) + # Official init from torch repo. 
+ for m in self.modules(): + if isinstance(m, nn.Conv2d): + nn.init.kaiming_normal_(m.weight) + elif isinstance(m, nn.BatchNorm2d): + nn.init.constant_(m.weight, 1) + nn.init.constant_(m.bias, 0) + elif isinstance(m, nn.Linear): + nn.init.constant_(m.bias, 0) + def get_classifier(self): return self.classifier @@ -136,19 +249,20 @@ class DenseNet(nn.Module): def forward_features(self, x): x = self.features(x) - x = F.relu(x, inplace=True) + x = self.act(x) return x def forward(self, x): x = self.forward_features(x) x = self.global_pool(x).flatten(1) - if self.drop_rate > 0.: - x = F.dropout(x, p=self.drop_rate, training=self.training) + # both classifier and block drop? + # if self.drop_rate > 0.: + # x = F.dropout(x, p=self.drop_rate, training=self.training) x = self.classifier(x) return x -def _filter_pretrained(state_dict): +def _filter_torchvision_pretrained(state_dict): pattern = re.compile( r'^(.*denselayer\d+\.(?:norm|relu|conv))\.((?:[12])\.(?:weight|bias|running_mean|running_var))$') @@ -161,57 +275,90 @@ def _filter_pretrained(state_dict): return state_dict +def _densenet(variant, growth_rate, block_config, num_init_features, pretrained, **kwargs): + if kwargs.pop('features_only', False): + assert False, 'Not Implemented' # TODO + load_strict = False + kwargs.pop('num_classes', 0) + model_class = DenseNet + else: + load_strict = True + model_class = DenseNet + default_cfg = default_cfgs[variant] + model = model_class( + growth_rate=growth_rate, block_config=block_config, num_init_features=num_init_features, **kwargs) + model.default_cfg = default_cfg + if pretrained: + load_pretrained( + model, default_cfg, + num_classes=kwargs.get('num_classes', 0), + in_chans=kwargs.get('in_chans', 3), + filter_fn=_filter_torchvision_pretrained, + strict=load_strict) + return model + + @register_model -def densenet121(pretrained=False, num_classes=1000, in_chans=3, **kwargs): +def densenet121(pretrained=False, **kwargs): r"""Densenet-121 model from `"Densely Connected Convolutional Networks" ` """ - default_cfg = default_cfgs['densenet121'] - model = DenseNet(num_init_features=64, growth_rate=32, block_config=(6, 12, 24, 16), - num_classes=num_classes, in_chans=in_chans, **kwargs) - model.default_cfg = default_cfg - if pretrained: - load_pretrained(model, default_cfg, num_classes, in_chans, filter_fn=_filter_pretrained) + model = _densenet( + 'densenet121', growth_rate=32, block_config=(6, 12, 24, 16), num_init_features=64, + pretrained=pretrained, **kwargs) + return model + + +@register_model +def densenet121d(pretrained=False, **kwargs): + r"""Densenet-121 model from + `"Densely Connected Convolutional Networks" ` + """ + model = _densenet( + 'densenet121d', growth_rate=32, block_config=(6, 12, 24, 16), num_init_features=64, + stem_type='deep', pretrained=pretrained, **kwargs) return model @register_model -def densenet169(pretrained=False, num_classes=1000, in_chans=3, **kwargs): +def densenet121tn(pretrained=False, **kwargs): + r"""Densenet-121 model from + `"Densely Connected Convolutional Networks" ` + """ + model = _densenet( + 'densenet121tn', growth_rate=32, block_config=(6, 12, 24, 16), num_init_features=64, + stem_type='deep_tiered_narrow', pretrained=pretrained, **kwargs) + return model + + +@register_model +def densenet169(pretrained=False, **kwargs): r"""Densenet-169 model from `"Densely Connected Convolutional Networks" ` """ - default_cfg = default_cfgs['densenet169'] - model = DenseNet(num_init_features=64, growth_rate=32, block_config=(6, 12, 32, 32), - num_classes=num_classes, 
in_chans=in_chans, **kwargs) - model.default_cfg = default_cfg - if pretrained: - load_pretrained(model, default_cfg, num_classes, in_chans, filter_fn=_filter_pretrained) + model = _densenet( + 'densenet169', growth_rate=32, block_config=(6, 12, 32, 32), num_init_features=64, + pretrained=pretrained, **kwargs) return model @register_model -def densenet201(pretrained=False, num_classes=1000, in_chans=3, **kwargs): +def densenet201(pretrained=False, **kwargs): r"""Densenet-201 model from `"Densely Connected Convolutional Networks" ` """ - default_cfg = default_cfgs['densenet201'] - model = DenseNet(num_init_features=64, growth_rate=32, block_config=(6, 12, 48, 32), - num_classes=num_classes, in_chans=in_chans, **kwargs) - model.default_cfg = default_cfg - if pretrained: - load_pretrained(model, default_cfg, num_classes, in_chans, filter_fn=_filter_pretrained) + model = _densenet( + 'densenet201', growth_rate=32, block_config=(6, 12, 48, 32), num_init_features=64, + pretrained=pretrained, **kwargs) return model @register_model -def densenet161(pretrained=False, num_classes=1000, in_chans=3, **kwargs): +def densenet161(pretrained=False, **kwargs): r"""Densenet-201 model from `"Densely Connected Convolutional Networks" ` """ - default_cfg = default_cfgs['densenet161'] - model = DenseNet(num_init_features=96, growth_rate=48, block_config=(6, 12, 36, 24), - num_classes=num_classes, in_chans=in_chans, **kwargs) - model.default_cfg = default_cfg - if pretrained: - load_pretrained(model, default_cfg, num_classes, in_chans, filter_fn=_filter_pretrained) + model = _densenet( + 'densenet161', growth_rate=48, block_config=(6, 12, 36, 24), num_init_features=96, + pretrained=pretrained, **kwargs) return model From 14edacdf9a660f5f74b498fe2591decb835e8c56 Mon Sep 17 00:00:00 2001 From: Ross Wightman Date: Sat, 9 May 2020 18:26:41 -0700 Subject: [PATCH 03/19] DenseNet converted to support ABN (norm + act) modules. 
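
Here a norm + act module means a single callable that applies normalization and its activation together, so layer builders can take one norm_act_layer argument instead of separate norm_layer and act_layer arguments. A minimal illustrative version, not the actual BatchNormAct2d this patch adds under timm/models/layers/norm_act.py:

    import torch.nn as nn

    class SimpleBatchNormAct2d(nn.BatchNorm2d):
        # Illustrative only: fuse BatchNorm2d with its activation so one module
        # stands in for the usual norm/relu pair.
        def __init__(self, num_features, act_layer=nn.ReLU, **kwargs):
            super().__init__(num_features, **kwargs)
            self.act = act_layer(inplace=True)

        def forward(self, x):
            return self.act(super().forward(x))

With modules of that shape, the DenseLayer norm1/conv1 and norm2/conv2 pairs in the diff below no longer need explicit relu1/relu2 children, which is exactly what this patch removes.
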
Experimenting with EvoNorm, IABN --- timm/models/densenet.py | 198 +++++++++++++++++++-------------- timm/models/layers/__init__.py | 2 + timm/models/layers/evo_norm.py | 134 ++++++++++++++++++++++ timm/models/layers/norm_act.py | 50 +++++++++ 4 files changed, 301 insertions(+), 83 deletions(-) create mode 100644 timm/models/layers/evo_norm.py create mode 100644 timm/models/layers/norm_act.py diff --git a/timm/models/densenet.py b/timm/models/densenet.py index 4b774c4a..420680f9 100644 --- a/timm/models/densenet.py +++ b/timm/models/densenet.py @@ -13,7 +13,7 @@ from torch.jit.annotations import List from timm.data import IMAGENET_DEFAULT_MEAN, IMAGENET_DEFAULT_STD from .helpers import load_pretrained -from .layers import SelectAdaptivePool2d +from .layers import SelectAdaptivePool2d, BatchNormAct2d, EvoNormBatch2d, EvoNormSample2d from .registry import register_model __all__ = ['DenseNet'] @@ -35,90 +35,88 @@ default_cfgs = { 'densenet169': _cfg(url='https://download.pytorch.org/models/densenet169-b2777c0a.pth'), 'densenet201': _cfg(url='https://download.pytorch.org/models/densenet201-c1103571.pth'), 'densenet161': _cfg(url='https://download.pytorch.org/models/densenet161-8d451a50.pth'), + 'densenet264': _cfg(url=''), } -class _DenseLayer(nn.Module): - def __init__(self, num_input_features, growth_rate, bn_size, act_layer=nn.ReLU, norm_layer=nn.BatchNorm2d, +class DenseLayer(nn.Module): + def __init__(self, num_input_features, growth_rate, bn_size, norm_act_layer=BatchNormAct2d, drop_rate=0., memory_efficient=False): - super(_DenseLayer, self).__init__() - self.add_module('norm1', norm_layer(num_input_features)), - self.add_module('relu1', act_layer(inplace=True)), + super(DenseLayer, self).__init__() + self.add_module('norm1', norm_act_layer(num_input_features)), self.add_module('conv1', nn.Conv2d( num_input_features, bn_size * growth_rate, kernel_size=1, stride=1, bias=False)), - self.add_module('norm2', norm_layer(bn_size * growth_rate)), - self.add_module('relu2', act_layer(inplace=True)), + self.add_module('norm2', norm_act_layer(bn_size * growth_rate)), self.add_module('conv2', nn.Conv2d( bn_size * growth_rate, growth_rate, kernel_size=3, stride=1, padding=1, bias=False)), self.drop_rate = float(drop_rate) self.memory_efficient = memory_efficient - def bn_function(self, inputs): + def bottleneck_fn(self, xs): # type: (List[torch.Tensor]) -> torch.Tensor - concated_features = torch.cat(inputs, 1) - bottleneck_output = self.conv1(self.relu1(self.norm1(concated_features))) # noqa: T484 + concated_features = torch.cat(xs, 1) + bottleneck_output = self.conv1(self.norm1(concated_features)) # noqa: T484 return bottleneck_output # todo: rewrite when torchscript supports any - def any_requires_grad(self, input): + def any_requires_grad(self, x): # type: (List[torch.Tensor]) -> bool - for tensor in input: + for tensor in x: if tensor.requires_grad: return True return False @torch.jit.unused # noqa: T484 - def call_checkpoint_bottleneck(self, input): + def call_checkpoint_bottleneck(self, x): # type: (List[torch.Tensor]) -> torch.Tensor - def closure(*inputs): - return self.bn_function(*inputs) + def closure(*xs): + return self.bottleneck_fn(*xs) - return cp.checkpoint(closure, input) + return cp.checkpoint(closure, x) @torch.jit._overload_method # noqa: F811 - def forward(self, input): + def forward(self, x): # type: (List[torch.Tensor]) -> (torch.Tensor) pass @torch.jit._overload_method # noqa: F811 - def forward(self, input): + def forward(self, x): # type: (torch.Tensor) -> (torch.Tensor) 
pass # torchscript does not yet support *args, so we overload method # allowing it to take either a List[Tensor] or single Tensor - def forward(self, input): # noqa: F811 - if isinstance(input, torch.Tensor): - prev_features = [input] + def forward(self, x): # noqa: F811 + if isinstance(x, torch.Tensor): + prev_features = [x] else: - prev_features = input + prev_features = x if self.memory_efficient and self.any_requires_grad(prev_features): if torch.jit.is_scripting(): raise Exception("Memory Efficient not supported in JIT") bottleneck_output = self.call_checkpoint_bottleneck(prev_features) else: - bottleneck_output = self.bn_function(prev_features) + bottleneck_output = self.bottleneck_fn(prev_features) - new_features = self.conv2(self.relu2(self.norm2(bottleneck_output))) + new_features = self.conv2(self.norm2(bottleneck_output)) if self.drop_rate > 0: new_features = F.dropout(new_features, p=self.drop_rate, training=self.training) return new_features -class _DenseBlock(nn.ModuleDict): +class DenseBlock(nn.ModuleDict): _version = 2 - def __init__(self, num_layers, num_input_features, bn_size, growth_rate, act_layer=nn.ReLU, - norm_layer=nn.BatchNorm2d, drop_rate=0., memory_efficient=False): - super(_DenseBlock, self).__init__() + def __init__(self, num_layers, num_input_features, bn_size, growth_rate, norm_act_layer=nn.ReLU, + drop_rate=0., memory_efficient=False): + super(DenseBlock, self).__init__() for i in range(num_layers): - layer = _DenseLayer( + layer = DenseLayer( num_input_features + i * growth_rate, growth_rate=growth_rate, bn_size=bn_size, - act_layer=act_layer, - norm_layer=norm_layer, + norm_act_layer=norm_act_layer, drop_rate=drop_rate, memory_efficient=memory_efficient, ) @@ -132,11 +130,10 @@ class _DenseBlock(nn.ModuleDict): return torch.cat(features, 1) -class _Transition(nn.Sequential): - def __init__(self, num_input_features, num_output_features, act_layer=nn.ReLU, norm_layer=nn.BatchNorm2d): - super(_Transition, self).__init__() - self.add_module('norm', norm_layer(num_input_features)) - self.add_module('relu', act_layer(inplace=True)) +class DenseTransition(nn.Sequential): + def __init__(self, num_input_features, num_output_features, norm_act_layer=nn.BatchNorm2d): + super(DenseTransition, self).__init__() + self.add_module('norm', norm_act_layer(num_input_features)) self.add_module('conv', nn.Conv2d( num_input_features, num_output_features, kernel_size=1, stride=1, bias=False)) self.add_module('pool', nn.AvgPool2d(kernel_size=2, stride=2)) @@ -149,7 +146,6 @@ class DenseNet(nn.Module): Args: growth_rate (int) - how many filters to add each layer (`k` in paper) block_config (list of 4 ints) - how many layers in each pooling block - num_init_features (int) - the number of filters to learn in the first convolution layer bn_size (int) - multiplicative factor for number of bottle neck layers (i.e. bn_size * k features in the bottleneck layer) drop_rate (float) - dropout rate after each dense layer @@ -158,67 +154,66 @@ class DenseNet(nn.Module): but slower. Default: *False*. 
See `"paper" `_ """ - def __init__(self, growth_rate=32, block_config=(6, 12, 24, 16), num_init_features=64, - bn_size=4, stem_type='', num_classes=1000, in_chans=3, global_pool='avg', - act_layer=nn.ReLU, norm_layer=nn.BatchNorm2d, aa_layer=None, drop_rate=0, memory_efficient=False): + def __init__(self, growth_rate=32, block_config=(6, 12, 24, 16), bn_size=4, stem_type='', + num_classes=1000, in_chans=3, global_pool='avg', + norm_act_layer=BatchNormAct2d, aa_layer=None, drop_rate=0, memory_efficient=False): self.num_classes = num_classes self.drop_rate = drop_rate - deep_stem = 'deep' in stem_type super(DenseNet, self).__init__() - # First convolution + # Stem + deep_stem = 'deep' in stem_type # 3x3 deep stem + num_init_features = growth_rate * 2 if aa_layer is None: - max_pool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1) + stem_pool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1) else: - max_pool = nn.Sequential(*[ + stem_pool = nn.Sequential(*[ nn.MaxPool2d(kernel_size=3, stride=1, padding=1), - aa_layer(channels=self.inplanes, stride=2)]) + aa_layer(channels=num_init_features, stride=2)]) if deep_stem: - stem_chs_1 = stem_chs_2 = num_init_features // 2 + stem_chs_1 = stem_chs_2 = growth_rate if 'tiered' in stem_type: - stem_chs_1 = 3 * (num_init_features // 8) - stem_chs_2 = num_init_features if 'narrow' in stem_type else 6 * (num_init_features // 8) + stem_chs_1 = 3 * (growth_rate // 4) + stem_chs_2 = num_init_features if 'narrow' in stem_type else 6 * (growth_rate // 4) self.features = nn.Sequential(OrderedDict([ ('conv0', nn.Conv2d(in_chans, stem_chs_1, 3, stride=2, padding=1, bias=False)), - ('norm0', norm_layer(stem_chs_1)), - ('relu0', act_layer(inplace=True)), + ('norm0', norm_act_layer(stem_chs_1)), ('conv1', nn.Conv2d(stem_chs_1, stem_chs_2, 3, stride=1, padding=1, bias=False)), - ('norm1', norm_layer(stem_chs_2)), - ('relu1', act_layer(inplace=True)), + ('norm1', norm_act_layer(stem_chs_2)), ('conv2', nn.Conv2d(stem_chs_2, num_init_features, 3, stride=1, padding=1, bias=False)), - ('norm2', norm_layer(num_init_features)), - ('relu2', act_layer(inplace=True)), - ('pool0', max_pool), + ('norm2', norm_act_layer(num_init_features)), + ('pool0', stem_pool), ])) else: self.features = nn.Sequential(OrderedDict([ ('conv0', nn.Conv2d(in_chans, num_init_features, kernel_size=7, stride=2, padding=3, bias=False)), - ('norm0', norm_layer(num_init_features)), - ('relu0', act_layer(inplace=True)), - ('pool0', max_pool), + ('norm0', norm_act_layer(num_init_features)), + ('pool0', stem_pool), ])) - # Each denseblock + # DenseBlocks num_features = num_init_features for i, num_layers in enumerate(block_config): - block = _DenseBlock( + block = DenseBlock( num_layers=num_layers, num_input_features=num_features, bn_size=bn_size, growth_rate=growth_rate, + norm_act_layer=norm_act_layer, drop_rate=drop_rate, memory_efficient=memory_efficient ) self.features.add_module('denseblock%d' % (i + 1), block) num_features = num_features + num_layers * growth_rate if i != len(block_config) - 1: - trans = _Transition(num_input_features=num_features, num_output_features=num_features // 2) + trans = DenseTransition( + num_input_features=num_features, num_output_features=num_features // 2, + norm_act_layer=norm_act_layer) self.features.add_module('transition%d' % (i + 1), trans) num_features = num_features // 2 # Final batch norm - self.features.add_module('norm5', norm_layer(num_features)) - self.act = act_layer(inplace=True) + self.features.add_module('norm5', norm_act_layer(num_features)) # Linear 
layer self.num_features = num_features @@ -248,9 +243,7 @@ class DenseNet(nn.Module): self.classifier = nn.Identity() def forward_features(self, x): - x = self.features(x) - x = self.act(x) - return x + return self.features(x) def forward(self, x): x = self.forward_features(x) @@ -275,7 +268,7 @@ def _filter_torchvision_pretrained(state_dict): return state_dict -def _densenet(variant, growth_rate, block_config, num_init_features, pretrained, **kwargs): +def _densenet(variant, growth_rate, block_config, pretrained, **kwargs): if kwargs.pop('features_only', False): assert False, 'Not Implemented' # TODO load_strict = False @@ -285,8 +278,7 @@ def _densenet(variant, growth_rate, block_config, num_init_features, pretrained, load_strict = True model_class = DenseNet default_cfg = default_cfgs[variant] - model = model_class( - growth_rate=growth_rate, block_config=block_config, num_init_features=num_init_features, **kwargs) + model = model_class(growth_rate=growth_rate, block_config=block_config, **kwargs) model.default_cfg = default_cfg if pretrained: load_pretrained( @@ -304,8 +296,7 @@ def densenet121(pretrained=False, **kwargs): `"Densely Connected Convolutional Networks" ` """ model = _densenet( - 'densenet121', growth_rate=32, block_config=(6, 12, 24, 16), num_init_features=64, - pretrained=pretrained, **kwargs) + 'densenet121', growth_rate=32, block_config=(6, 12, 24, 16), pretrained=pretrained, **kwargs) return model @@ -315,8 +306,8 @@ def densenet121d(pretrained=False, **kwargs): `"Densely Connected Convolutional Networks" ` """ model = _densenet( - 'densenet121d', growth_rate=32, block_config=(6, 12, 24, 16), num_init_features=64, - stem_type='deep', pretrained=pretrained, **kwargs) + 'densenet121d', growth_rate=32, block_config=(6, 12, 24, 16), stem_type='deep', + pretrained=pretrained, **kwargs) return model @@ -326,8 +317,42 @@ def densenet121tn(pretrained=False, **kwargs): `"Densely Connected Convolutional Networks" ` """ model = _densenet( - 'densenet121tn', growth_rate=32, block_config=(6, 12, 24, 16), num_init_features=64, - stem_type='deep_tiered_narrow', pretrained=pretrained, **kwargs) + 'densenet121tn', growth_rate=32, block_config=(6, 12, 24, 16), stem_type='deep_tiered_narrow', + pretrained=pretrained, **kwargs) + return model + + +@register_model +def densenet121d_evob(pretrained=False, **kwargs): + r"""Densenet-121 model from + `"Densely Connected Convolutional Networks" ` + """ + model = _densenet( + 'densenet121d', growth_rate=32, block_config=(6, 12, 24, 16), stem_type='deep', + norm_act_layer=EvoNormBatch2d, pretrained=pretrained, **kwargs) + return model + + +@register_model +def densenet121d_evos(pretrained=False, **kwargs): + r"""Densenet-121 model from + `"Densely Connected Convolutional Networks" ` + """ + model = _densenet( + 'densenet121d', growth_rate=32, block_config=(6, 12, 24, 16), stem_type='deep', + norm_act_layer=EvoNormSample2d, pretrained=pretrained, **kwargs) + return model + + +@register_model +def densenet121d_iabn(pretrained=False, **kwargs): + r"""Densenet-121 model from + `"Densely Connected Convolutional Networks" ` + """ + from inplace_abn import InPlaceABN + model = _densenet( + 'densenet121tn', growth_rate=32, block_config=(6, 12, 24, 16), stem_type='deep', + norm_act_layer=InPlaceABN, pretrained=pretrained, **kwargs) return model @@ -337,8 +362,7 @@ def densenet169(pretrained=False, **kwargs): `"Densely Connected Convolutional Networks" ` """ model = _densenet( - 'densenet169', growth_rate=32, block_config=(6, 12, 32, 32), 
num_init_features=64, - pretrained=pretrained, **kwargs) + 'densenet169', growth_rate=32, block_config=(6, 12, 32, 32), pretrained=pretrained, **kwargs) return model @@ -348,17 +372,25 @@ def densenet201(pretrained=False, **kwargs): `"Densely Connected Convolutional Networks" ` """ model = _densenet( - 'densenet201', growth_rate=32, block_config=(6, 12, 48, 32), num_init_features=64, - pretrained=pretrained, **kwargs) + 'densenet201', growth_rate=32, block_config=(6, 12, 48, 32), pretrained=pretrained, **kwargs) return model @register_model def densenet161(pretrained=False, **kwargs): - r"""Densenet-201 model from + r"""Densenet-161 model from `"Densely Connected Convolutional Networks" ` """ model = _densenet( - 'densenet161', growth_rate=48, block_config=(6, 12, 36, 24), num_init_features=96, - pretrained=pretrained, **kwargs) + 'densenet161', growth_rate=48, block_config=(6, 12, 36, 24), pretrained=pretrained, **kwargs) + return model + + +@register_model +def densenet264(pretrained=False, **kwargs): + r"""Densenet-264 model from + `"Densely Connected Convolutional Networks" ` + """ + model = _densenet( + 'densenet264', growth_rate=48, block_config=(6, 12, 64, 48), pretrained=pretrained, **kwargs) return model diff --git a/timm/models/layers/__init__.py b/timm/models/layers/__init__.py index 4f84bb9e..12e7326e 100644 --- a/timm/models/layers/__init__.py +++ b/timm/models/layers/__init__.py @@ -19,3 +19,5 @@ from .split_batchnorm import SplitBatchNorm2d, convert_splitbn_model from .anti_aliasing import AntiAliasDownsampleLayer from .space_to_depth import SpaceToDepthModule from .blur_pool import BlurPool2d +from .norm_act import BatchNormAct2d +from .evo_norm import EvoNormBatch2d, EvoNormSample2d \ No newline at end of file diff --git a/timm/models/layers/evo_norm.py b/timm/models/layers/evo_norm.py new file mode 100644 index 00000000..79de23e9 --- /dev/null +++ b/timm/models/layers/evo_norm.py @@ -0,0 +1,134 @@ +"""EvoNormB0 (Batched) and EvoNormS0 (Sample) in PyTorch + +An attempt at getting decent performing EvoNorms running in PyTorch. +While currently faster than other impl, still quite a ways off the built-in BN +in terms of memory usage and throughput. + +Still very much a WIP, fiddling with buffer usage, in-place optimizations, and layouts. + +Hacked together by Ross Wightman +""" + +import torch +import torch.nn as nn + + +@torch.jit.script +def evo_batch_jit( + x: torch.Tensor, v: torch.Tensor, weight: torch.Tensor, bias: torch.Tensor, running_var: torch.Tensor, + momentum: float, training: bool, nonlin: bool, eps: float): + x_type = x.dtype + running_var = running_var.detach() # FIXME why is this needed, it's a buffer? + if training: + var = x.var(dim=(0, 2, 3), unbiased=False, keepdim=True) # FIXME biased, unbiased? + running_var.copy_(momentum * var + (1 - momentum) * running_var) + else: + var = running_var.clone() + + if nonlin: + # FIXME biased, unbiased? 
+ d = (x * v.to(x_type)) + x.var(dim=(2, 3), unbiased=False, keepdim=True).add_(eps).sqrt_().to(dtype=x_type) + d = d.max(var.add(eps).sqrt_().to(dtype=x_type)) + x = x / d + return x.mul_(weight).add_(bias) + else: + return x.mul(weight).add_(bias) + + +class EvoNormBatch2d(nn.Module): + def __init__(self, num_features, momentum=0.1, nonlin=True, eps=1e-5, jit=True): + super(EvoNormBatch2d, self).__init__() + self.momentum = momentum + self.nonlin = nonlin + self.eps = eps + self.jit = jit + param_shape = (1, num_features, 1, 1) + self.weight = nn.Parameter(torch.ones(param_shape), requires_grad=True) + self.bias = nn.Parameter(torch.zeros(param_shape), requires_grad=True) + if nonlin: + self.v = nn.Parameter(torch.ones(param_shape), requires_grad=True) + self.register_buffer('running_var', torch.ones(1, num_features, 1, 1)) + self.reset_parameters() + + def reset_parameters(self): + nn.init.ones_(self.weight) + nn.init.zeros_(self.bias) + if self.nonlin: + nn.init.ones_(self.v) + + def forward(self, x): + assert x.dim() == 4, 'expected 4D input' + + if self.jit: + return evo_batch_jit( + x, self.v, self.weight, self.bias, self.running_var, self.momentum, + self.training, self.nonlin, self.eps) + else: + x_type = x.dtype + if self.training: + var = x.var(dim=(0, 2, 3), keepdim=True) + self.running_var.copy_(self.momentum * var + (1 - self.momentum) * self.running_var) + else: + var = self.running_var.clone() + + if self.nonlin: + v = self.v.to(dtype=x_type) + d = (x * v) + x.var(dim=(2, 3), keepdim=True).add_(self.eps).sqrt_().to(dtype=x_type) + d = d.max(var.add(self.eps).sqrt_().to(dtype=x_type)) + x = x / d + return x.mul_(self.weight).add_(self.bias) + else: + return x.mul(self.weight).add_(self.bias) + + +@torch.jit.script +def evo_sample_jit( + x: torch.Tensor, v: torch.Tensor, weight: torch.Tensor, bias: torch.Tensor, + groups: int, nonlin: bool, eps: float): + B, C, H, W = x.shape + assert C % groups == 0 + if nonlin: + n = (x * v).sigmoid_().reshape(B, groups, -1) + x = x.reshape(B, groups, -1) + x = n / x.var(dim=-1, unbiased=False, keepdim=True).add_(eps).sqrt_() + x = x.reshape(B, C, H, W) + return x.mul_(weight).add_(bias) + + +class EvoNormSample2d(nn.Module): + def __init__(self, num_features, nonlin=True, groups=8, eps=1e-5, jit=True): + super(EvoNormSample2d, self).__init__() + self.nonlin = nonlin + self.groups = groups + self.eps = eps + self.jit = jit + param_shape = (1, num_features, 1, 1) + self.weight = nn.Parameter(torch.ones(param_shape), requires_grad=True) + self.bias = nn.Parameter(torch.zeros(param_shape), requires_grad=True) + if nonlin: + self.v = nn.Parameter(torch.ones(param_shape), requires_grad=True) + self.reset_parameters() + + def reset_parameters(self): + nn.init.ones_(self.weight) + nn.init.zeros_(self.bias) + if self.nonlin: + nn.init.ones_(self.v) + + def forward(self, x): + assert x.dim() == 4, 'expected 4D input' + + if self.jit: + return evo_sample_jit( + x, self.v, self.weight, self.bias, self.groups, self.nonlin, self.eps) + else: + B, C, H, W = x.shape + assert C % self.groups == 0 + if self.nonlin: + n = (x * self.v).sigmoid().reshape(B, self.groups, -1) + x = x.reshape(B, self.groups, -1) + x = n / (x.std(dim=-1, unbiased=False, keepdim=True) + self.eps) + x = x.reshape(B, C, H, W) + return x.mul_(self.weight).add_(self.bias) + else: + return x.mul(self.weight).add_(self.bias) diff --git a/timm/models/layers/norm_act.py b/timm/models/layers/norm_act.py new file mode 100644 index 00000000..879a8939 --- /dev/null +++ 
b/timm/models/layers/norm_act.py @@ -0,0 +1,50 @@ +""" Normalization + Activation Layers +""" +from torch import nn as nn +from torch.nn import functional as F + + +class BatchNormAct2d(nn.BatchNorm2d): + """BatchNorm + Activation + + This module performs BatchNorm + Activation in a manner that will remain backwards + compatible with weights trained with separate bn, act. This is why we inherit from BN + instead of composing it as a .bn member. + """ + def __init__(self, num_features, eps=1e-5, momentum=0.1, affine=True, + track_running_stats=True, act_layer=nn.ReLU, inplace=True): + super(BatchNormAct2d, self).__init__(num_features, eps, momentum, affine, track_running_stats) + self.act = act_layer(inplace=inplace) + + def forward(self, x): + # FIXME cannot call parent forward() and maintain jit.script compatibility? + # x = super(BatchNormAct2d, self).forward(x) + + # BEGIN nn.BatchNorm2d forward() cut & paste + # self._check_input_dim(x) + + # exponential_average_factor is set to self.momentum + # (when it is available) only so that it gets updated + # in ONNX graph when this node is exported to ONNX. + if self.momentum is None: + exponential_average_factor = 0.0 + else: + exponential_average_factor = self.momentum + + if self.training and self.track_running_stats: + # TODO: if statement only here to tell the jit to skip emitting this when it is None + if self.num_batches_tracked is not None: + self.num_batches_tracked += 1 + if self.momentum is None: # use cumulative moving average + exponential_average_factor = 1.0 / float(self.num_batches_tracked) + else: # use exponential moving average + exponential_average_factor = self.momentum + + x = F.batch_norm( + x, self.running_mean, self.running_var, self.weight, self.bias, + self.training or not self.track_running_stats, + exponential_average_factor, self.eps) + # END BatchNorm2d forward() + + x = self.act(x) + return x From 780860d140ca8a08c1f84902c8be80afd355c954 Mon Sep 17 00:00:00 2001 From: Ross Wightman Date: Sat, 9 May 2020 22:07:01 -0700 Subject: [PATCH 04/19] Add norm_act factory method, move JIT of norm layers to factory --- timm/models/densenet.py | 16 +++-- timm/models/layers/__init__.py | 3 +- timm/models/layers/create_norm_act.py | 37 ++++++++++ timm/models/layers/evo_norm.py | 97 +++++++-------------- 4 files changed, 74 insertions(+), 79 deletions(-) create mode 100644 timm/models/layers/create_norm_act.py diff --git a/timm/models/densenet.py b/timm/models/densenet.py index 420680f9..b9f9853c 100644 --- a/timm/models/densenet.py +++ b/timm/models/densenet.py @@ -4,6 +4,7 @@ fixed kwargs passthrough and addition of dynamic global avg/max pool.
""" import re from collections import OrderedDict +from functools import partial import torch import torch.nn as nn @@ -13,7 +14,7 @@ from torch.jit.annotations import List from timm.data import IMAGENET_DEFAULT_MEAN, IMAGENET_DEFAULT_STD from .helpers import load_pretrained -from .layers import SelectAdaptivePool2d, BatchNormAct2d, EvoNormBatch2d, EvoNormSample2d +from .layers import SelectAdaptivePool2d, BatchNormAct2d, create_norm_act from .registry import register_model __all__ = ['DenseNet'] @@ -327,9 +328,11 @@ def densenet121d_evob(pretrained=False, **kwargs): r"""Densenet-121 model from `"Densely Connected Convolutional Networks" ` """ + def norm_act_fn(num_features, **kwargs): + return create_norm_act('EvoNormBatch', num_features, jit=True, **kwargs) model = _densenet( 'densenet121d', growth_rate=32, block_config=(6, 12, 24, 16), stem_type='deep', - norm_act_layer=EvoNormBatch2d, pretrained=pretrained, **kwargs) + norm_act_layer=norm_act_fn, pretrained=pretrained, **kwargs) return model @@ -338,9 +341,11 @@ def densenet121d_evos(pretrained=False, **kwargs): r"""Densenet-121 model from `"Densely Connected Convolutional Networks" ` """ + def norm_act_fn(num_features, **kwargs): + return create_norm_act('EvoNormSample', num_features, jit=True, **kwargs) model = _densenet( 'densenet121d', growth_rate=32, block_config=(6, 12, 24, 16), stem_type='deep', - norm_act_layer=EvoNormSample2d, pretrained=pretrained, **kwargs) + norm_act_layer=norm_act_fn, pretrained=pretrained, **kwargs) return model @@ -349,10 +354,11 @@ def densenet121d_iabn(pretrained=False, **kwargs): r"""Densenet-121 model from `"Densely Connected Convolutional Networks" ` """ - from inplace_abn import InPlaceABN + def norm_act_fn(num_features, **kwargs): + return create_norm_act('iabn', num_features, **kwargs) model = _densenet( 'densenet121tn', growth_rate=32, block_config=(6, 12, 24, 16), stem_type='deep', - norm_act_layer=InPlaceABN, pretrained=pretrained, **kwargs) + norm_act_layer=norm_act_fn, pretrained=pretrained, **kwargs) return model diff --git a/timm/models/layers/__init__.py b/timm/models/layers/__init__.py index 12e7326e..94c98fdc 100644 --- a/timm/models/layers/__init__.py +++ b/timm/models/layers/__init__.py @@ -20,4 +20,5 @@ from .anti_aliasing import AntiAliasDownsampleLayer from .space_to_depth import SpaceToDepthModule from .blur_pool import BlurPool2d from .norm_act import BatchNormAct2d -from .evo_norm import EvoNormBatch2d, EvoNormSample2d \ No newline at end of file +from .evo_norm import EvoNormBatch2d, EvoNormSample2d +from .create_norm_act import create_norm_act diff --git a/timm/models/layers/create_norm_act.py b/timm/models/layers/create_norm_act.py new file mode 100644 index 00000000..251c0c17 --- /dev/null +++ b/timm/models/layers/create_norm_act.py @@ -0,0 +1,37 @@ +import torch +import torch.nn as nn + +from .evo_norm import EvoNormBatch2d, EvoNormSample2d +from .norm_act import BatchNormAct2d +try: + from inplace_abn import InPlaceABN + has_iabn = True +except ImportError: + has_iabn = False + + +def create_norm_act(layer_type, num_features, jit=False, **kwargs): + layer_parts = layer_type.split('_') + assert len(layer_parts) in (1, 2) + layer_class = layer_parts[0].lower() + #activation_class = layer_parts[1].lower() if len(layer_parts) > 1 else '' # FIXME support string act selection + + if layer_class == "batchnormact": + layer = BatchNormAct2d(num_features, **kwargs) # defaults to RELU of no kwargs override + elif layer_class == "batchnormrelu": + assert 'act_layer' not in kwargs + 
layer = BatchNormAct2d(num_features, act_layer=nn.ReLU, **kwargs) + elif layer_class == "evonormbatch": + layer = EvoNormBatch2d(num_features, **kwargs) + elif layer_class == "evonormsample": + layer = EvoNormSample2d(num_features, **kwargs) + elif layer_class == "iabn" or layer_class == "inplaceabn": + if not has_iabn: + raise ImportError( + "Pplease install InplaceABN:'pip install git+https://github.com/mapillary/inplace_abn.git@v1.0.11'") + layer = InPlaceABN(num_features, **kwargs) + else: + assert False, "Invalid norm_act layer (%s)" % layer_class + if jit: + layer = torch.jit.script(layer) + return layer diff --git a/timm/models/layers/evo_norm.py b/timm/models/layers/evo_norm.py index 79de23e9..62d49428 100644 --- a/timm/models/layers/evo_norm.py +++ b/timm/models/layers/evo_norm.py @@ -13,35 +13,12 @@ import torch import torch.nn as nn -@torch.jit.script -def evo_batch_jit( - x: torch.Tensor, v: torch.Tensor, weight: torch.Tensor, bias: torch.Tensor, running_var: torch.Tensor, - momentum: float, training: bool, nonlin: bool, eps: float): - x_type = x.dtype - running_var = running_var.detach() # FIXME why is this needed, it's a buffer? - if training: - var = x.var(dim=(0, 2, 3), unbiased=False, keepdim=True) # FIXME biased, unbiased? - running_var.copy_(momentum * var + (1 - momentum) * running_var) - else: - var = running_var.clone() - - if nonlin: - # FIXME biased, unbiased? - d = (x * v.to(x_type)) + x.var(dim=(2, 3), unbiased=False, keepdim=True).add_(eps).sqrt_().to(dtype=x_type) - d = d.max(var.add(eps).sqrt_().to(dtype=x_type)) - x = x / d - return x.mul_(weight).add_(bias) - else: - return x.mul(weight).add_(bias) - - class EvoNormBatch2d(nn.Module): - def __init__(self, num_features, momentum=0.1, nonlin=True, eps=1e-5, jit=True): + def __init__(self, num_features, momentum=0.1, nonlin=True, eps=1e-5): super(EvoNormBatch2d, self).__init__() self.momentum = momentum self.nonlin = nonlin self.eps = eps - self.jit = jit param_shape = (1, num_features, 1, 1) self.weight = nn.Parameter(torch.ones(param_shape), requires_grad=True) self.bias = nn.Parameter(torch.zeros(param_shape), requires_grad=True) @@ -58,50 +35,29 @@ class EvoNormBatch2d(nn.Module): def forward(self, x): assert x.dim() == 4, 'expected 4D input' - - if self.jit: - return evo_batch_jit( - x, self.v, self.weight, self.bias, self.running_var, self.momentum, - self.training, self.nonlin, self.eps) + x_type = x.dtype + if self.training: + var = x.var(dim=(0, 2, 3), unbiased=False, keepdim=True) + self.running_var.copy_(self.momentum * var.detach() + (1 - self.momentum) * self.running_var) else: - x_type = x.dtype - if self.training: - var = x.var(dim=(0, 2, 3), keepdim=True) - self.running_var.copy_(self.momentum * var + (1 - self.momentum) * self.running_var) - else: - var = self.running_var.clone() - - if self.nonlin: - v = self.v.to(dtype=x_type) - d = (x * v) + x.var(dim=(2, 3), keepdim=True).add_(self.eps).sqrt_().to(dtype=x_type) - d = d.max(var.add(self.eps).sqrt_().to(dtype=x_type)) - x = x / d - return x.mul_(self.weight).add_(self.bias) - else: - return x.mul(self.weight).add_(self.bias) + var = self.running_var.clone() - -@torch.jit.script -def evo_sample_jit( - x: torch.Tensor, v: torch.Tensor, weight: torch.Tensor, bias: torch.Tensor, - groups: int, nonlin: bool, eps: float): - B, C, H, W = x.shape - assert C % groups == 0 - if nonlin: - n = (x * v).sigmoid_().reshape(B, groups, -1) - x = x.reshape(B, groups, -1) - x = n / x.var(dim=-1, unbiased=False, keepdim=True).add_(eps).sqrt_() - x = x.reshape(B, 
C, H, W) - return x.mul_(weight).add_(bias) + if self.nonlin: + v = self.v.to(dtype=x_type) + d = (x * v) + x.var(dim=(2, 3), unbiased=False, keepdim=True).add_(self.eps).sqrt_().to(dtype=x_type) + d = d.max(var.add_(self.eps).sqrt_().to(dtype=x_type)) + x = x / d + return x.mul_(self.weight).add_(self.bias) + else: + return x.mul(self.weight).add_(self.bias) class EvoNormSample2d(nn.Module): - def __init__(self, num_features, nonlin=True, groups=8, eps=1e-5, jit=True): + def __init__(self, num_features, nonlin=True, groups=8, eps=1e-5): super(EvoNormSample2d, self).__init__() self.nonlin = nonlin self.groups = groups self.eps = eps - self.jit = jit param_shape = (1, num_features, 1, 1) self.weight = nn.Parameter(torch.ones(param_shape), requires_grad=True) self.bias = nn.Parameter(torch.zeros(param_shape), requires_grad=True) @@ -117,18 +73,13 @@ class EvoNormSample2d(nn.Module): def forward(self, x): assert x.dim() == 4, 'expected 4D input' - - if self.jit: - return evo_sample_jit( - x, self.v, self.weight, self.bias, self.groups, self.nonlin, self.eps) + B, C, H, W = x.shape + assert C % self.groups == 0 + if self.nonlin: + n = (x * self.v).sigmoid().reshape(B, self.groups, -1) + x = x.reshape(B, self.groups, -1) + x = n / x.var(dim=-1, unbiased=False, keepdim=True).add_(self.eps).sqrt_() + x = x.reshape(B, C, H, W) + return x.mul_(self.weight).add_(self.bias) else: - B, C, H, W = x.shape - assert C % self.groups == 0 - if self.nonlin: - n = (x * self.v).sigmoid().reshape(B, self.groups, -1) - x = x.reshape(B, self.groups, -1) - x = n / (x.std(dim=-1, unbiased=False, keepdim=True) + self.eps) - x = x.reshape(B, C, H, W) - return x.mul_(self.weight).add_(self.bias) - else: - return x.mul(self.weight).add_(self.bias) + return x.mul(self.weight).add_(self.bias) From 6441e9cc1b6545fd68b35d1d7eecebd96d9a5266 Mon Sep 17 00:00:00 2001 From: Ross Wightman Date: Fri, 22 May 2020 16:16:45 -0700 Subject: [PATCH 05/19] Fix memory_efficient mode for DenseNets. Add AntiAliasing (Blur) support for DenseNets and create one test model. Add lr cycle/mul params to train args. 
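A minimal standalone sketch of the calling convention the memory_efficient fix relies on (illustrative only, the conv below is a stand-in and not timm code): torch.utils.checkpoint.checkpoint() only tracks tensors passed as positional arguments, so the list of prior features has to be unpacked at the call site and re-packed inside the closure, otherwise no gradient ever reaches the earlier dense layers.

import torch
import torch.nn as nn
import torch.utils.checkpoint as cp

bottleneck = nn.Conv2d(8, 4, kernel_size=1, bias=False)  # stand-in for norm1 + conv1

def closure(*xs):
    # xs arrives as a tuple of tensors; concatenate like DenseLayer.bottleneck_fn does
    return bottleneck(torch.cat(xs, 1))

prev_features = [torch.randn(2, 4, 7, 7, requires_grad=True) for _ in range(2)]
out = cp.checkpoint(closure, *prev_features)  # unpacked: grads flow to every input
out.mean().backward()
assert all(f.grad is not None for f in prev_features)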
--- timm/models/densenet.py | 24 +++++++++++++++++++----- timm/scheduler/scheduler_factory.py | 8 ++++---- train.py | 4 ++++ 3 files changed, 27 insertions(+), 9 deletions(-) diff --git a/timm/models/densenet.py b/timm/models/densenet.py index b9f9853c..539d5012 100644 --- a/timm/models/densenet.py +++ b/timm/models/densenet.py @@ -14,7 +14,7 @@ from torch.jit.annotations import List from timm.data import IMAGENET_DEFAULT_MEAN, IMAGENET_DEFAULT_STD from .helpers import load_pretrained -from .layers import SelectAdaptivePool2d, BatchNormAct2d, create_norm_act +from .layers import SelectAdaptivePool2d, BatchNormAct2d, create_norm_act, BlurPool2d from .registry import register_model __all__ = ['DenseNet'] @@ -71,9 +71,9 @@ class DenseLayer(nn.Module): def call_checkpoint_bottleneck(self, x): # type: (List[torch.Tensor]) -> torch.Tensor def closure(*xs): - return self.bottleneck_fn(*xs) + return self.bottleneck_fn(xs) - return cp.checkpoint(closure, x) + return cp.checkpoint(closure, *x) @torch.jit._overload_method # noqa: F811 def forward(self, x): @@ -132,12 +132,15 @@ class DenseBlock(nn.ModuleDict): class DenseTransition(nn.Sequential): - def __init__(self, num_input_features, num_output_features, norm_act_layer=nn.BatchNorm2d): + def __init__(self, num_input_features, num_output_features, norm_act_layer=nn.BatchNorm2d, aa_layer=None): super(DenseTransition, self).__init__() self.add_module('norm', norm_act_layer(num_input_features)) self.add_module('conv', nn.Conv2d( num_input_features, num_output_features, kernel_size=1, stride=1, bias=False)) - self.add_module('pool', nn.AvgPool2d(kernel_size=2, stride=2)) + if aa_layer is not None: + self.add_module('pool', aa_layer(num_output_features, stride=2)) + else: + self.add_module('pool', nn.AvgPool2d(kernel_size=2, stride=2)) class DenseNet(nn.Module): @@ -301,6 +304,17 @@ def densenet121(pretrained=False, **kwargs): return model +@register_model +def densenetblur121d(pretrained=False, **kwargs): + r"""Densenet-121 model from + `"Densely Connected Convolutional Networks" ` + """ + model = _densenet( + 'densenet121', growth_rate=32, block_config=(6, 12, 24, 16), pretrained=pretrained, stem_type='deep', + aa_layer=BlurPool2d, **kwargs) + return model + + @register_model def densenet121d(pretrained=False, **kwargs): r"""Densenet-121 model from diff --git a/timm/scheduler/scheduler_factory.py b/timm/scheduler/scheduler_factory.py index 2320c96b..ee4220ec 100644 --- a/timm/scheduler/scheduler_factory.py +++ b/timm/scheduler/scheduler_factory.py @@ -23,12 +23,12 @@ def create_scheduler(args, optimizer): lr_scheduler = CosineLRScheduler( optimizer, t_initial=num_epochs, - t_mul=1.0, + t_mul=args.lr_cycle_mul, lr_min=args.min_lr, decay_rate=args.decay_rate, warmup_lr_init=args.warmup_lr, warmup_t=args.warmup_epochs, - cycle_limit=1, + cycle_limit=args.lr_cycle_limit, t_in_epochs=True, noise_range_t=noise_range, noise_pct=args.lr_noise_pct, @@ -40,11 +40,11 @@ def create_scheduler(args, optimizer): lr_scheduler = TanhLRScheduler( optimizer, t_initial=num_epochs, - t_mul=1.0, + t_mul=args.lr_cycle_mul, lr_min=args.min_lr, warmup_lr_init=args.warmup_lr, warmup_t=args.warmup_epochs, - cycle_limit=1, + cycle_limit=args.lr_cycle_limit, t_in_epochs=True, noise_range_t=noise_range, noise_pct=args.lr_noise_pct, diff --git a/train.py b/train.py index 899c6984..7f8d4a26 100755 --- a/train.py +++ b/train.py @@ -111,6 +111,10 @@ parser.add_argument('--lr-noise-pct', type=float, default=0.67, metavar='PERCENT help='learning rate noise limit percent (default: 
0.67)') parser.add_argument('--lr-noise-std', type=float, default=1.0, metavar='STDDEV', help='learning rate noise std-dev (default: 1.0)') +parser.add_argument('--lr-cycle-mul', type=float, default=1.0, metavar='MULT', + help='learning rate cycle len multiplier (default: 1.0)') +parser.add_argument('--lr-cycle-limit', type=int, default=1, metavar='N', + help='learning rate cycle limit') parser.add_argument('--warmup-lr', type=float, default=0.0001, metavar='LR', help='warmup learning rate (default: 0.0001)') parser.add_argument('--min-lr', type=float, default=1e-5, metavar='LR', From eb7653614f438d1eeae259262fade32d230a5be4 Mon Sep 17 00:00:00 2001 From: Ross Wightman Date: Mon, 1 Jun 2020 16:59:51 -0700 Subject: [PATCH 06/19] Monster commit, activation refactor, VoVNet, norm_act improvements, more * refactor activations into basic PyTorch, jit scripted, and memory efficient custom auto * implement hard-mish, better grad for hard-swish * add initial VovNet V1/V2 impl, fix #151 * VovNet and DenseNet first models to use NormAct layers (support BatchNormAct2d, EvoNorm, InplaceIABN) * Wrap IABN for any models that use it * make more models torchscript compatible (DPN, PNasNet, Res2Net, SelecSLS) and add tests --- tests/test_models.py | 26 +- timm/__init__.py | 3 +- timm/models/__init__.py | 2 + timm/models/densenet.py | 36 +-- timm/models/dpn.py | 64 +++- timm/models/efficientnet.py | 18 +- timm/models/efficientnet_blocks.py | 13 +- timm/models/efficientnet_builder.py | 16 +- timm/models/layers/__init__.py | 39 +-- timm/models/layers/activations.py | 109 ++----- timm/models/layers/activations_jit.py | 90 ++++++ timm/models/layers/activations_me.py | 208 +++++++++++++ timm/models/layers/cond_conv2d.py | 2 +- timm/models/layers/config.py | 74 +++++ timm/models/layers/conv2d_same.py | 3 +- timm/models/layers/conv_bn_act.py | 27 +- timm/models/layers/create_act.py | 103 +++++++ timm/models/layers/create_attn.py | 4 +- timm/models/layers/create_conv2d.py | 12 +- timm/models/layers/create_norm_act.py | 77 +++-- timm/models/layers/drop.py | 2 - timm/models/layers/evo_norm.py | 40 ++- timm/models/layers/inplace_abn.py | 85 ++++++ timm/models/layers/norm_act.py | 69 +++-- timm/models/layers/pool2d_same.py | 1 - timm/models/layers/se.py | 23 +- timm/models/layers/selective_kernel.py | 1 - timm/models/layers/separable_conv.py | 51 ++++ timm/models/layers/test_time_pool.py | 1 + timm/models/mobilenetv3.py | 20 +- timm/models/pnasnet.py | 24 +- timm/models/res2net.py | 14 +- timm/models/resnet.py | 1 - timm/models/selecsls.py | 26 +- timm/models/tresnet.py | 81 +++-- timm/models/vovnet.py | 408 +++++++++++++++++++++++++ validate.py | 10 +- 37 files changed, 1467 insertions(+), 316 deletions(-) create mode 100644 timm/models/layers/activations_jit.py create mode 100644 timm/models/layers/activations_me.py create mode 100644 timm/models/layers/config.py create mode 100644 timm/models/layers/create_act.py create mode 100644 timm/models/layers/inplace_abn.py create mode 100644 timm/models/layers/separable_conv.py create mode 100644 timm/models/vovnet.py diff --git a/tests/test_models.py b/tests/test_models.py index 02cb61bb..63be6a6e 100644 --- a/tests/test_models.py +++ b/tests/test_models.py @@ -4,7 +4,7 @@ import platform import os import fnmatch -from timm import list_models, create_model +from timm import list_models, create_model, set_scriptable if 'GITHUB_ACTIONS' in os.environ and 'Linux' in platform.system(): @@ -53,6 +53,8 @@ def test_model_backward(model_name, batch_size): inputs = 
torch.randn((batch_size, *input_size)) outputs = model(inputs) outputs.mean().backward() + for n, x in model.named_parameters(): + assert x.grad is not None, f'No gradient for {n}' num_grad = sum([x.grad.numel() for x in model.parameters() if x.grad is not None]) assert outputs.shape[-1] == 42 @@ -83,3 +85,25 @@ def test_model_default_cfgs(model_name, batch_size): assert outputs.shape[-1] == pool_size[-1] and outputs.shape[-2] == pool_size[-2] assert any([k.startswith(classifier) for k in state_dict.keys()]), f'{classifier} not in model params' assert any([k.startswith(first_conv) for k in state_dict.keys()]), f'{first_conv} not in model params' + + +EXCLUDE_JIT_FILTERS = [ + '*iabn*', 'tresnet*', # models using inplace abn unlikely to ever be scriptable + 'dla*', 'hrnet*', # hopefully fix at some point +] + + +@pytest.mark.timeout(120) +@pytest.mark.parametrize('model_name', list_models(exclude_filters=EXCLUDE_FILTERS + EXCLUDE_JIT_FILTERS)) +@pytest.mark.parametrize('batch_size', [1]) +def test_model_forward_torchscript(model_name, batch_size): + """Run a single forward pass with each model""" + with set_scriptable(True): + model = create_model(model_name, pretrained=False) + model.eval() + input_size = (3, 128, 128) # jit compile is already a bit slow and we've tested normal res already... + model = torch.jit.script(model) + outputs = model(torch.randn((batch_size, *input_size))) + + assert outputs.shape[0] == batch_size + assert not torch.isnan(outputs).any(), 'Output included NaNs' diff --git a/timm/__init__.py b/timm/__init__.py index 86ed7a42..db3d3f22 100644 --- a/timm/__init__.py +++ b/timm/__init__.py @@ -1,2 +1,3 @@ from .version import __version__ -from .models import create_model, list_models, is_model, list_modules, model_entrypoint +from .models import create_model, list_models, is_model, list_modules, model_entrypoint, \ + is_scriptable, is_exportable, set_scriptable, set_exportable diff --git a/timm/models/__init__.py b/timm/models/__init__.py index 06d26fb3..b4fe1dea 100644 --- a/timm/models/__init__.py +++ b/timm/models/__init__.py @@ -20,9 +20,11 @@ from .sknet import * from .tresnet import * from .resnest import * from .regnet import * +from .vovnet import * from .registry import * from .factory import create_model from .helpers import load_checkpoint, resume_checkpoint from .layers import TestTimePoolHead, apply_test_time_pool from .layers import convert_splitbn_model +from .layers import is_scriptable, is_exportable, set_scriptable, set_exportable, is_no_jit, set_no_jit diff --git a/timm/models/densenet.py b/timm/models/densenet.py index 539d5012..b4e31807 100644 --- a/timm/models/densenet.py +++ b/timm/models/densenet.py @@ -41,13 +41,13 @@ default_cfgs = { class DenseLayer(nn.Module): - def __init__(self, num_input_features, growth_rate, bn_size, norm_act_layer=BatchNormAct2d, + def __init__(self, num_input_features, growth_rate, bn_size, norm_layer=BatchNormAct2d, drop_rate=0., memory_efficient=False): super(DenseLayer, self).__init__() - self.add_module('norm1', norm_act_layer(num_input_features)), + self.add_module('norm1', norm_layer(num_input_features)), self.add_module('conv1', nn.Conv2d( num_input_features, bn_size * growth_rate, kernel_size=1, stride=1, bias=False)), - self.add_module('norm2', norm_act_layer(bn_size * growth_rate)), + self.add_module('norm2', norm_layer(bn_size * growth_rate)), self.add_module('conv2', nn.Conv2d( bn_size * growth_rate, growth_rate, kernel_size=3, stride=1, padding=1, bias=False)), self.drop_rate = float(drop_rate) @@ -109,7 
+109,7 @@ class DenseLayer(nn.Module): class DenseBlock(nn.ModuleDict): _version = 2 - def __init__(self, num_layers, num_input_features, bn_size, growth_rate, norm_act_layer=nn.ReLU, + def __init__(self, num_layers, num_input_features, bn_size, growth_rate, norm_layer=nn.ReLU, drop_rate=0., memory_efficient=False): super(DenseBlock, self).__init__() for i in range(num_layers): @@ -117,7 +117,7 @@ class DenseBlock(nn.ModuleDict): num_input_features + i * growth_rate, growth_rate=growth_rate, bn_size=bn_size, - norm_act_layer=norm_act_layer, + norm_layer=norm_layer, drop_rate=drop_rate, memory_efficient=memory_efficient, ) @@ -132,9 +132,9 @@ class DenseBlock(nn.ModuleDict): class DenseTransition(nn.Sequential): - def __init__(self, num_input_features, num_output_features, norm_act_layer=nn.BatchNorm2d, aa_layer=None): + def __init__(self, num_input_features, num_output_features, norm_layer=nn.BatchNorm2d, aa_layer=None): super(DenseTransition, self).__init__() - self.add_module('norm', norm_act_layer(num_input_features)) + self.add_module('norm', norm_layer(num_input_features)) self.add_module('conv', nn.Conv2d( num_input_features, num_output_features, kernel_size=1, stride=1, bias=False)) if aa_layer is not None: @@ -160,7 +160,7 @@ class DenseNet(nn.Module): def __init__(self, growth_rate=32, block_config=(6, 12, 24, 16), bn_size=4, stem_type='', num_classes=1000, in_chans=3, global_pool='avg', - norm_act_layer=BatchNormAct2d, aa_layer=None, drop_rate=0, memory_efficient=False): + norm_layer=BatchNormAct2d, aa_layer=None, drop_rate=0, memory_efficient=False): self.num_classes = num_classes self.drop_rate = drop_rate super(DenseNet, self).__init__() @@ -181,17 +181,17 @@ class DenseNet(nn.Module): stem_chs_2 = num_init_features if 'narrow' in stem_type else 6 * (growth_rate // 4) self.features = nn.Sequential(OrderedDict([ ('conv0', nn.Conv2d(in_chans, stem_chs_1, 3, stride=2, padding=1, bias=False)), - ('norm0', norm_act_layer(stem_chs_1)), + ('norm0', norm_layer(stem_chs_1)), ('conv1', nn.Conv2d(stem_chs_1, stem_chs_2, 3, stride=1, padding=1, bias=False)), - ('norm1', norm_act_layer(stem_chs_2)), + ('norm1', norm_layer(stem_chs_2)), ('conv2', nn.Conv2d(stem_chs_2, num_init_features, 3, stride=1, padding=1, bias=False)), - ('norm2', norm_act_layer(num_init_features)), + ('norm2', norm_layer(num_init_features)), ('pool0', stem_pool), ])) else: self.features = nn.Sequential(OrderedDict([ ('conv0', nn.Conv2d(in_chans, num_init_features, kernel_size=7, stride=2, padding=3, bias=False)), - ('norm0', norm_act_layer(num_init_features)), + ('norm0', norm_layer(num_init_features)), ('pool0', stem_pool), ])) @@ -203,7 +203,7 @@ class DenseNet(nn.Module): num_input_features=num_features, bn_size=bn_size, growth_rate=growth_rate, - norm_act_layer=norm_act_layer, + norm_layer=norm_layer, drop_rate=drop_rate, memory_efficient=memory_efficient ) @@ -212,12 +212,12 @@ class DenseNet(nn.Module): if i != len(block_config) - 1: trans = DenseTransition( num_input_features=num_features, num_output_features=num_features // 2, - norm_act_layer=norm_act_layer) + norm_layer=norm_layer) self.features.add_module('transition%d' % (i + 1), trans) num_features = num_features // 2 # Final batch norm - self.features.add_module('norm5', norm_act_layer(num_features)) + self.features.add_module('norm5', norm_layer(num_features)) # Linear layer self.num_features = num_features @@ -346,7 +346,7 @@ def densenet121d_evob(pretrained=False, **kwargs): return create_norm_act('EvoNormBatch', num_features, jit=True, **kwargs) 
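    # Note (illustrative, not part of the patch): the local norm_act_fn closure exists so
    # create_norm_act() can bind jit=True at construction time while still presenting the
    # plain norm_layer(num_features) signature DenseNet expects. Roughly equivalent calls:
    #   norm = create_norm_act('EvoNormBatch', 64, jit=True)           # scripted EvoNorm-B0
    #   norm = create_norm_act('BatchNormAct', 64, act_layer=nn.ReLU)   # BN fused with ReLU
    # functools.partial(create_norm_act, 'EvoNormBatch', jit=True) would work the same way.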
model = _densenet( 'densenet121d', growth_rate=32, block_config=(6, 12, 24, 16), stem_type='deep', - norm_act_layer=norm_act_fn, pretrained=pretrained, **kwargs) + norm_layer=norm_act_fn, pretrained=pretrained, **kwargs) return model @@ -359,7 +359,7 @@ def densenet121d_evos(pretrained=False, **kwargs): return create_norm_act('EvoNormSample', num_features, jit=True, **kwargs) model = _densenet( 'densenet121d', growth_rate=32, block_config=(6, 12, 24, 16), stem_type='deep', - norm_act_layer=norm_act_fn, pretrained=pretrained, **kwargs) + norm_layer=norm_act_fn, pretrained=pretrained, **kwargs) return model @@ -372,7 +372,7 @@ def densenet121d_iabn(pretrained=False, **kwargs): return create_norm_act('iabn', num_features, **kwargs) model = _densenet( 'densenet121tn', growth_rate=32, block_config=(6, 12, 24, 16), stem_type='deep', - norm_act_layer=norm_act_fn, pretrained=pretrained, **kwargs) + norm_layer=norm_act_fn, pretrained=pretrained, **kwargs) return model diff --git a/timm/models/dpn.py b/timm/models/dpn.py index 9c4fafc8..1f45095d 100644 --- a/timm/models/dpn.py +++ b/timm/models/dpn.py @@ -10,6 +10,7 @@ from __future__ import division from __future__ import print_function from collections import OrderedDict +from typing import Union, Optional, List, Tuple import torch import torch.nn as nn @@ -54,8 +55,19 @@ class CatBnAct(nn.Module): self.bn = nn.BatchNorm2d(in_chs, eps=0.001) self.act = activation_fn + @torch.jit._overload_method # noqa: F811 def forward(self, x): - x = torch.cat(x, dim=1) if isinstance(x, tuple) else x + # type: (Tuple[torch.Tensor, torch.Tensor]) -> (torch.Tensor) + pass + + @torch.jit._overload_method # noqa: F811 + def forward(self, x): + # type: (torch.Tensor) -> (torch.Tensor) + pass + + def forward(self, x): + if isinstance(x, tuple): + x = torch.cat(x, dim=1) return self.act(self.bn(x)) @@ -107,6 +119,8 @@ class DualPathBlock(nn.Module): self.key_stride = 1 self.has_proj = False + self.c1x1_w_s1 = None + self.c1x1_w_s2 = None if self.has_proj: # Using different member names here to allow easier parameter key matching for conversion if self.key_stride == 2: @@ -115,6 +129,7 @@ class DualPathBlock(nn.Module): else: self.c1x1_w_s1 = BnActConv2d( in_chs=in_chs, out_chs=num_1x1_c + 2 * inc, kernel_size=1, stride=1) + self.c1x1_a = BnActConv2d(in_chs=in_chs, out_chs=num_1x1_a, kernel_size=1, stride=1) self.c3x3_b = BnActConv2d( in_chs=num_1x1_a, out_chs=num_3x3_b, kernel_size=3, @@ -125,27 +140,46 @@ class DualPathBlock(nn.Module): self.c1x1_c2 = nn.Conv2d(num_3x3_b, inc, kernel_size=1, bias=False) else: self.c1x1_c = BnActConv2d(in_chs=num_3x3_b, out_chs=num_1x1_c + inc, kernel_size=1, stride=1) + self.c1x1_c1 = None + self.c1x1_c2 = None + @torch.jit._overload_method # noqa: F811 def forward(self, x): - x_in = torch.cat(x, dim=1) if isinstance(x, tuple) else x - if self.has_proj: - if self.key_stride == 2: - x_s = self.c1x1_w_s2(x_in) - else: - x_s = self.c1x1_w_s1(x_in) - x_s1 = x_s[:, :self.num_1x1_c, :, :] - x_s2 = x_s[:, self.num_1x1_c:, :, :] + # type: (Tuple[torch.Tensor, torch.Tensor]) -> Tuple[torch.Tensor, torch.Tensor] + pass + + @torch.jit._overload_method # noqa: F811 + def forward(self, x): + # type: (torch.Tensor) -> Tuple[torch.Tensor, torch.Tensor] + pass + + def forward(self, x) -> Tuple[torch.Tensor, torch.Tensor]: + if isinstance(x, tuple): + x_in = torch.cat(x, dim=1) else: + x_in = x + if self.c1x1_w_s1 is None and self.c1x1_w_s2 is None: + # self.has_proj == False, torchscript requires condition on module == None x_s1 = x[0] x_s2 = x[1] + 
else: + # self.has_proj == True + if self.c1x1_w_s1 is not None: + # self.key_stride = 1 + x_s = self.c1x1_w_s1(x_in) + else: + # self.key_stride = 2 + x_s = self.c1x1_w_s2(x_in) + x_s1 = x_s[:, :self.num_1x1_c, :, :] + x_s2 = x_s[:, self.num_1x1_c:, :, :] x_in = self.c1x1_a(x_in) x_in = self.c3x3_b(x_in) - if self.b: - x_in = self.c1x1_c(x_in) + x_in = self.c1x1_c(x_in) + if self.c1x1_c1 is not None: + # self.b == True, using None check for torchscript compat out1 = self.c1x1_c1(x_in) out2 = self.c1x1_c2(x_in) else: - x_in = self.c1x1_c(x_in) out1 = x_in[:, :self.num_1x1_c, :, :] out2 = x_in[:, self.num_1x1_c:, :, :] resid = x_s1 + out1 @@ -167,11 +201,9 @@ class DPN(nn.Module): # conv1 if small: - blocks['conv1_1'] = InputBlock( - num_init_features, in_chans=in_chans, kernel_size=3, padding=1) + blocks['conv1_1'] = InputBlock(num_init_features, in_chans=in_chans, kernel_size=3, padding=1) else: - blocks['conv1_1'] = InputBlock( - num_init_features, in_chans=in_chans, kernel_size=7, padding=3) + blocks['conv1_1'] = InputBlock(num_init_features, in_chans=in_chans, kernel_size=7, padding=3) # conv2 bw = 64 * bw_factor diff --git a/timm/models/efficientnet.py b/timm/models/efficientnet.py index 21fbee19..fbd7f420 100644 --- a/timm/models/efficientnet.py +++ b/timm/models/efficientnet.py @@ -24,11 +24,15 @@ An implementation of EfficienNet that covers variety of related models with effi Hacked together by Ross Wightman """ +import torch.nn as nn +import torch.nn.functional as F + from timm.data import IMAGENET_DEFAULT_MEAN, IMAGENET_DEFAULT_STD, IMAGENET_INCEPTION_MEAN, IMAGENET_INCEPTION_STD -from .efficientnet_builder import * +from .efficientnet_blocks import round_channels, resolve_bn_args, resolve_act_layer, BN_EPS_TF_DEFAULT +from .efficientnet_builder import EfficientNetBuilder, decode_arch_def, efficientnet_init_weights from .feature_hooks import FeatureHooks from .helpers import load_pretrained, adapt_model_from_file -from .layers import SelectAdaptivePool2d +from .layers import SelectAdaptivePool2d, create_conv2d from .registry import register_model __all__ = ['EfficientNet'] @@ -631,7 +635,7 @@ def _gen_mobilenet_v2( fix_stem=fix_stem_head, channel_multiplier=channel_multiplier, norm_kwargs=resolve_bn_args(kwargs), - act_layer=nn.ReLU6, + act_layer=resolve_act_layer(kwargs, 'relu6'), **kwargs ) model = _create_model(model_kwargs, default_cfgs[variant], pretrained) @@ -741,7 +745,7 @@ def _gen_efficientnet(variant, channel_multiplier=1.0, depth_multiplier=1.0, pre num_features=round_channels(1280, channel_multiplier, 8, None), stem_size=32, channel_multiplier=channel_multiplier, - act_layer=Swish, + act_layer=resolve_act_layer(kwargs, 'swish'), norm_kwargs=resolve_bn_args(kwargs), variant=variant, **kwargs, @@ -772,7 +776,7 @@ def _gen_efficientnet_edge(variant, channel_multiplier=1.0, depth_multiplier=1.0 stem_size=32, channel_multiplier=channel_multiplier, norm_kwargs=resolve_bn_args(kwargs), - act_layer=nn.ReLU, + act_layer=resolve_act_layer(kwargs, 'relu'), **kwargs, ) model = _create_model(model_kwargs, default_cfgs[variant], pretrained) @@ -802,7 +806,7 @@ def _gen_efficientnet_condconv( stem_size=32, channel_multiplier=channel_multiplier, norm_kwargs=resolve_bn_args(kwargs), - act_layer=Swish, + act_layer=resolve_act_layer(kwargs, 'swish'), **kwargs, ) model = _create_model(model_kwargs, default_cfgs[variant], pretrained) @@ -842,7 +846,7 @@ def _gen_efficientnet_lite(variant, channel_multiplier=1.0, depth_multiplier=1.0 stem_size=32, fix_stem=True, 
channel_multiplier=channel_multiplier, - act_layer=nn.ReLU6, + act_layer=resolve_act_layer(kwargs, 'relu6'), norm_kwargs=resolve_bn_args(kwargs), **kwargs, ) diff --git a/timm/models/efficientnet_blocks.py b/timm/models/efficientnet_blocks.py index cc4cdef1..5f64dc37 100644 --- a/timm/models/efficientnet_blocks.py +++ b/timm/models/efficientnet_blocks.py @@ -1,9 +1,9 @@ import torch import torch.nn as nn from torch.nn import functional as F -from .layers.activations import sigmoid -from .layers import create_conv2d, drop_path +from .layers import create_conv2d, drop_path, get_act_layer +from .layers.activations import sigmoid # Defaults used for Google/Tensorflow training of mobile networks /w RMSprop as per # papers and TF reference implementations. PT momentum equiv for TF decay is (1 - TF decay) @@ -52,6 +52,13 @@ def resolve_se_args(kwargs, in_chs, act_layer=None): return se_kwargs +def resolve_act_layer(kwargs, default='relu'): + act_layer = kwargs.pop('act_layer', default) + if isinstance(act_layer, str): + act_layer = get_act_layer(act_layer) + return act_layer + + def make_divisible(v, divisor=8, min_value=None): min_value = min_value or divisor new_v = max(min_value, int(v + divisor / 2) // divisor * divisor) @@ -213,7 +220,7 @@ class InvertedResidual(nn.Module): has_se = se_ratio is not None and se_ratio > 0. self.has_residual = (in_chs == out_chs and stride == 1) and not noskip self.drop_path_rate = drop_path_rate - + print(act_layer) # Point-wise expansion self.conv_pw = create_conv2d(in_chs, mid_chs, exp_kernel_size, padding=pad_type, **conv_kwargs) self.bn1 = norm_layer(mid_chs, **norm_kwargs) diff --git a/timm/models/efficientnet_builder.py b/timm/models/efficientnet_builder.py index 842098cf..1e06b4f3 100644 --- a/timm/models/efficientnet_builder.py +++ b/timm/models/efficientnet_builder.py @@ -1,13 +1,15 @@ import logging import math import re -from collections.__init__ import OrderedDict +from collections import OrderedDict from copy import deepcopy import torch.nn as nn -from .layers import CondConv2d, get_condconv_initializer -from .layers.activations import HardSwish, Swish + from .efficientnet_blocks import * +from .layers import CondConv2d, get_condconv_initializer + +__all__ = ["EfficientNetBuilder", "decode_arch_def", "efficientnet_init_weights"] def _parse_ksize(ss): @@ -57,13 +59,13 @@ def _decode_block_str(block_str): key = op[0] v = op[1:] if v == 're': - value = nn.ReLU + value = get_act_layer('relu') elif v == 'r6': - value = nn.ReLU6 + value = get_act_layer('relu6') elif v == 'hs': - value = HardSwish + value = get_act_layer('hard_swish') elif v == 'sw': - value = Swish + value = get_act_layer('swish') else: continue options[key] = value diff --git a/timm/models/layers/__init__.py b/timm/models/layers/__init__.py index e007a46d..b9c26fea 100644 --- a/timm/models/layers/__init__.py +++ b/timm/models/layers/__init__.py @@ -1,25 +1,28 @@ -from .padding import get_padding -from .pool2d_same import AvgPool2dSame -from .conv2d_same import Conv2dSame -from .conv_bn_act import ConvBnAct -from .mixed_conv2d import MixedConv2d -from .cond_conv2d import CondConv2d, get_condconv_initializer -from .pool2d_same import create_pool2d -from .create_conv2d import create_conv2d -from .create_attn import create_attn -from .selective_kernel import SelectiveKernelConv -from .se import SEModule -from .eca import EcaModule, CecaModule from .activations import * from .adaptive_avgmax_pool import \ adaptive_avgmax_pool2d, select_adaptive_pool2d, AdaptiveAvgMaxPool2d, 
SelectAdaptivePool2d -from .drop import DropBlock2d, DropPath, drop_block_2d, drop_path -from .test_time_pool import TestTimePoolHead, apply_test_time_pool -from .split_batchnorm import SplitBatchNorm2d, convert_splitbn_model from .anti_aliasing import AntiAliasDownsampleLayer -from .space_to_depth import SpaceToDepthModule from .blur_pool import BlurPool2d -from .norm_act import BatchNormAct2d +from .cond_conv2d import CondConv2d, get_condconv_initializer +from .config import is_exportable, is_scriptable, set_exportable, set_scriptable, is_no_jit, set_no_jit +from .conv2d_same import Conv2dSame +from .conv_bn_act import ConvBnAct +from .create_act import create_act_layer, get_act_layer, get_act_fn +from .create_attn import create_attn +from .create_conv2d import create_conv2d +from .create_norm_act import create_norm_act, get_norm_act_layer +from .drop import DropBlock2d, DropPath, drop_block_2d, drop_path +from .eca import EcaModule, CecaModule from .evo_norm import EvoNormBatch2d, EvoNormSample2d -from .create_norm_act import create_norm_act +from .inplace_abn import InplaceAbn +from .mixed_conv2d import MixedConv2d +from .norm_act import BatchNormAct2d +from .padding import get_padding +from .pool2d_same import AvgPool2dSame, create_pool2d +from .se import SEModule +from .selective_kernel import SelectiveKernelConv +from .separable_conv import SeparableConv2d, SeparableConvBnAct +from .space_to_depth import SpaceToDepthModule +from .split_batchnorm import SplitBatchNorm2d, convert_splitbn_model +from .test_time_pool import TestTimePoolHead, apply_test_time_pool from .weight_init import trunc_normal_ diff --git a/timm/models/layers/activations.py b/timm/models/layers/activations.py index 6f8d2f89..71904935 100644 --- a/timm/models/layers/activations.py +++ b/timm/models/layers/activations.py @@ -6,85 +6,15 @@ easily be swapped. All have an `inplace` arg even if not used. Hacked together by Ross Wightman """ - import torch from torch import nn as nn from torch.nn import functional as F -_USE_MEM_EFFICIENT_ISH = True -if _USE_MEM_EFFICIENT_ISH: - # This version reduces memory overhead of Swish during training by - # recomputing torch.sigmoid(x) in backward instead of saving it. 
- @torch.jit.script - def swish_jit_fwd(x): - return x.mul(torch.sigmoid(x)) - - - @torch.jit.script - def swish_jit_bwd(x, grad_output): - x_sigmoid = torch.sigmoid(x) - return grad_output * (x_sigmoid * (1 + x * (1 - x_sigmoid))) - - - class SwishJitAutoFn(torch.autograd.Function): - """ torch.jit.script optimised Swish - Inspired by conversation btw Jeremy Howard & Adam Pazske - https://twitter.com/jeremyphoward/status/1188251041835315200 - """ - - @staticmethod - def forward(ctx, x): - ctx.save_for_backward(x) - return swish_jit_fwd(x) - - @staticmethod - def backward(ctx, grad_output): - x = ctx.saved_tensors[0] - return swish_jit_bwd(x, grad_output) - - - def swish(x, _inplace=False): - return SwishJitAutoFn.apply(x) - - - @torch.jit.script - def mish_jit_fwd(x): - return x.mul(torch.tanh(F.softplus(x))) - - - @torch.jit.script - def mish_jit_bwd(x, grad_output): - x_sigmoid = torch.sigmoid(x) - x_tanh_sp = F.softplus(x).tanh() - return grad_output.mul(x_tanh_sp + x * x_sigmoid * (1 - x_tanh_sp * x_tanh_sp)) - - - class MishJitAutoFn(torch.autograd.Function): - @staticmethod - def forward(ctx, x): - ctx.save_for_backward(x) - return mish_jit_fwd(x) - - @staticmethod - def backward(ctx, grad_output): - x = ctx.saved_tensors[0] - return mish_jit_bwd(x, grad_output) - - def mish(x, _inplace=False): - return MishJitAutoFn.apply(x) - -else: - def swish(x, inplace: bool = False): - """Swish - Described in: https://arxiv.org/abs/1710.05941 - """ - return x.mul_(x.sigmoid()) if inplace else x.mul(x.sigmoid()) - - - def mish(x, _inplace: bool = False): - """Mish: A Self Regularized Non-Monotonic Neural Activation Function - https://arxiv.org/abs/1908.08681 - """ - return x.mul(F.softplus(x).tanh()) +def swish(x, inplace: bool = False): + """Swish - Described in: https://arxiv.org/abs/1710.05941 + """ + return x.mul_(x.sigmoid()) if inplace else x.mul(x.sigmoid()) class Swish(nn.Module): @@ -96,13 +26,21 @@ class Swish(nn.Module): return swish(x, self.inplace) +def mish(x, inplace: bool = False): + """Mish: A Self Regularized Non-Monotonic Neural Activation Function - https://arxiv.org/abs/1908.08681 + NOTE: I don't have a working inplace variant + """ + return x.mul(F.softplus(x).tanh()) + + class Mish(nn.Module): + """Mish: A Self Regularized Non-Monotonic Neural Activation Function - https://arxiv.org/abs/1908.08681 + """ def __init__(self, inplace: bool = False): super(Mish, self).__init__() - self.inplace = inplace def forward(self, x): - return mish(x, self.inplace) + return mish(x) def sigmoid(x, inplace: bool = False): @@ -162,3 +100,22 @@ class HardSigmoid(nn.Module): def forward(self, x): return hard_sigmoid(x, self.inplace) + +def hard_mish(x, inplace: bool = False): + """ Hard Mish + Experimental, based on notes by Mish author Diganta Misra at + https://github.com/digantamisra98/H-Mish/blob/0da20d4bc58e696b6803f2523c58d3c8a82782d0/README.md + """ + if inplace: + return x.mul_(0.5 * (x + 2).clamp(min=0, max=2)) + else: + return 0.5 * x * (x + 2).clamp(min=0, max=2) + + +class HardMish(nn.Module): + def __init__(self, inplace: bool = False): + super(HardMish, self).__init__() + self.inplace = inplace + + def forward(self, x): + return hard_mish(x, self.inplace) diff --git a/timm/models/layers/activations_jit.py b/timm/models/layers/activations_jit.py new file mode 100644 index 00000000..dd3277fa --- /dev/null +++ b/timm/models/layers/activations_jit.py @@ -0,0 +1,90 @@ +""" Activations + +A collection of jit-scripted activations fn and modules with a common interface so that they can 
+easily be swapped. All have an `inplace` arg even if not used. + +All jit scripted activations are lacking in-place variations on purpose, scripted kernel fusion does not +currently work across in-place op boundaries, thus performance is equal to or less than the non-scripted +versions if they contain in-place ops. + +Hacked together by Ross Wightman +""" + +import torch +from torch import nn as nn +from torch.nn import functional as F + + +@torch.jit.script +def swish_jit(x, inplace: bool = False): + """Swish - Described in: https://arxiv.org/abs/1710.05941 + """ + return x.mul(x.sigmoid()) + + +@torch.jit.script +def mish_jit(x, _inplace: bool = False): + """Mish: A Self Regularized Non-Monotonic Neural Activation Function - https://arxiv.org/abs/1908.08681 + """ + return x.mul(F.softplus(x).tanh()) + + +class SwishJit(nn.Module): + def __init__(self, inplace: bool = False): + super(SwishJit, self).__init__() + + def forward(self, x): + return swish_jit(x) + + +class MishJit(nn.Module): + def __init__(self, inplace: bool = False): + super(MishJit, self).__init__() + + def forward(self, x): + return mish_jit(x) + + +@torch.jit.script +def hard_sigmoid_jit(x, inplace: bool = False): + # return F.relu6(x + 3.) / 6. + return (x + 3).clamp(min=0, max=6).div(6.) # clamp seems ever so slightly faster? + + +class HardSigmoidJit(nn.Module): + def __init__(self, inplace: bool = False): + super(HardSigmoidJit, self).__init__() + + def forward(self, x): + return hard_sigmoid_jit(x) + + +@torch.jit.script +def hard_swish_jit(x, inplace: bool = False): + # return x * (F.relu6(x + 3.) / 6) + return x * (x + 3).clamp(min=0, max=6).div(6.) # clamp seems ever so slightly faster? + + +class HardSwishJit(nn.Module): + def __init__(self, inplace: bool = False): + super(HardSwishJit, self).__init__() + + def forward(self, x): + return hard_swish_jit(x) + + +@torch.jit.script +def hard_mish_jit(x, inplace: bool = False): + """ Hard Mish + Experimental, based on notes by Mish author Diganta Misra at + https://github.com/digantamisra98/H-Mish/blob/0da20d4bc58e696b6803f2523c58d3c8a82782d0/README.md + """ + return 0.5 * x * (x + 2).clamp(min=0, max=2) + + +class HardMishJit(nn.Module): + def __init__(self, inplace: bool = False): + super(HardMishJit, self).__init__() + + def forward(self, x): + return hard_mish_jit(x) diff --git a/timm/models/layers/activations_me.py b/timm/models/layers/activations_me.py new file mode 100644 index 00000000..9c492f1e --- /dev/null +++ b/timm/models/layers/activations_me.py @@ -0,0 +1,208 @@ +""" Activations (memory-efficient w/ custom autograd) + +A collection of activations fn and modules with a common interface so that they can +easily be swapped. All have an `inplace` arg even if not used. + +These activations are not compatible with jit scripting or ONNX export of the model, please use either +the JIT or basic versions of the activations. 
+ +Hacked together by Ross Wightman +""" + +import torch +from torch import nn as nn +from torch.nn import functional as F + + +@torch.jit.script +def swish_jit_fwd(x): + return x.mul(torch.sigmoid(x)) + + +@torch.jit.script +def swish_jit_bwd(x, grad_output): + x_sigmoid = torch.sigmoid(x) + return grad_output * (x_sigmoid * (1 + x * (1 - x_sigmoid))) + + +class SwishJitAutoFn(torch.autograd.Function): + """ torch.jit.script optimised Swish w/ memory-efficient checkpoint + Inspired by conversation btw Jeremy Howard & Adam Pazske + https://twitter.com/jeremyphoward/status/1188251041835315200 + """ + + @staticmethod + def forward(ctx, x): + ctx.save_for_backward(x) + return swish_jit_fwd(x) + + @staticmethod + def backward(ctx, grad_output): + x = ctx.saved_tensors[0] + return swish_jit_bwd(x, grad_output) + + +def swish_me(x, inplace=False): + return SwishJitAutoFn.apply(x) + + +class SwishMe(nn.Module): + def __init__(self, inplace: bool = False): + super(SwishMe, self).__init__() + + def forward(self, x): + return SwishJitAutoFn.apply(x) + + +@torch.jit.script +def mish_jit_fwd(x): + return x.mul(torch.tanh(F.softplus(x))) + + +@torch.jit.script +def mish_jit_bwd(x, grad_output): + x_sigmoid = torch.sigmoid(x) + x_tanh_sp = F.softplus(x).tanh() + return grad_output.mul(x_tanh_sp + x * x_sigmoid * (1 - x_tanh_sp * x_tanh_sp)) + + +class MishJitAutoFn(torch.autograd.Function): + """ Mish: A Self Regularized Non-Monotonic Neural Activation Function - https://arxiv.org/abs/1908.08681 + A memory efficient, jit scripted variant of Mish + """ + @staticmethod + def forward(ctx, x): + ctx.save_for_backward(x) + return mish_jit_fwd(x) + + @staticmethod + def backward(ctx, grad_output): + x = ctx.saved_tensors[0] + return mish_jit_bwd(x, grad_output) + + +def mish_me(x, inplace=False): + return MishJitAutoFn.apply(x) + + +class MishMe(nn.Module): + def __init__(self, inplace: bool = False): + super(MishMe, self).__init__() + + def forward(self, x): + return MishJitAutoFn.apply(x) + + +@torch.jit.script +def hard_sigmoid_jit_fwd(x, inplace: bool = False): + return (x + 3).clamp(min=0, max=6).div(6.) + + +@torch.jit.script +def hard_sigmoid_jit_bwd(x, grad_output): + m = torch.ones_like(x) * ((x >= -3.) & (x <= 3.)) / 6. + return grad_output * m + + +class HardSigmoidJitAutoFn(torch.autograd.Function): + @staticmethod + def forward(ctx, x): + ctx.save_for_backward(x) + return hard_sigmoid_jit_fwd(x) + + @staticmethod + def backward(ctx, grad_output): + x = ctx.saved_tensors[0] + return hard_sigmoid_jit_bwd(x, grad_output) + + +def hard_sigmoid_me(x, inplace: bool = False): + return HardSigmoidJitAutoFn.apply(x) + + +class HardSigmoidMe(nn.Module): + def __init__(self, inplace: bool = False): + super(HardSigmoidMe, self).__init__() + + def forward(self, x): + return HardSigmoidJitAutoFn.apply(x) + + +@torch.jit.script +def hard_swish_jit_fwd(x): + return x * (x + 3).clamp(min=0, max=6).div(6.) + + +@torch.jit.script +def hard_swish_jit_bwd(x, grad_output): + m = torch.ones_like(x) * (x >= 3.) + m = torch.where((x >= -3.) & (x <= 3.), x / 3. 
+ .5, m) + return grad_output * m + + +class HardSwishJitAutoFn(torch.autograd.Function): + """A memory efficient, jit-scripted HardSwish activation""" + @staticmethod + def forward(ctx, x): + ctx.save_for_backward(x) + return hard_swish_jit_fwd(x) + + @staticmethod + def backward(ctx, grad_output): + x = ctx.saved_tensors[0] + return hard_swish_jit_bwd(x, grad_output) + + +def hard_swish_me(x, inplace=False): + return HardSwishJitAutoFn.apply(x) + + +class HardSwishMe(nn.Module): + def __init__(self, inplace: bool = False): + super(HardSwishMe, self).__init__() + + def forward(self, x): + return HardSwishJitAutoFn.apply(x) + + +@torch.jit.script +def hard_mish_jit_fwd(x): + return 0.5 * x * (x + 2).clamp(min=0, max=2) + + +@torch.jit.script +def hard_mish_jit_bwd(x, grad_output): + m = torch.ones_like(x) * (x >= -2.) + m = torch.where((x >= -2.) & (x <= 0.), x + 1., m) + return grad_output * m + + +class HardMishJitAutoFn(torch.autograd.Function): + """ A memory efficient, jit scripted variant of Hard Mish + Experimental, based on notes by Mish author Diganta Misra at + https://github.com/digantamisra98/H-Mish/blob/0da20d4bc58e696b6803f2523c58d3c8a82782d0/README.md + """ + @staticmethod + def forward(ctx, x): + ctx.save_for_backward(x) + return mish_jit_fwd(x) + + @staticmethod + def backward(ctx, grad_output): + x = ctx.saved_tensors[0] + return mish_jit_bwd(x, grad_output) + + +def hard_mish_me(x, inplace: bool = False): + return HardMishJitAutoFn.apply(x) + + +class HardMishMe(nn.Module): + def __init__(self, inplace: bool = False): + super(HardMishMe, self).__init__() + + def forward(self, x): + return HardMishJitAutoFn.apply(x) + + + diff --git a/timm/models/layers/cond_conv2d.py b/timm/models/layers/cond_conv2d.py index 0241b501..b1759d99 100644 --- a/timm/models/layers/cond_conv2d.py +++ b/timm/models/layers/cond_conv2d.py @@ -15,7 +15,7 @@ from torch.nn import functional as F from .helpers import tup_pair from .conv2d_same import conv2d_same -from timm.models.layers.padding import get_padding_value +from .padding import get_padding_value def get_condconv_initializer(initializer, num_experts, expert_shape): diff --git a/timm/models/layers/config.py b/timm/models/layers/config.py new file mode 100644 index 00000000..2c0faf23 --- /dev/null +++ b/timm/models/layers/config.py @@ -0,0 +1,74 @@ +""" Model / Layer Config Singleton +""" +from typing import Any + +__all__ = ['is_exportable', 'is_scriptable', 'set_exportable', 'set_scriptable', 'is_no_jit', 'set_no_jit'] + +# Set to True if prefer to have layers with no jit optimization (includes activations) +_NO_JIT = False + +# Set to True if prefer to have activation layers with no jit optimization +_NO_ACTIVATION_JIT = False + +# Set to True if exporting a model with Same padding via ONNX +_EXPORTABLE = False + +# Set to True if wanting to use torch.jit.script on a model +_SCRIPTABLE = False + + +def is_no_jit(): + return _NO_JIT + + +class set_no_jit: + def __init__(self, mode: bool) -> None: + global _NO_JIT + self.prev = _NO_JIT + _NO_JIT = mode + + def __enter__(self) -> None: + pass + + def __exit__(self, *args: Any) -> bool: + global _NO_JIT + _NO_JIT = self.prev + return False + + +def is_exportable(): + return _EXPORTABLE + + +class set_exportable: + def __init__(self, mode: bool) -> None: + global _EXPORTABLE + self.prev = _EXPORTABLE + _EXPORTABLE = mode + + def __enter__(self) -> None: + pass + + def __exit__(self, *args: Any) -> bool: + global _EXPORTABLE + _EXPORTABLE = self.prev + return False + + +def is_scriptable(): + 
return _SCRIPTABLE + + +class set_scriptable: + def __init__(self, mode: bool) -> None: + global _SCRIPTABLE + self.prev = _SCRIPTABLE + _SCRIPTABLE = mode + + def __enter__(self) -> None: + pass + + def __exit__(self, *args: Any) -> bool: + global _SCRIPTABLE + _SCRIPTABLE = self.prev + return False diff --git a/timm/models/layers/conv2d_same.py b/timm/models/layers/conv2d_same.py index 863d1783..06f08b4e 100644 --- a/timm/models/layers/conv2d_same.py +++ b/timm/models/layers/conv2d_same.py @@ -7,8 +7,7 @@ import torch.nn as nn import torch.nn.functional as F from typing import Tuple, Optional -from timm.models.layers.padding import get_padding_value -from .padding import pad_same +from .padding import pad_same, get_padding_value def conv2d_same( diff --git a/timm/models/layers/conv_bn_act.py b/timm/models/layers/conv_bn_act.py index d7835320..43f6760e 100644 --- a/timm/models/layers/conv_bn_act.py +++ b/timm/models/layers/conv_bn_act.py @@ -4,33 +4,28 @@ Hacked together by Ross Wightman """ from torch import nn as nn -from timm.models.layers import get_padding +from .create_conv2d import create_conv2d +from .create_norm_act import convert_norm_act_type class ConvBnAct(nn.Module): - def __init__(self, in_channels, out_channels, kernel_size=1, stride=1, dilation=1, groups=1, - drop_block=None, act_layer=nn.ReLU, norm_layer=nn.BatchNorm2d, aa_layer=None): + def __init__(self, in_channels, out_channels, kernel_size=1, stride=1, padding='', dilation=1, groups=1, + norm_layer=nn.BatchNorm2d, norm_kwargs=None, act_layer=nn.ReLU, apply_act=True, + drop_block=None, aa_layer=None): super(ConvBnAct, self).__init__() - padding = get_padding(kernel_size, stride, dilation) # assuming PyTorch style padding for this block use_aa = aa_layer is not None - self.conv = nn.Conv2d( - in_channels, out_channels=out_channels, kernel_size=kernel_size, stride=1 if use_aa else stride, + self.conv = create_conv2d( + in_channels, out_channels, kernel_size, stride=1 if use_aa else stride, padding=padding, dilation=dilation, groups=groups, bias=False) - self.bn = norm_layer(out_channels) + + # NOTE for backwards compatibility with models that use separate norm and act layer definitions + norm_act_layer, norm_act_args = convert_norm_act_type(norm_layer, act_layer, norm_kwargs) + self.bn = norm_act_layer(out_channels, apply_act=apply_act, drop_block=drop_block, **norm_act_args) self.aa = aa_layer(channels=out_channels) if stride == 2 and use_aa else None - self.drop_block = drop_block - if act_layer is not None: - self.act = act_layer(inplace=True) - else: - self.act = None def forward(self, x): x = self.conv(x) x = self.bn(x) - if self.drop_block is not None: - x = self.drop_block(x) - if self.act is not None: - x = self.act(x) if self.aa is not None: x = self.aa(x) return x diff --git a/timm/models/layers/create_act.py b/timm/models/layers/create_act.py new file mode 100644 index 00000000..66ab1e84 --- /dev/null +++ b/timm/models/layers/create_act.py @@ -0,0 +1,103 @@ +from .activations import * +from .activations_jit import * +from .activations_me import * +from .config import is_exportable, is_scriptable, is_no_jit + + +_ACT_FN_DEFAULT = dict( + swish=swish, + mish=mish, + relu=F.relu, + relu6=F.relu6, + sigmoid=sigmoid, + tanh=tanh, + hard_sigmoid=hard_sigmoid, + hard_swish=hard_swish, + hard_mish=hard_mish, +) + +_ACT_FN_JIT = dict( + swish=swish_jit, + mish=mish_jit, + hard_sigmoid=hard_sigmoid_jit, + hard_swish=hard_swish_jit, + hard_mish=hard_mish_jit +) + +_ACT_FN_ME = dict( + swish=swish_me, + mish=mish_me, + 
hard_sigmoid=hard_sigmoid_me, + hard_swish=hard_swish_me, + hard_mish=hard_mish_me, +) + +_ACT_LAYER_DEFAULT = dict( + swish=Swish, + mish=Mish, + relu=nn.ReLU, + relu6=nn.ReLU6, + sigmoid=Sigmoid, + tanh=Tanh, + hard_sigmoid=HardSigmoid, + hard_swish=HardSwish, + hard_mish=HardMish, +) + +_ACT_LAYER_JIT = dict( + swish=SwishJit, + mish=MishJit, + hard_sigmoid=HardSigmoidJit, + hard_swish=HardSwishJit, + hard_mish=HardMishJit +) + +_ACT_LAYER_ME = dict( + swish=SwishMe, + mish=MishMe, + hard_sigmoid=HardSigmoidMe, + hard_swish=HardSwishMe, + hard_mish=HardMishMe, +) + + +def get_act_fn(name='relu'): + """ Activation Function Factory + Fetching activation fns by name with this function allows export or torch script friendly + functions to be returned dynamically based on current config. + """ + if not name: + return None + if not (is_no_jit() or is_exportable() or is_scriptable()): + # If not exporting or scripting the model, first look for a memory-efficient version with + # custom autograd, then fallback + if name in _ACT_FN_ME: + return _ACT_FN_ME[name] + if not is_no_jit(): + if name in _ACT_FN_JIT: + return _ACT_FN_JIT[name] + return _ACT_FN_DEFAULT[name] + + +def get_act_layer(name='relu'): + """ Activation Layer Factory + Fetching activation layers by name with this function allows export or torch script friendly + functions to be returned dynamically based on current config. + """ + if not name: + return None + if not (is_no_jit() or is_exportable() or is_scriptable()): + if name in _ACT_LAYER_ME: + return _ACT_LAYER_ME[name] + if not is_no_jit(): + if name in _ACT_LAYER_JIT: + return _ACT_LAYER_JIT[name] + return _ACT_LAYER_DEFAULT[name] + + +def create_act_layer(name, inplace=False, **kwargs): + act_layer = get_act_layer(name) + if act_layer is not None: + return act_layer(inplace=inplace, **kwargs) + else: + return None diff --git a/timm/models/layers/create_attn.py b/timm/models/layers/create_attn.py index 94c4e4e7..24eccaa0 100644 --- a/timm/models/layers/create_attn.py +++ b/timm/models/layers/create_attn.py @@ -3,7 +3,7 @@ Hacked together by Ross Wightman """ import torch -from .se import SEModule +from .se import SEModule, EffectiveSEModule from .eca import EcaModule, CecaModule from .cbam import CbamModule, LightCbamModule @@ -15,6 +15,8 @@ def create_attn(attn_type, channels, **kwargs): attn_type = attn_type.lower() if attn_type == 'se': module_cls = SEModule + elif attn_type == 'ese': + module_cls = EffectiveSEModule elif attn_type == 'eca': module_cls = EcaModule elif attn_type == 'ceca': diff --git a/timm/models/layers/create_conv2d.py b/timm/models/layers/create_conv2d.py index 527c80a3..34fbd44f 100644 --- a/timm/models/layers/create_conv2d.py +++ b/timm/models/layers/create_conv2d.py @@ -8,23 +8,23 @@ from .cond_conv2d import CondConv2d from .conv2d_same import create_conv2d_pad -def create_conv2d(in_chs, out_chs, kernel_size, **kwargs): +def create_conv2d(in_channels, out_channels, kernel_size, **kwargs): """ Select a 2d convolution implementation based on arguments Creates and returns one of torch.nn.Conv2d, Conv2dSame, MixedConv2d, or CondConv2d. Used extensively by EfficientNet, MobileNetv3 and related networks. 
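As a sketch of how the factory above is expected to behave (module paths assumed; all the config flags default to False per the new config.py):

from timm.models.layers.create_act import get_act_layer, create_act_layer
from timm.models.layers.config import set_scriptable

act_cls = get_act_layer('hard_swish')         # no flags set -> memory-efficient HardSwishMe
with set_scriptable(True):
    act_cls = get_act_layer('hard_swish')     # scriptable -> HardSwishJit is returned instead
act = create_act_layer('relu', inplace=True)  # names without special variants fall through to nn.ReLU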
""" - assert 'groups' not in kwargs # only use 'depthwise' bool arg if isinstance(kernel_size, list): assert 'num_experts' not in kwargs # MixNet + CondConv combo not supported currently + assert 'groups' not in kwargs # MixedConv groups are defined by kernel list # We're going to use only lists for defining the MixedConv2d kernel groups, # ints, tuples, other iterables will continue to pass to normal conv and specify h, w. - m = MixedConv2d(in_chs, out_chs, kernel_size, **kwargs) + m = MixedConv2d(in_channels, out_channels, kernel_size, **kwargs) else: depthwise = kwargs.pop('depthwise', False) - groups = out_chs if depthwise else 1 + groups = out_channels if depthwise else kwargs.pop('groups', 1) if 'num_experts' in kwargs and kwargs['num_experts'] > 0: - m = CondConv2d(in_chs, out_chs, kernel_size, groups=groups, **kwargs) + m = CondConv2d(in_channels, out_channels, kernel_size, groups=groups, **kwargs) else: - m = create_conv2d_pad(in_chs, out_chs, kernel_size, groups=groups, **kwargs) + m = create_conv2d_pad(in_channels, out_channels, kernel_size, groups=groups, **kwargs) return m diff --git a/timm/models/layers/create_norm_act.py b/timm/models/layers/create_norm_act.py index 251c0c17..7bdaa125 100644 --- a/timm/models/layers/create_norm_act.py +++ b/timm/models/layers/create_norm_act.py @@ -1,37 +1,64 @@ +import types +import functools + import torch import torch.nn as nn from .evo_norm import EvoNormBatch2d, EvoNormSample2d -from .norm_act import BatchNormAct2d -try: - from inplace_abn import InPlaceABN - has_iabn = True -except ImportError: - has_iabn = False +from .norm_act import BatchNormAct2d, GroupNormAct +from .inplace_abn import InplaceAbn +_NORM_ACT_TYPES = {BatchNormAct2d, GroupNormAct, EvoNormBatch2d, EvoNormSample2d, InplaceAbn} -def create_norm_act(layer_type, num_features, jit=False, **kwargs): - layer_parts = layer_type.split('_') - assert len(layer_parts) in (1, 2) - layer_class = layer_parts[0].lower() - #activation_class = layer_parts[1].lower() if len(layer_parts) > 1 else '' # FIXME support string act selection - - if layer_class == "batchnormact": - layer = BatchNormAct2d(num_features, **kwargs) # defaults to RELU of no kwargs override - elif layer_class == "batchnormrelu": - assert 'act_layer' not in kwargs - layer = BatchNormAct2d(num_features, act_layer=nn.ReLU, **kwargs) + +def get_norm_act_layer(layer_class): + layer_class = layer_class.replace('_', '').lower() + if layer_class.startswith("batchnorm"): + layer = BatchNormAct2d + elif layer_class.startswith("groupnorm"): + layer = GroupNormAct elif layer_class == "evonormbatch": - layer = EvoNormBatch2d(num_features, **kwargs) + layer = EvoNormBatch2d elif layer_class == "evonormsample": - layer = EvoNormSample2d(num_features, **kwargs) + layer = EvoNormSample2d elif layer_class == "iabn" or layer_class == "inplaceabn": - if not has_iabn: - raise ImportError( - "Pplease install InplaceABN:'pip install git+https://github.com/mapillary/inplace_abn.git@v1.0.11'") - layer = InPlaceABN(num_features, **kwargs) + layer = InplaceAbn else: assert False, "Invalid norm_act layer (%s)" % layer_class - if jit: - layer = torch.jit.script(layer) return layer + + +def create_norm_act(layer_type, num_features, apply_act=True, jit=False, **kwargs): + layer_parts = layer_type.split('-') # e.g. batchnorm-leaky_relu + assert len(layer_parts) in (1, 2) + layer = get_norm_act_layer(layer_parts[0]) + #activation_class = layer_parts[1].lower() if len(layer_parts) > 1 else '' # FIXME support string act selection? 
+ layer_instance = layer(num_features, apply_act=apply_act, **kwargs) + if jit: + layer_instance = torch.jit.script(layer_instance) + return layer_instance + + +def convert_norm_act_type(norm_layer, act_layer, norm_kwargs=None): + assert isinstance(norm_layer, (type, str, types.FunctionType, functools.partial)) + assert act_layer is None or isinstance(act_layer, (type, str, types.FunctionType, functools.partial)) + norm_act_args = norm_kwargs.copy() if norm_kwargs else {} + if isinstance(norm_layer, str): + norm_act_layer = get_norm_act_layer(norm_layer) + elif norm_layer in _NORM_ACT_TYPES: + norm_act_layer = norm_layer + elif isinstance(norm_layer, (types.FunctionType, functools.partial)): + # assuming this is a lambda/fn/bound partial that creates norm_act layer + norm_act_layer = norm_layer + else: + type_name = norm_layer.__name__.lower() + if type_name.startswith('batchnorm'): + norm_act_layer = BatchNormAct2d + elif type_name.startswith('groupnorm'): + norm_act_layer = GroupNormAct + else: + assert False, f"No equivalent norm_act layer for {type_name}" + # Must pass `act_layer` through for backwards compat where `act_layer=None` implies no activation. + # Newer models will use `apply_act` and likely have `act_layer` arg bound to relevant NormAct types. + norm_act_args.update(dict(act_layer=act_layer)) + return norm_act_layer, norm_act_args diff --git a/timm/models/layers/drop.py b/timm/models/layers/drop.py index 5f2008c0..c91b969e 100644 --- a/timm/models/layers/drop.py +++ b/timm/models/layers/drop.py @@ -17,8 +17,6 @@ Hacked together by Ross Wightman import torch import torch.nn as nn import torch.nn.functional as F -import numpy as np -import math def drop_block_2d( diff --git a/timm/models/layers/evo_norm.py b/timm/models/layers/evo_norm.py index 62d49428..c7c00b80 100644 --- a/timm/models/layers/evo_norm.py +++ b/timm/models/layers/evo_norm.py @@ -2,9 +2,9 @@ An attempt at getting decent performing EvoNorms running in PyTorch. While currently faster than other impl, still quite a ways off the built-in BN -in terms of memory usage and throughput. +in terms of memory usage and throughput (roughly 5x mem, 1/2 - 1/3x speed). -Still very much a WIP, fiddling with buffer usage, in-place optimizations, and layouts. +Still very much a WIP, fiddling with buffer usage, in-place/jit optimizations, and layouts. 
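A small sketch of the conversion helper in use (import paths assumed); this mirrors what the reworked ConvBnAct does internally with its norm_layer/act_layer arguments:

import torch.nn as nn
from timm.models.layers.create_norm_act import convert_norm_act_type, create_norm_act

# separate norm + act types are folded into a single norm-act layer plus its extra kwargs
norm_act_layer, norm_act_args = convert_norm_act_type(nn.BatchNorm2d, nn.ReLU, norm_kwargs=None)
bn_act = norm_act_layer(64, apply_act=True, **norm_act_args)  # -> BatchNormAct2d(64, act_layer=nn.ReLU)

# string-based creation also works, e.g. for the experimental EvoNorm layers
evo = create_norm_act('evonormsample', 64, apply_act=True)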
Hacked together by Ross Wightman """ @@ -14,15 +14,15 @@ import torch.nn as nn class EvoNormBatch2d(nn.Module): - def __init__(self, num_features, momentum=0.1, nonlin=True, eps=1e-5): + def __init__(self, num_features, apply_act=True, momentum=0.1, eps=1e-5, drop_block=None): super(EvoNormBatch2d, self).__init__() + self.apply_act = apply_act # apply activation (non-linearity) self.momentum = momentum - self.nonlin = nonlin self.eps = eps param_shape = (1, num_features, 1, 1) self.weight = nn.Parameter(torch.ones(param_shape), requires_grad=True) self.bias = nn.Parameter(torch.zeros(param_shape), requires_grad=True) - if nonlin: + if apply_act: self.v = nn.Parameter(torch.ones(param_shape), requires_grad=True) self.register_buffer('running_var', torch.ones(1, num_features, 1, 1)) self.reset_parameters() @@ -30,7 +30,7 @@ class EvoNormBatch2d(nn.Module): def reset_parameters(self): nn.init.ones_(self.weight) nn.init.zeros_(self.bias) - if self.nonlin: + if self.apply_act: nn.init.ones_(self.v) def forward(self, x): @@ -40,46 +40,42 @@ class EvoNormBatch2d(nn.Module): var = x.var(dim=(0, 2, 3), unbiased=False, keepdim=True) self.running_var.copy_(self.momentum * var.detach() + (1 - self.momentum) * self.running_var) else: - var = self.running_var.clone() + var = self.running_var - if self.nonlin: + if self.apply_act: v = self.v.to(dtype=x_type) - d = (x * v) + x.var(dim=(2, 3), unbiased=False, keepdim=True).add_(self.eps).sqrt_().to(dtype=x_type) - d = d.max(var.add_(self.eps).sqrt_().to(dtype=x_type)) + d = (x * v) + (x.var(dim=(2, 3), unbiased=False, keepdim=True) + self.eps).sqrt().to(dtype=x_type) + d = d.max((var + self.eps).sqrt().to(dtype=x_type)) x = x / d - return x.mul_(self.weight).add_(self.bias) - else: - return x.mul(self.weight).add_(self.bias) + return x * self.weight + self.bias class EvoNormSample2d(nn.Module): - def __init__(self, num_features, nonlin=True, groups=8, eps=1e-5): + def __init__(self, num_features, apply_act=True, groups=8, eps=1e-5, drop_block=None): super(EvoNormSample2d, self).__init__() - self.nonlin = nonlin + self.apply_act = apply_act # apply activation (non-linearity) self.groups = groups self.eps = eps param_shape = (1, num_features, 1, 1) self.weight = nn.Parameter(torch.ones(param_shape), requires_grad=True) self.bias = nn.Parameter(torch.zeros(param_shape), requires_grad=True) - if nonlin: + if apply_act: self.v = nn.Parameter(torch.ones(param_shape), requires_grad=True) self.reset_parameters() def reset_parameters(self): nn.init.ones_(self.weight) nn.init.zeros_(self.bias) - if self.nonlin: + if self.apply_act: nn.init.ones_(self.v) def forward(self, x): assert x.dim() == 4, 'expected 4D input' B, C, H, W = x.shape assert C % self.groups == 0 - if self.nonlin: + if self.apply_act: n = (x * self.v).sigmoid().reshape(B, self.groups, -1) x = x.reshape(B, self.groups, -1) - x = n / x.var(dim=-1, unbiased=False, keepdim=True).add_(self.eps).sqrt_() + x = n / (x.var(dim=-1, unbiased=False, keepdim=True) + self.eps).sqrt() x = x.reshape(B, C, H, W) - return x.mul_(self.weight).add_(self.bias) - else: - return x.mul(self.weight).add_(self.bias) + return x * self.weight + self.bias diff --git a/timm/models/layers/inplace_abn.py b/timm/models/layers/inplace_abn.py new file mode 100644 index 00000000..d78079db --- /dev/null +++ b/timm/models/layers/inplace_abn.py @@ -0,0 +1,85 @@ +import torch +from torch import nn as nn + +try: + from inplace_abn.functions import inplace_abn, inplace_abn_sync + has_iabn = True +except ImportError: + has_iabn = False + + 
def inplace_abn(x, weight, bias, running_mean, running_var, + training=True, momentum=0.1, eps=1e-05, activation="leaky_relu", activation_param=0.01): + raise ImportError( + "Please install InplaceABN:'pip install git+https://github.com/mapillary/inplace_abn.git@v1.0.11'") + + def inplace_abn_sync(**kwargs): + inplace_abn(**kwargs) + + +class InplaceAbn(nn.Module): + """Activated Batch Normalization + + This gathers a BatchNorm and an activation function in a single module + + Parameters + ---------- + num_features : int + Number of feature channels in the input and output. + eps : float + Small constant to prevent numerical issues. + momentum : float + Momentum factor applied to compute running statistics. + affine : bool + If `True` apply learned scale and shift transformation after normalization. + act_layer : str or nn.Module type + Name or type of the activation functions, one of: `leaky_relu`, `elu` + act_param : float + Negative slope for the `leaky_relu` activation. + """ + + def __init__(self, num_features, eps=1e-5, momentum=0.1, affine=True, apply_act=True, + act_layer="leaky_relu", act_param=0.01, drop_block=None,): + super(InplaceAbn, self).__init__() + self.num_features = num_features + self.affine = affine + self.eps = eps + self.momentum = momentum + if apply_act: + if isinstance(act_layer, str): + assert act_layer in ('leaky_relu', 'elu', 'identity') + self.act_name = act_layer + else: + # convert act layer passed as type to string + if isinstance(act_layer, nn.ELU): + self.act_name = 'elu' + elif isinstance(act_layer, nn.LeakyReLU): + self.act_name = 'leaky_relu' + else: + assert False, f'Invalid act layer {act_layer.__name__} for IABN' + else: + self.act_name = 'identity' + self.act_param = act_param + if self.affine: + self.weight = nn.Parameter(torch.ones(num_features)) + self.bias = nn.Parameter(torch.zeros(num_features)) + else: + self.register_parameter('weight', None) + self.register_parameter('bias', None) + self.register_buffer('running_mean', torch.zeros(num_features)) + self.register_buffer('running_var', torch.ones(num_features)) + self.reset_parameters() + + def reset_parameters(self): + nn.init.constant_(self.running_mean, 0) + nn.init.constant_(self.running_var, 1) + if self.affine: + nn.init.constant_(self.weight, 1) + nn.init.constant_(self.bias, 0) + + def forward(self, x): + output = inplace_abn( + x, self.weight, self.bias, self.running_mean, self.running_var, + self.training, self.momentum, self.eps, self.act_name, self.act_param) + if isinstance(output, tuple): + output = output[0] + return output diff --git a/timm/models/layers/norm_act.py b/timm/models/layers/norm_act.py index 879a8939..48c4d6da 100644 --- a/timm/models/layers/norm_act.py +++ b/timm/models/layers/norm_act.py @@ -1,28 +1,33 @@ """ Normalization + Activation Layers """ +import torch from torch import nn as nn from torch.nn import functional as F +from .create_act import get_act_layer + class BatchNormAct2d(nn.BatchNorm2d): """BatchNorm + Activation - This module performs BatchNorm + Actibation in s manner that will remain bavkwards + This module performs BatchNorm + Activation in a manner that will remain backwards compatible with weights trained with separate bn, act. This is why we inherit from BN instead of composing it as a .bn member. 
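A quick way to see why that matters (illustrative only, import path assumed): the fused layer keeps exactly the BatchNorm2d parameter and buffer names, so checkpoints trained with a separate bn + act still load.

import torch.nn as nn
from timm.models.layers.norm_act import BatchNormAct2d  # path assumed

plain = nn.BatchNorm2d(32)
fused = BatchNormAct2d(32, act_layer=nn.ReLU)
# the activation holds no parameters, so the state_dict keys are identical
assert set(plain.state_dict().keys()) == set(fused.state_dict().keys())
fused.load_state_dict(plain.state_dict())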
""" - def __init__(self, num_features, eps=1e-5, momentum=0.1, affine=True, - track_running_stats=True, act_layer=nn.ReLU, inplace=True): - super(BatchNormAct2d, self).__init__(num_features, eps, momentum, affine, track_running_stats) - self.act = act_layer(inplace=inplace) - - def forward(self, x): - # FIXME cannot call parent forward() and maintain jit.script compatibility? - # x = super(BatchNormAct2d, self).forward(x) - - # BEGIN nn.BatchNorm2d forward() cut & paste - # self._check_input_dim(x) + def __init__(self, num_features, eps=1e-5, momentum=0.1, affine=True, track_running_stats=True, + apply_act=True, act_layer=nn.ReLU, inplace=True, drop_block=None): + super(BatchNormAct2d, self).__init__( + num_features, eps=eps, momentum=momentum, affine=affine, track_running_stats=track_running_stats) + if isinstance(act_layer, str): + act_layer = get_act_layer(act_layer) + if act_layer is not None and apply_act: + self.act = act_layer(inplace=inplace) + else: + self.act = None + def _forward_jit(self, x): + """ A cut & paste of the contents of the PyTorch BatchNorm2d forward function + """ # exponential_average_factor is self.momentum set to # (when it is available) only so that if gets updated # in ONNX graph when this node is exported to ONNX. @@ -41,10 +46,40 @@ class BatchNormAct2d(nn.BatchNorm2d): exponential_average_factor = self.momentum x = F.batch_norm( - x, self.running_mean, self.running_var, self.weight, self.bias, - self.training or not self.track_running_stats, - exponential_average_factor, self.eps) - # END BatchNorm2d forward() + x, self.running_mean, self.running_var, self.weight, self.bias, + self.training or not self.track_running_stats, + exponential_average_factor, self.eps) + return x + + @torch.jit.ignore + def _forward_python(self, x): + return super(BatchNormAct2d, self).forward(x) + + def forward(self, x): + # FIXME cannot call parent forward() and maintain jit.script compatibility? 
+ if torch.jit.is_scripting(): + x = self._forward_jit(x) + else: + self._forward_python(x) + if self.act is not None: + x = self.act(x) + return x + - x = self.act(x) +class GroupNormAct(nn.GroupNorm): + + def __init__(self, num_groups, num_channels, eps=1e-5, affine=True, + apply_act=True, act_layer=nn.ReLU, inplace=True, drop_block=None): + super(GroupNormAct, self).__init__(num_groups, num_channels, eps=eps, affine=affine) + if isinstance(act_layer, str): + act_layer = get_act_layer(act_layer) + if act_layer is not None and apply_act: + self.act = act_layer(inplace=inplace) + else: + self.act = None + + def forward(self, x): + x = F.group_norm(x, self.num_groups, self.weight, self.bias, self.eps) + if self.act is not None: + x = self.act(x) return x diff --git a/timm/models/layers/pool2d_same.py b/timm/models/layers/pool2d_same.py index 40f6dacc..7135f831 100644 --- a/timm/models/layers/pool2d_same.py +++ b/timm/models/layers/pool2d_same.py @@ -6,7 +6,6 @@ import torch import torch.nn as nn import torch.nn.functional as F from typing import Union, List, Tuple, Optional -import math from .helpers import tup_pair from .padding import pad_same, get_padding_value diff --git a/timm/models/layers/se.py b/timm/models/layers/se.py index 6bb4723e..83389fc5 100644 --- a/timm/models/layers/se.py +++ b/timm/models/layers/se.py @@ -1,9 +1,11 @@ from torch import nn as nn +from .create_act import get_act_fn class SEModule(nn.Module): - def __init__(self, channels, reduction=16, act_layer=nn.ReLU, min_channels=8, reduction_channels=None): + def __init__(self, channels, reduction=16, act_layer=nn.ReLU, min_channels=8, reduction_channels=None, + gate_fn='hard_sigmoid'): super(SEModule, self).__init__() self.avg_pool = nn.AdaptiveAvgPool2d(1) reduction_channels = reduction_channels or max(channels // reduction, min_channels) @@ -12,10 +14,27 @@ class SEModule(nn.Module): self.act = act_layer(inplace=True) self.fc2 = nn.Conv2d( reduction_channels, channels, kernel_size=1, padding=0, bias=True) + self.gate_fn = get_act_fn(gate_fn) def forward(self, x): x_se = self.avg_pool(x) x_se = self.fc1(x_se) x_se = self.act(x_se) x_se = self.fc2(x_se) - return x * x_se.sigmoid() + return x * self.gate_fn(x_se) + + +class EffectiveSEModule(nn.Module): + """ 'Effective Squeeze-Excitation + From `CenterMask : Real-Time Anchor-Free Instance Segmentation` - https://arxiv.org/abs/1911.06667 + """ + def __init__(self, channel, gate_fn='hard_sigmoid'): + super(EffectiveSEModule, self).__init__() + self.avg_pool = nn.AdaptiveAvgPool2d(1) + self.fc = nn.Conv2d(channel, channel, kernel_size=1, padding=0) + self.gate_fn = get_act_fn(gate_fn) + + def forward(self, x): + x_se = self.avg_pool(x) + x_se = self.fc(x_se) + return x * self.gate_fn(x_se, inplace=True) diff --git a/timm/models/layers/selective_kernel.py b/timm/models/layers/selective_kernel.py index ed9132de..e7535f71 100644 --- a/timm/models/layers/selective_kernel.py +++ b/timm/models/layers/selective_kernel.py @@ -4,7 +4,6 @@ Paper: Selective Kernel Networks (https://arxiv.org/abs/1903.06586) Hacked together by Ross Wightman """ - import torch from torch import nn as nn diff --git a/timm/models/layers/separable_conv.py b/timm/models/layers/separable_conv.py new file mode 100644 index 00000000..3df0387a --- /dev/null +++ b/timm/models/layers/separable_conv.py @@ -0,0 +1,51 @@ +from torch import nn as nn + +from .create_conv2d import create_conv2d +from .create_norm_act import convert_norm_act_type + + +class SeparableConvBnAct(nn.Module): + """ Separable Conv w/ 
trailing Norm and Activation + """ + def __init__(self, in_channels, out_channels, kernel_size=3, stride=1, dilation=1, padding='', bias=False, + channel_multiplier=1.0, pw_kernel_size=1, norm_layer=nn.BatchNorm2d, norm_kwargs=None, + act_layer=nn.ReLU, apply_act=True, drop_block=None): + super(SeparableConvBnAct, self).__init__() + norm_kwargs = norm_kwargs or {} + + self.conv_dw = create_conv2d( + in_channels, int(in_channels * channel_multiplier), kernel_size, + stride=stride, dilation=dilation, padding=padding, depthwise=True) + + self.conv_pw = create_conv2d( + int(in_channels * channel_multiplier), out_channels, pw_kernel_size, padding=padding, bias=bias) + + norm_act_layer, norm_act_args = convert_norm_act_type(norm_layer, act_layer, norm_kwargs) + self.bn = norm_act_layer(out_channels, apply_act=apply_act, drop_block=drop_block, **norm_act_args) + + def forward(self, x): + x = self.conv_dw(x) + x = self.conv_pw(x) + if self.bn is not None: + x = self.bn(x) + return x + + +class SeparableConv2d(nn.Module): + """ Separable Conv + """ + def __init__(self, in_channels, out_channels, kernel_size=3, stride=1, dilation=1, padding='', bias=False, + channel_multiplier=1.0, pw_kernel_size=1): + super(SeparableConv2d, self).__init__() + + self.conv_dw = create_conv2d( + in_channels, int(in_channels * channel_multiplier), kernel_size, + stride=stride, dilation=dilation, padding=padding, depthwise=True) + + self.conv_pw = create_conv2d( + int(in_channels * channel_multiplier), out_channels, pw_kernel_size, padding=padding, bias=bias) + + def forward(self, x): + x = self.conv_dw(x) + x = self.conv_pw(x) + return x diff --git a/timm/models/layers/test_time_pool.py b/timm/models/layers/test_time_pool.py index dcfc66ca..b2f3d2c3 100644 --- a/timm/models/layers/test_time_pool.py +++ b/timm/models/layers/test_time_pool.py @@ -6,6 +6,7 @@ Hacked together by Ross Wightman import logging from torch import nn import torch.nn.functional as F + from .adaptive_avgmax_pool import adaptive_avgmax_pool2d diff --git a/timm/models/mobilenetv3.py b/timm/models/mobilenetv3.py index 8daebdf0..e1a700b0 100644 --- a/timm/models/mobilenetv3.py +++ b/timm/models/mobilenetv3.py @@ -7,13 +7,15 @@ Paper: Searching for MobileNetV3 - https://arxiv.org/abs/1905.02244 Hacked together by Ross Wightman """ +import torch.nn as nn +import torch.nn.functional as F from timm.data import IMAGENET_DEFAULT_MEAN, IMAGENET_DEFAULT_STD, IMAGENET_INCEPTION_MEAN, IMAGENET_INCEPTION_STD -from .efficientnet_builder import * +from .efficientnet_blocks import round_channels, resolve_bn_args, resolve_act_layer, BN_EPS_TF_DEFAULT +from .efficientnet_builder import EfficientNetBuilder, decode_arch_def, efficientnet_init_weights from .feature_hooks import FeatureHooks from .helpers import load_pretrained -from .layers import SelectAdaptivePool2d, create_conv2d -from .layers.activations import HardSwish, hard_sigmoid +from .layers import SelectAdaptivePool2d, create_conv2d, get_act_fn, hard_sigmoid from .registry import register_model __all__ = ['MobileNetV3'] @@ -273,8 +275,8 @@ def _gen_mobilenet_v3_rw(variant, channel_multiplier=1.0, pretrained=False, **kw head_bias=False, channel_multiplier=channel_multiplier, norm_kwargs=resolve_bn_args(kwargs), - act_layer=HardSwish, - se_kwargs=dict(gate_fn=hard_sigmoid, reduce_mid=True, divisor=1), + act_layer=resolve_act_layer(kwargs, 'hard_swish'), + se_kwargs=dict(gate_fn=get_act_fn('hard_sigmoid'), reduce_mid=True, divisor=1), **kwargs, ) model = _create_model(model_kwargs, default_cfgs[variant], 
pretrained) @@ -293,7 +295,7 @@ def _gen_mobilenet_v3(variant, channel_multiplier=1.0, pretrained=False, **kwarg if 'small' in variant: num_features = 1024 if 'minimal' in variant: - act_layer = nn.ReLU + act_layer = resolve_act_layer(kwargs, 'relu') arch_def = [ # stage 0, 112x112 in ['ds_r1_k3_s2_e1_c16'], @@ -309,7 +311,7 @@ def _gen_mobilenet_v3(variant, channel_multiplier=1.0, pretrained=False, **kwarg ['cn_r1_k1_s1_c576'], ] else: - act_layer = HardSwish + act_layer = resolve_act_layer(kwargs, 'hard_swish') arch_def = [ # stage 0, 112x112 in ['ds_r1_k3_s2_e1_c16_se0.25_nre'], # relu @@ -327,7 +329,7 @@ def _gen_mobilenet_v3(variant, channel_multiplier=1.0, pretrained=False, **kwarg else: num_features = 1280 if 'minimal' in variant: - act_layer = nn.ReLU + act_layer = resolve_act_layer(kwargs, 'relu') arch_def = [ # stage 0, 112x112 in ['ds_r1_k3_s1_e1_c16'], @@ -345,7 +347,7 @@ def _gen_mobilenet_v3(variant, channel_multiplier=1.0, pretrained=False, **kwarg ['cn_r1_k1_s1_c960'], ] else: - act_layer = HardSwish + act_layer = resolve_act_layer(kwargs, 'hard_swish') arch_def = [ # stage 0, 112x112 in ['ds_r1_k3_s1_e1_c16_nre'], # relu diff --git a/timm/models/pnasnet.py b/timm/models/pnasnet.py index 97c2f86d..56614bd6 100644 --- a/timm/models/pnasnet.py +++ b/timm/models/pnasnet.py @@ -43,11 +43,12 @@ class MaxPool(nn.Module): self.pool = nn.MaxPool2d(kernel_size, stride=stride, padding=padding) def forward(self, x): - if self.zero_pad: + if self.zero_pad is not None: x = self.zero_pad(x) - x = self.pool(x) - if self.zero_pad: + x = self.pool(x) x = x[:, :, 1:, 1:] + else: + x = self.pool(x) return x @@ -90,11 +91,12 @@ class BranchSeparables(nn.Module): def forward(self, x): x = self.relu_1(x) - if self.zero_pad: + if self.zero_pad is not None: x = self.zero_pad(x) - x = self.separable_1(x) - if self.zero_pad: + x = self.separable_1(x) x = x[:, :, 1:, 1:].contiguous() + else: + x = self.separable_1(x) x = self.bn_sep_1(x) x = self.relu_2(x) x = self.separable_2(x) @@ -171,15 +173,14 @@ class CellBase(nn.Module): x_comb_iter_3 = x_comb_iter_3_left + x_comb_iter_3_right x_comb_iter_4_left = self.comb_iter_4_left(x_left) - if self.comb_iter_4_right: + if self.comb_iter_4_right is not None: x_comb_iter_4_right = self.comb_iter_4_right(x_right) else: x_comb_iter_4_right = x_right x_comb_iter_4 = x_comb_iter_4_left + x_comb_iter_4_right x_out = torch.cat( - [x_comb_iter_0, x_comb_iter_1, x_comb_iter_2, x_comb_iter_3, - x_comb_iter_4], 1) + [x_comb_iter_0, x_comb_iter_1, x_comb_iter_2, x_comb_iter_3, x_comb_iter_4], 1) return x_out @@ -280,9 +281,8 @@ class Cell(CellBase): kernel_size=3, stride=stride, zero_pad=zero_pad) if is_reduction: - self.comb_iter_4_right = ReluConvBn(out_channels_right, - out_channels_right, - kernel_size=1, stride=stride) + self.comb_iter_4_right = ReluConvBn( + out_channels_right, out_channels_right, kernel_size=1, stride=stride) else: self.comb_iter_4_right = None diff --git a/timm/models/res2net.py b/timm/models/res2net.py index 3e3882fe..b095de30 100644 --- a/timm/models/res2net.py +++ b/timm/models/res2net.py @@ -77,6 +77,8 @@ class Bottle2neck(nn.Module): if self.is_first: # FIXME this should probably have count_include_pad=False, but hurts original weights self.pool = nn.AvgPool2d(kernel_size=3, stride=stride, padding=1) + else: + self.pool = None self.conv3 = nn.Conv2d(width * scale, outplanes, kernel_size=1, bias=False) self.bn3 = norm_layer(outplanes) @@ -97,14 +99,22 @@ class Bottle2neck(nn.Module): spx = torch.split(out, self.width, 1) spo = [] + sp = 
spx[0] for i, (conv, bn) in enumerate(zip(self.convs, self.bns)): - sp = spx[i] if i == 0 or self.is_first else sp + spx[i] + if self.is_first: + sp = spx[i] + else: + sp = sp + spx[i] sp = conv(sp) sp = bn(sp) sp = self.relu(sp) spo.append(sp) if self.scale > 1: - spo.append(self.pool(spx[-1]) if self.is_first else spx[-1]) + if self.pool is not None: + # self.is_first == True, None check for torchscript + spo.append(self.pool(spx[-1])) + else: + spo.append(spx[-1]) out = torch.cat(spo, 1) out = self.conv3(out) diff --git a/timm/models/resnet.py b/timm/models/resnet.py index 430bbb49..8750c5bd 100644 --- a/timm/models/resnet.py +++ b/timm/models/resnet.py @@ -200,7 +200,6 @@ class BasicBlock(nn.Module): class Bottleneck(nn.Module): - __constants__ = ['se', 'downsample'] # for pre 1.4 torchscript compat expansion = 4 def __init__(self, inplanes, planes, stride=1, downsample=None, cardinality=1, base_width=64, diff --git a/timm/models/selecsls.py b/timm/models/selecsls.py index 77bdd2c9..b7573086 100644 --- a/timm/models/selecsls.py +++ b/timm/models/selecsls.py @@ -9,6 +9,7 @@ https://arxiv.org/abs/1907.00837 Based on ResNet implementation in https://github.com/rwightman/pytorch-image-models and SelecSLS Net implementation in https://github.com/mehtadushy/SelecSLS-Pytorch """ +from typing import List import torch import torch.nn as nn @@ -52,6 +53,27 @@ default_cfgs = { } +class SequentialList(nn.Sequential): + + def __init__(self, *args): + super(SequentialList, self).__init__(*args) + + @torch.jit._overload_method # noqa: F811 + def forward(self, x): + # type: (List[torch.Tensor]) -> (List[torch.Tensor]) + pass + + @torch.jit._overload_method # noqa: F811 + def forward(self, x): + # type: (torch.Tensor) -> (List[torch.Tensor]) + pass + + def forward(self, x) -> List[torch.Tensor]: + for module in self: + x = module(x) + return x + + def conv_bn(in_chs, out_chs, k=3, stride=1, padding=None, dilation=1): if padding is None: padding = ((stride - 1) + dilation * (k - 1)) // 2 @@ -77,7 +99,7 @@ class SelecSLSBlock(nn.Module): self.conv5 = conv_bn(mid_chs, mid_chs // 2, 3) self.conv6 = conv_bn(2 * mid_chs + (0 if is_first else skip_chs), out_chs, 1) - def forward(self, x): + def forward(self, x: List[torch.Tensor]) -> List[torch.Tensor]: assert isinstance(x, list) assert len(x) in [1, 2] @@ -113,7 +135,7 @@ class SelecSLS(nn.Module): super(SelecSLS, self).__init__() self.stem = conv_bn(in_chans, 32, stride=2) - self.features = nn.Sequential(*[cfg['block'](*block_args) for block_args in cfg['features']]) + self.features = SequentialList(*[cfg['block'](*block_args) for block_args in cfg['features']]) self.head = nn.Sequential(*[conv_bn(*conv_args) for conv_args in cfg['head']]) self.num_features = cfg['num_features'] diff --git a/timm/models/tresnet.py b/timm/models/tresnet.py index 55a6e195..a4274b2f 100644 --- a/timm/models/tresnet.py +++ b/timm/models/tresnet.py @@ -13,15 +13,9 @@ import torch.nn as nn import torch.nn.functional as F from .helpers import load_pretrained -from .layers import SpaceToDepthModule, AntiAliasDownsampleLayer, SelectAdaptivePool2d +from .layers import SpaceToDepthModule, AntiAliasDownsampleLayer, SelectAdaptivePool2d, InplaceAbn from .registry import register_model -try: - from inplace_abn import InPlaceABN - has_iabn = True -except ImportError: - has_iabn = False - __all__ = ['tresnet_m', 'tresnet_l', 'tresnet_xl'] @@ -91,37 +85,37 @@ class FastSEModule(nn.Module): def IABN2Float(module: nn.Module) -> nn.Module: """If `module` is IABN don't use half precision.""" - 
if isinstance(module, InPlaceABN): + if isinstance(module, InplaceAbn): module.float() for child in module.children(): IABN2Float(child) return module -def conv2d_ABN(ni, nf, stride, activation="leaky_relu", kernel_size=3, activation_param=1e-2, groups=1): +def conv2d_iabn(ni, nf, stride, kernel_size=3, groups=1, act_layer="leaky_relu", act_param=1e-2): return nn.Sequential( nn.Conv2d( ni, nf, kernel_size=kernel_size, stride=stride, padding=kernel_size // 2, groups=groups, bias=False), - InPlaceABN(num_features=nf, activation=activation, activation_param=activation_param) + InplaceAbn(nf, act_layer=act_layer, act_param=act_param) ) class BasicBlock(nn.Module): expansion = 1 - def __init__(self, inplanes, planes, stride=1, downsample=None, use_se=True, anti_alias_layer=None): + def __init__(self, inplanes, planes, stride=1, downsample=None, use_se=True, aa_layer=None): super(BasicBlock, self).__init__() if stride == 1: - self.conv1 = conv2d_ABN(inplanes, planes, stride=1, activation_param=1e-3) + self.conv1 = conv2d_iabn(inplanes, planes, stride=1, act_param=1e-3) else: - if anti_alias_layer is None: - self.conv1 = conv2d_ABN(inplanes, planes, stride=2, activation_param=1e-3) + if aa_layer is None: + self.conv1 = conv2d_iabn(inplanes, planes, stride=2, act_param=1e-3) else: self.conv1 = nn.Sequential( - conv2d_ABN(inplanes, planes, stride=1, activation_param=1e-3), - anti_alias_layer(channels=planes, filt_size=3, stride=2)) + conv2d_iabn(inplanes, planes, stride=1, act_param=1e-3), + aa_layer(channels=planes, filt_size=3, stride=2)) - self.conv2 = conv2d_ABN(planes, planes, stride=1, activation="identity") + self.conv2 = conv2d_iabn(planes, planes, stride=1, act_layer="identity") self.relu = nn.ReLU(inplace=True) self.downsample = downsample self.stride = stride @@ -148,24 +142,25 @@ class BasicBlock(nn.Module): class Bottleneck(nn.Module): expansion = 4 - def __init__(self, inplanes, planes, stride=1, downsample=None, use_se=True, anti_alias_layer=None): + def __init__(self, inplanes, planes, stride=1, downsample=None, use_se=True, + act_layer="leaky_relu", aa_layer=None): super(Bottleneck, self).__init__() - self.conv1 = conv2d_ABN( - inplanes, planes, kernel_size=1, stride=1, activation="leaky_relu", activation_param=1e-3) + self.conv1 = conv2d_iabn( + inplanes, planes, kernel_size=1, stride=1, act_layer=act_layer, act_param=1e-3) if stride == 1: - self.conv2 = conv2d_ABN( - planes, planes, kernel_size=3, stride=1, activation="leaky_relu", activation_param=1e-3) + self.conv2 = conv2d_iabn( + planes, planes, kernel_size=3, stride=1, act_layer=act_layer, act_param=1e-3) else: - if anti_alias_layer is None: - self.conv2 = conv2d_ABN( - planes, planes, kernel_size=3, stride=2, activation="leaky_relu", activation_param=1e-3) + if aa_layer is None: + self.conv2 = conv2d_iabn( + planes, planes, kernel_size=3, stride=2, act_layer=act_layer, act_param=1e-3) else: self.conv2 = nn.Sequential( - conv2d_ABN(planes, planes, kernel_size=3, stride=1, activation="leaky_relu", activation_param=1e-3), - anti_alias_layer(channels=planes, filt_size=3, stride=2)) + conv2d_iabn(planes, planes, kernel_size=3, stride=1, act_layer=act_layer, act_param=1e-3), + aa_layer(channels=planes, filt_size=3, stride=2)) - self.conv3 = conv2d_ABN( - planes, planes * self.expansion, kernel_size=1, stride=1, activation="identity") + self.conv3 = conv2d_iabn( + planes, planes * self.expansion, kernel_size=1, stride=1, act_layer="identity") self.relu = nn.ReLU(inplace=True) self.downsample = downsample @@ -195,30 +190,26 @@ 
class Bottleneck(nn.Module): class TResNet(nn.Module): def __init__(self, layers, in_chans=3, num_classes=1000, width_factor=1.0, no_aa_jit=False, global_pool='avg', drop_rate=0.): - if not has_iabn: - raise ImportError( - "For TResNet models, please install InplaceABN: " - "'pip install git+https://github.com/mapillary/inplace_abn.git@v1.0.11'") self.num_classes = num_classes self.drop_rate = drop_rate super(TResNet, self).__init__() # JIT layers space_to_depth = SpaceToDepthModule() - anti_alias_layer = partial(AntiAliasDownsampleLayer, no_jit=no_aa_jit) + aa_layer = partial(AntiAliasDownsampleLayer, no_jit=no_aa_jit) # TResnet stages self.inplanes = int(64 * width_factor) self.planes = int(64 * width_factor) - conv1 = conv2d_ABN(in_chans * 16, self.planes, stride=1, kernel_size=3) + conv1 = conv2d_iabn(in_chans * 16, self.planes, stride=1, kernel_size=3) layer1 = self._make_layer( - BasicBlock, self.planes, layers[0], stride=1, use_se=True, anti_alias_layer=anti_alias_layer) # 56x56 + BasicBlock, self.planes, layers[0], stride=1, use_se=True, aa_layer=aa_layer) # 56x56 layer2 = self._make_layer( - BasicBlock, self.planes * 2, layers[1], stride=2, use_se=True, anti_alias_layer=anti_alias_layer) # 28x28 + BasicBlock, self.planes * 2, layers[1], stride=2, use_se=True, aa_layer=aa_layer) # 28x28 layer3 = self._make_layer( - Bottleneck, self.planes * 4, layers[2], stride=2, use_se=True, anti_alias_layer=anti_alias_layer) # 14x14 + Bottleneck, self.planes * 4, layers[2], stride=2, use_se=True, aa_layer=aa_layer) # 14x14 layer4 = self._make_layer( - Bottleneck, self.planes * 8, layers[3], stride=2, use_se=False, anti_alias_layer=anti_alias_layer) # 7x7 + Bottleneck, self.planes * 8, layers[3], stride=2, use_se=False, aa_layer=aa_layer) # 7x7 # body self.body = nn.Sequential(OrderedDict([ @@ -239,7 +230,7 @@ class TResNet(nn.Module): for m in self.modules(): if isinstance(m, nn.Conv2d): nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='leaky_relu') - elif isinstance(m, nn.BatchNorm2d) or isinstance(m, InPlaceABN): + elif isinstance(m, nn.BatchNorm2d) or isinstance(m, InplaceAbn): nn.init.constant_(m.weight, 1) nn.init.constant_(m.bias, 0) @@ -251,24 +242,24 @@ class TResNet(nn.Module): m.conv3[1].weight = nn.Parameter(torch.zeros_like(m.conv3[1].weight)) # BN to zero if isinstance(m, nn.Linear): m.weight.data.normal_(0, 0.01) - def _make_layer(self, block, planes, blocks, stride=1, use_se=True, anti_alias_layer=None): + def _make_layer(self, block, planes, blocks, stride=1, use_se=True, aa_layer=None): downsample = None if stride != 1 or self.inplanes != planes * block.expansion: layers = [] if stride == 2: # avg pooling before 1x1 conv layers.append(nn.AvgPool2d(kernel_size=2, stride=2, ceil_mode=True, count_include_pad=False)) - layers += [conv2d_ABN( - self.inplanes, planes * block.expansion, kernel_size=1, stride=1, activation="identity")] + layers += [conv2d_iabn( + self.inplanes, planes * block.expansion, kernel_size=1, stride=1, act_layer="identity")] downsample = nn.Sequential(*layers) layers = [] layers.append(block( - self.inplanes, planes, stride, downsample, use_se=use_se, anti_alias_layer=anti_alias_layer)) + self.inplanes, planes, stride, downsample, use_se=use_se, aa_layer=aa_layer)) self.inplanes = planes * block.expansion for i in range(1, blocks): layers.append( - block(self.inplanes, planes, use_se=use_se, anti_alias_layer=anti_alias_layer)) + block(self.inplanes, planes, use_se=use_se, aa_layer=aa_layer)) return nn.Sequential(*layers) def get_classifier(self): 
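One practical effect of swapping the hard InPlaceABN dependency for the InplaceAbn wrapper (a sketch; model names as registered in tresnet.py): TResNet models can now be constructed without the inplace_abn package installed, and the fallback's ImportError only fires on the first forward pass.

import torch
from timm.models import create_model

model = create_model('tresnet_m', pretrained=False, num_classes=10)  # builds either way
# model(torch.randn(1, 3, 224, 224))  # raises the fallback ImportError if inplace_abn is missing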
diff --git a/timm/models/vovnet.py b/timm/models/vovnet.py new file mode 100644 index 00000000..bedff10c --- /dev/null +++ b/timm/models/vovnet.py @@ -0,0 +1,408 @@ +""" VoVNet (V1 & V2) + +Papers: +* `An Energy and GPU-Computation Efficient Backbone Network` - https://arxiv.org/abs/1904.09730 +* `CenterMask : Real-Time Anchor-Free Instance Segmentation` - https://arxiv.org/abs/1911.06667 + +Looked at https://github.com/youngwanLEE/vovnet-detectron2 & +https://github.com/stigma0617/VoVNet.pytorch/blob/master/models_vovnet/vovnet.py +for some reference, rewrote most of the code. + +Hacked together by Ross Wightman +""" + +from typing import List + +import torch +import torch.nn as nn +import torch.nn.functional as F + +from timm.data import IMAGENET_DEFAULT_MEAN, IMAGENET_DEFAULT_STD +from .registry import register_model +from .helpers import load_pretrained +from .layers import ConvBnAct, SeparableConvBnAct, BatchNormAct2d, SelectAdaptivePool2d, \ + create_attn, create_norm_act, get_norm_act_layer + + +# model cfgs adapted from https://github.com/youngwanLEE/vovnet-detectron2 & +# https://github.com/stigma0617/VoVNet.pytorch/blob/master/models_vovnet/vovnet.py +model_cfgs = dict( + vovnet39a=dict( + stem_ch=[64, 64, 128], + stage_conv_ch=[128, 160, 192, 224], + stage_out_ch=[256, 512, 768, 1024], + layer_per_block=5, + block_per_stage=[1, 1, 2, 2], + residual=False, + depthwise=False, + attn='', + ), + vovnet57a=dict( + stem_ch=[64, 64, 128], + stage_conv_ch=[128, 160, 192, 224], + stage_out_ch=[256, 512, 768, 1024], + layer_per_block=5, + block_per_stage=[1, 1, 4, 3], + residual=False, + depthwise=False, + attn='', + + ), + ese_vovnet19b_slim_dw=dict( + stem_ch=[64, 64, 64], + stage_conv_ch=[64, 80, 96, 112], + stage_out_ch=[112, 256, 384, 512], + layer_per_block=3, + block_per_stage=[1, 1, 1, 1], + residual=True, + depthwise=True, + attn='ese', + + ), + ese_vovnet19b_dw=dict( + stem_ch=[64, 64, 64], + stage_conv_ch=[128, 160, 192, 224], + stage_out_ch=[256, 512, 768, 1024], + layer_per_block=3, + block_per_stage=[1, 1, 1, 1], + residual=True, + depthwise=True, + attn='ese', + ), + ese_vovnet19b_slim=dict( + stem_ch=[64, 64, 128], + stage_conv_ch=[64, 80, 96, 112], + stage_out_ch=[112, 256, 384, 512], + layer_per_block=3, + block_per_stage=[1, 1, 1, 1], + residual=True, + depthwise=False, + attn='ese', + ), + ese_vovnet19b=dict( + stem_ch=[64, 64, 128], + stage_conv_ch=[128, 160, 192, 224], + stage_out_ch=[256, 512, 768, 1024], + layer_per_block=3, + block_per_stage=[1, 1, 1, 1], + residual=True, + depthwise=False, + attn='ese', + + ), + ese_vovnet39b=dict( + stem_ch=[64, 64, 128], + stage_conv_ch=[128, 160, 192, 224], + stage_out_ch=[256, 512, 768, 1024], + layer_per_block=5, + block_per_stage=[1, 1, 2, 2], + residual=True, + depthwise=False, + attn='ese', + ), + ese_vovnet57b=dict( + stem_ch=[64, 64, 128], + stage_conv_ch=[128, 160, 192, 224], + stage_out_ch=[256, 512, 768, 1024], + layer_per_block=5, + block_per_stage=[1, 1, 4, 3], + residual=True, + depthwise=False, + attn='ese', + + ), + ese_vovnet99b=dict( + stem_ch=[64, 64, 128], + stage_conv_ch=[128, 160, 192, 224], + stage_out_ch=[256, 512, 768, 1024], + layer_per_block=5, + block_per_stage=[1, 3, 9, 3], + residual=True, + depthwise=False, + attn='ese', + ), + eca_vovnet39b=dict( + stem_ch=[64, 64, 128], + stage_conv_ch=[128, 160, 192, 224], + stage_out_ch=[256, 512, 768, 1024], + layer_per_block=5, + block_per_stage=[1, 1, 2, 2], + residual=True, + depthwise=False, + attn='eca', + ), +) + + +def _cfg(url=''): + return { + 'url': 
url, 'num_classes': 1000, 'input_size': (3, 224, 224), 'pool_size': (7, 7), + 'crop_pct': 0.875, 'interpolation': 'bicubic', + 'mean': IMAGENET_DEFAULT_MEAN, 'std': IMAGENET_DEFAULT_STD, + 'first_conv': 'stem.0.conv', 'classifier': 'head.fc', + } + + +default_cfgs = dict( + vovnet39a=_cfg(url=''), + vovnet57a=_cfg(url=''), + ese_vovnet19b_slim_dw=_cfg(url=''), + ese_vovnet19b_dw=_cfg(url=''), + ese_vovnet19b_slim=_cfg(url=''), + ese_vovnet39b=_cfg(url=''), + ese_vovnet57b=_cfg(url=''), + ese_vovnet99b=_cfg(url=''), + eca_vovnet39b=_cfg(url=''), +) + + +class SequentialAppendList(nn.Sequential): + def __init__(self, *args): + super(SequentialAppendList, self).__init__(*args) + + def forward(self, x: torch.Tensor, concat_list: List[torch.Tensor]) -> torch.Tensor: + for i, module in enumerate(self): + if i == 0: + concat_list.append(module(x)) + else: + concat_list.append(module(concat_list[-1])) + x = torch.cat(concat_list, dim=1) + return x + + +class OsaBlock(nn.Module): + + def __init__(self, in_chs, mid_chs, out_chs, layer_per_block, residual=False, + depthwise=False, attn='', norm_layer=BatchNormAct2d): + super(OsaBlock, self).__init__() + + self.residual = residual + self.depthwise = depthwise + + next_in_chs = in_chs + if self.depthwise and next_in_chs != mid_chs: + assert not residual + self.conv_reduction = ConvBnAct(next_in_chs, mid_chs, 1, norm_layer=norm_layer) + else: + self.conv_reduction = None + + mid_convs = [] + for i in range(layer_per_block): + if self.depthwise: + conv = SeparableConvBnAct(mid_chs, mid_chs, norm_layer=norm_layer) + else: + conv = ConvBnAct(next_in_chs, mid_chs, 3, norm_layer=norm_layer) + next_in_chs = mid_chs + mid_convs.append(conv) + self.conv_mid = SequentialAppendList(*mid_convs) + + # feature aggregation + next_in_chs = in_chs + layer_per_block * mid_chs + self.conv_concat = ConvBnAct(next_in_chs, out_chs, norm_layer=norm_layer) + + if attn: + self.attn = create_attn(attn, out_chs) + else: + self.attn = None + + def forward(self, x): + output = [x] + if self.conv_reduction is not None: + x = self.conv_reduction(x) + x = self.conv_mid(x, output) + x = self.conv_concat(x) + if self.attn is not None: + x = self.attn(x) + if self.residual: + x = x + output[0] + return x + + +class OsaStage(nn.Module): + + def __init__(self, in_chs, mid_chs, out_chs, block_per_stage, layer_per_block, + downsample=True, residual=True, depthwise=False, attn='ese', norm_layer=BatchNormAct2d): + super(OsaStage, self).__init__() + + if downsample: + self.pool = nn.MaxPool2d(kernel_size=3, stride=2, ceil_mode=True) + else: + self.pool = None + + blocks = [] + for i in range(block_per_stage): + last_block = i == block_per_stage - 1 + blocks += [OsaBlock( + in_chs if i == 0 else out_chs, mid_chs, out_chs, layer_per_block, residual=residual and i > 0, + depthwise=depthwise, attn=attn if last_block else '', norm_layer=norm_layer) + ] + self.blocks = nn.Sequential(*blocks) + + def forward(self, x): + if self.pool is not None: + x = self.pool(x) + x = self.blocks(x) + return x + + +class ClassifierHead(nn.Module): + """Head.""" + + def __init__(self, in_chs, num_classes, pool_type='avg', drop_rate=0.): + super(ClassifierHead, self).__init__() + self.drop_rate = drop_rate + self.global_pool = SelectAdaptivePool2d(pool_type=pool_type) + if num_classes > 0: + self.fc = nn.Linear(in_chs, num_classes, bias=True) + else: + self.fc = nn.Identity() + + def forward(self, x): + x = self.global_pool(x).flatten(1) + if self.drop_rate: + x = F.dropout(x, p=float(self.drop_rate), 
training=self.training) + x = self.fc(x) + return x + + +class VovNet(nn.Module): + + def __init__(self, cfg, in_chans=3, num_classes=1000, global_pool='avg', drop_rate=0., stem_stride=4, + norm_layer=BatchNormAct2d): + """ VovNet (v2) + """ + super(VovNet, self).__init__() + self.num_classes = num_classes + self.drop_rate = drop_rate + assert stem_stride in (4, 2) + + stem_ch = cfg["stem_ch"] + stage_conv_ch = cfg["stage_conv_ch"] + stage_out_ch = cfg["stage_out_ch"] + block_per_stage = cfg["block_per_stage"] + layer_per_block = cfg["layer_per_block"] + + # Stem module + last_stem_stride = stem_stride // 2 + conv_type = SeparableConvBnAct if cfg["depthwise"] else ConvBnAct + self.stem = nn.Sequential(*[ + ConvBnAct(in_chans, stem_ch[0], 3, stride=2, norm_layer=norm_layer), + conv_type(stem_ch[0], stem_ch[1], 3, stride=1, norm_layer=norm_layer), + conv_type(stem_ch[1], stem_ch[2], 3, stride=last_stem_stride, norm_layer=norm_layer), + ]) + + # OSA stages + in_ch_list = stem_ch[-1:] + stage_out_ch[:-1] + stage_args = dict( + residual=cfg["residual"], depthwise=cfg["depthwise"], attn=cfg["attn"], norm_layer=norm_layer) + stages = [] + for i in range(4): # num_stages + downsample = stem_stride == 2 or i > 0 # first stage has no stride/downsample if stem_stride is 4 + stages += [OsaStage( + in_ch_list[i], stage_conv_ch[i], stage_out_ch[i], block_per_stage[i], layer_per_block, + downsample=downsample, **stage_args) + ] + self.num_features = stage_out_ch[i] + self.stages = nn.Sequential(*stages) + + self.head = ClassifierHead(self.num_features, num_classes, pool_type=global_pool, drop_rate=drop_rate) + + for n, m in self.named_modules(): + if isinstance(m, nn.Conv2d): + nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu') + elif isinstance(m, nn.BatchNorm2d): + nn.init.constant_(m.weight, 1.) + nn.init.constant_(m.bias, 0.) 
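Once the VoVNet variants are registered further down, they can be exercised like any other timm model; a minimal sketch (no pretrained weights are published in this patch, so pretrained stays False):

import torch
from timm.models import create_model

model = create_model('ese_vovnet39b', pretrained=False, num_classes=10)
with torch.no_grad():
    out = model(torch.randn(1, 3, 224, 224))
print(out.shape)  # torch.Size([1, 10])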
+ + def get_classifier(self): + return self.head.fc + + def reset_classifier(self, num_classes, global_pool='avg'): + self.head = ClassifierHead(self.num_features, num_classes, pool_type=global_pool, drop_rate=self.drop_rate) + + def forward_features(self, x): + x = self.stem(x) + return self.stages(x) + + def forward(self, x): + x = self.forward_features(x) + return self.head(x) + + +def _vovnet(variant, pretrained=False, **kwargs): + load_strict = True + model_class = VovNet + if kwargs.pop('features_only', False): + assert False, 'Not Implemented' # TODO + load_strict = False + kwargs.pop('num_classes', 0) + model_cfg = model_cfgs[variant] + default_cfg = default_cfgs[variant] + model = model_class(model_cfg, **kwargs) + model.default_cfg = default_cfg + if pretrained: + load_pretrained( + model, default_cfg, + num_classes=kwargs.get('num_classes', 0), in_chans=kwargs.get('in_chans', 3), strict=load_strict) + return model + + + +@register_model +def vovnet39a(pretrained=False, **kwargs): + return _vovnet('vovnet39a', pretrained=pretrained, **kwargs) + + +@register_model +def vovnet57a(pretrained=False, **kwargs): + return _vovnet('vovnet57a', pretrained=pretrained, **kwargs) + + +@register_model +def ese_vovnet19b_slim_dw(pretrained=False, **kwargs): + return _vovnet('ese_vovnet19b_slim_dw', pretrained=pretrained, **kwargs) + + +@register_model +def ese_vovnet19b_dw(pretrained=False, **kwargs): + return _vovnet('ese_vovnet19b_dw', pretrained=pretrained, **kwargs) + + +@register_model +def ese_vovnet19b_slim(pretrained=False, **kwargs): + return _vovnet('ese_vovnet19b_slim', pretrained=pretrained, **kwargs) + + +@register_model +def ese_vovnet39b(pretrained=False, **kwargs): + return _vovnet('ese_vovnet39b', pretrained=pretrained, **kwargs) + + +@register_model +def ese_vovnet57b(pretrained=False, **kwargs): + return _vovnet('ese_vovnet57b', pretrained=pretrained, **kwargs) + + +@register_model +def ese_vovnet99b(pretrained=False, **kwargs): + return _vovnet('ese_vovnet99b', pretrained=pretrained, **kwargs) + + +@register_model +def eca_vovnet39b(pretrained=False, **kwargs): + return _vovnet('eca_vovnet39b', pretrained=pretrained, **kwargs) + + +# Experimental Models + +@register_model +def ese_vovnet39b_iabn(pretrained=False, **kwargs): + norm_layer = get_norm_act_layer('iabn') + return _vovnet('ese_vovnet39b', pretrained=pretrained, norm_layer=norm_layer, **kwargs) + + +@register_model +def ese_vovnet39b_evos(pretrained=False, **kwargs): + def norm_act_fn(num_features, **kwargs): + return create_norm_act('EvoNormSample', num_features, jit=False, **kwargs) + return _vovnet('ese_vovnet39b', pretrained=pretrained, norm_layer=norm_act_fn, **kwargs) diff --git a/validate.py b/validate.py index f8ac7c55..ca031263 100755 --- a/validate.py +++ b/validate.py @@ -24,7 +24,8 @@ try: except ImportError: has_apex = False -from timm.models import create_model, apply_test_time_pool, load_checkpoint, is_model, list_models +from timm.models import create_model, apply_test_time_pool, load_checkpoint, is_model, list_models,\ + set_scriptable, set_no_jit from timm.data import Dataset, DatasetTar, create_loader, resolve_data_config from timm.utils import accuracy, AverageMeter, natural_key, setup_default_logging @@ -84,6 +85,9 @@ def validate(args): args.pretrained = args.pretrained or not args.checkpoint args.prefetcher = not args.no_prefetcher + if args.torchscript: + set_scriptable(True) + # create model model = create_model( args.model, @@ -141,8 +145,10 @@ def validate(args): top5 = AverageMeter() 
     model.eval()
-    end = time.time()
     with torch.no_grad():
+        # warmup, reduce variability of first batch time, especially for comparing torchscript vs non-scripted
+        model(torch.randn((args.batch_size,) + data_config['input_size']).cuda())
+        end = time.time()
         for i, (input, target) in enumerate(loader):
             if args.no_prefetcher:
                 target = target.cuda()

From f28170df3f461ed3a8dfb89246c8bf1e6280feb1 Mon Sep 17 00:00:00 2001
From: Ross Wightman
Date: Mon, 1 Jun 2020 17:26:42 -0700
Subject: [PATCH 07/19] Fix an untested change, remove a debug print

---
 timm/models/efficientnet_blocks.py | 2 +-
 timm/models/layers/norm_act.py     | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/timm/models/efficientnet_blocks.py b/timm/models/efficientnet_blocks.py
index 5f64dc37..236623ff 100644
--- a/timm/models/efficientnet_blocks.py
+++ b/timm/models/efficientnet_blocks.py
@@ -220,7 +220,7 @@ class InvertedResidual(nn.Module):
         has_se = se_ratio is not None and se_ratio > 0.
         self.has_residual = (in_chs == out_chs and stride == 1) and not noskip
         self.drop_path_rate = drop_path_rate
-        print(act_layer)
+
         # Point-wise expansion
         self.conv_pw = create_conv2d(in_chs, mid_chs, exp_kernel_size, padding=pad_type, **conv_kwargs)
         self.bn1 = norm_layer(mid_chs, **norm_kwargs)
diff --git a/timm/models/layers/norm_act.py b/timm/models/layers/norm_act.py
index 48c4d6da..ad1b1eca 100644
--- a/timm/models/layers/norm_act.py
+++ b/timm/models/layers/norm_act.py
@@ -60,7 +60,7 @@ class BatchNormAct2d(nn.BatchNorm2d):
         if torch.jit.is_scripting():
             x = self._forward_jit(x)
         else:
-            self._forward_python(x)
+            x = self._forward_python(x)
         if self.act is not None:
             x = self.act(x)
         return x

From 88129b2569dec4725a84c8a072c7613327ee25cb Mon Sep 17 00:00:00 2001
From: Ross Wightman
Date: Tue, 2 Jun 2020 21:06:10 -0700
Subject: [PATCH 08/19] Add set_layer_config contextmgr to adjust all layer
 configs at once, use in create_model with new args. Remove a few old
 warning-causing constant annotations for jit.
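A minimal usage sketch of the new config machinery (not part of the patch; the entrypoints are the ones added in the diff below, and 'efficientnet_b0' is just an example model name): the flags can either be routed through create_model's new keyword args, or set explicitly with the context manager around any model construction.

    from timm.models import create_model
    from timm.models.layers import set_layer_config

    # Pass the flag through create_model's new kwargs (this is what validate.py
    # now does for --torchscript); a flag left as None keeps its current value.
    model = create_model('efficientnet_b0', scriptable=True)

    # Or set the layer config explicitly around any model construction.
    with set_layer_config(scriptable=True, exportable=None, no_jit=None):
        model = create_model('efficientnet_b0')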
--- timm/models/dpn.py | 2 +- timm/models/factory.py | 20 +++++++++---- timm/models/inception_resnet_v2.py | 1 - timm/models/layers/__init__.py | 3 +- timm/models/layers/cond_conv2d.py | 2 +- timm/models/layers/config.py | 47 ++++++++++++++++++++++++++++-- timm/models/layers/pool2d_same.py | 2 +- validate.py | 6 ++-- 8 files changed, 65 insertions(+), 18 deletions(-) diff --git a/timm/models/dpn.py b/timm/models/dpn.py index 1f45095d..fa4e39fb 100644 --- a/timm/models/dpn.py +++ b/timm/models/dpn.py @@ -10,7 +10,7 @@ from __future__ import division from __future__ import print_function from collections import OrderedDict -from typing import Union, Optional, List, Tuple +from typing import Tuple import torch import torch.nn as nn diff --git a/timm/models/factory.py b/timm/models/factory.py index fbcd004d..03d8cc1f 100644 --- a/timm/models/factory.py +++ b/timm/models/factory.py @@ -1,5 +1,6 @@ from .registry import is_model, is_model_in_modules, model_entrypoint from .helpers import load_checkpoint +from .layers import set_layer_config def create_model( @@ -8,6 +9,9 @@ def create_model( num_classes=1000, in_chans=3, checkpoint_path='', + scriptable=None, + exportable=None, + no_jit=None, **kwargs): """Create a model @@ -17,13 +21,16 @@ def create_model( num_classes (int): number of classes for final fully connected layer (default: 1000) in_chans (int): number of input channels / colors (default: 3) checkpoint_path (str): path of checkpoint to load after model is initialized + scriptable (bool): set layer config so that model is jit scriptable (not working for all models yet) + exportable (bool): set layer config so that model is traceable / ONNX exportable (not fully impl/obeyed yet) + no_jit (bool): set layer config so that model doesn't utilize jit scripted layers (so far activations only) Keyword Args: drop_rate (float): dropout rate for training (default: 0.0) global_pool (str): global pool type (default: 'avg') **: other kwargs are model specific """ - margs = dict(pretrained=pretrained, num_classes=num_classes, in_chans=in_chans) + model_args = dict(pretrained=pretrained, num_classes=num_classes, in_chans=in_chans) # Only EfficientNet and MobileNetV3 models have support for batchnorm params or drop_connect_rate passed as args is_efficientnet = is_model_in_modules(model_name, ['efficientnet', 'mobilenetv3']) @@ -47,11 +54,12 @@ def create_model( if kwargs.get('drop_path_rate', None) is None: kwargs.pop('drop_path_rate', None) - if is_model(model_name): - create_fn = model_entrypoint(model_name) - model = create_fn(**margs, **kwargs) - else: - raise RuntimeError('Unknown model (%s)' % model_name) + with set_layer_config(scriptable=scriptable, exportable=exportable, no_jit=no_jit): + if is_model(model_name): + create_fn = model_entrypoint(model_name) + model = create_fn(**model_args, **kwargs) + else: + raise RuntimeError('Unknown model (%s)' % model_name) if checkpoint_path: load_checkpoint(model, checkpoint_path) diff --git a/timm/models/inception_resnet_v2.py b/timm/models/inception_resnet_v2.py index 34b14570..f8772cc8 100644 --- a/timm/models/inception_resnet_v2.py +++ b/timm/models/inception_resnet_v2.py @@ -193,7 +193,6 @@ class Mixed_7a(nn.Module): class Block8(nn.Module): - __constants__ = ['relu'] # for pre 1.4 torchscript compat def __init__(self, scale=1.0, no_relu=False): super(Block8, self).__init__() diff --git a/timm/models/layers/__init__.py b/timm/models/layers/__init__.py index b9c26fea..1ebc4be0 100644 --- a/timm/models/layers/__init__.py +++ 
b/timm/models/layers/__init__.py @@ -4,7 +4,8 @@ from .adaptive_avgmax_pool import \ from .anti_aliasing import AntiAliasDownsampleLayer from .blur_pool import BlurPool2d from .cond_conv2d import CondConv2d, get_condconv_initializer -from .config import is_exportable, is_scriptable, set_exportable, set_scriptable, is_no_jit, set_no_jit +from .config import is_exportable, is_scriptable, is_no_jit, set_exportable, set_scriptable, set_no_jit,\ + set_layer_config from .conv2d_same import Conv2dSame from .conv_bn_act import ConvBnAct from .create_act import create_act_layer, get_act_layer, get_act_fn diff --git a/timm/models/layers/cond_conv2d.py b/timm/models/layers/cond_conv2d.py index b1759d99..df98f71a 100644 --- a/timm/models/layers/cond_conv2d.py +++ b/timm/models/layers/cond_conv2d.py @@ -38,7 +38,7 @@ class CondConv2d(nn.Module): Grouped convolution hackery for parallel execution of the per-sample kernel filters inspired by this discussion: https://github.com/pytorch/pytorch/issues/17983 """ - __constants__ = ['bias', 'in_channels', 'out_channels', 'dynamic_padding'] + __constants__ = ['in_channels', 'out_channels', 'dynamic_padding'] def __init__(self, in_channels, out_channels, kernel_size=3, stride=1, padding='', dilation=1, groups=1, bias=False, num_experts=4): diff --git a/timm/models/layers/config.py b/timm/models/layers/config.py index 2c0faf23..f07b9d78 100644 --- a/timm/models/layers/config.py +++ b/timm/models/layers/config.py @@ -1,13 +1,18 @@ -""" Model / Layer Config Singleton +""" Model / Layer Config singleton state """ -from typing import Any +from typing import Any, Optional -__all__ = ['is_exportable', 'is_scriptable', 'set_exportable', 'set_scriptable', 'is_no_jit', 'set_no_jit'] +__all__ = [ + 'is_exportable', 'is_scriptable', 'is_no_jit', + 'set_exportable', 'set_scriptable', 'set_no_jit', 'set_layer_config' +] # Set to True if prefer to have layers with no jit optimization (includes activations) _NO_JIT = False # Set to True if prefer to have activation layers with no jit optimization +# NOTE not currently used as no difference between no_jit and no_activation jit as only layers obeying +# the jit flags so far are activations. This will change as more layers are updated and/or added. _NO_ACTIVATION_JIT = False # Set to True if exporting a model with Same padding via ONNX @@ -72,3 +77,39 @@ class set_scriptable: global _SCRIPTABLE _SCRIPTABLE = self.prev return False + + +class set_layer_config: + """ Layer config context manager that allows setting all layer config flags at once. + If a flag arg is None, it will not change the current value. 
+ """ + def __init__( + self, + scriptable: Optional[bool] = None, + exportable: Optional[bool] = None, + no_jit: Optional[bool] = None, + no_activation_jit: Optional[bool] = None): + global _SCRIPTABLE + global _EXPORTABLE + global _NO_JIT + global _NO_ACTIVATION_JIT + self.prev = _SCRIPTABLE, _EXPORTABLE, _NO_JIT, _NO_ACTIVATION_JIT + if scriptable is not None: + _SCRIPTABLE = scriptable + if exportable is not None: + _EXPORTABLE = exportable + if no_jit is not None: + _NO_JIT = no_jit + if no_activation_jit is not None: + _NO_ACTIVATION_JIT = no_activation_jit + + def __enter__(self) -> None: + pass + + def __exit__(self, *args: Any) -> bool: + global _SCRIPTABLE + global _EXPORTABLE + global _NO_JIT + global _NO_ACTIVATION_JIT + _SCRIPTABLE, _EXPORTABLE, _NO_JIT, _NO_ACTIVATION_JIT = self.prev + return False diff --git a/timm/models/layers/pool2d_same.py b/timm/models/layers/pool2d_same.py index 7135f831..51242619 100644 --- a/timm/models/layers/pool2d_same.py +++ b/timm/models/layers/pool2d_same.py @@ -5,7 +5,7 @@ Hacked together by Ross Wightman import torch import torch.nn as nn import torch.nn.functional as F -from typing import Union, List, Tuple, Optional +from typing import List, Tuple, Optional from .helpers import tup_pair from .padding import pad_same, get_padding_value diff --git a/validate.py b/validate.py index ca031263..50010cce 100755 --- a/validate.py +++ b/validate.py @@ -85,15 +85,13 @@ def validate(args): args.pretrained = args.pretrained or not args.checkpoint args.prefetcher = not args.no_prefetcher - if args.torchscript: - set_scriptable(True) - # create model model = create_model( args.model, + pretrained=args.pretrained, num_classes=args.num_classes, in_chans=3, - pretrained=args.pretrained) + scriptable=args.torchscript) if args.checkpoint: load_checkpoint(model, args.checkpoint, args.use_ema) From 7be299504fd0d49619abf027ee48d4e33af0a51c Mon Sep 17 00:00:00 2001 From: Ross Wightman Date: Wed, 3 Jun 2020 00:00:37 -0700 Subject: [PATCH 09/19] Add missing feature_info() on MobileNetV3, make hook feature output order/type consistent with bottleneck (list, decreasing fmap size) --- timm/models/efficientnet.py | 5 ++++- timm/models/feature_hooks.py | 7 +++++-- timm/models/mobilenetv3.py | 14 +++++++++++++- 3 files changed, 22 insertions(+), 4 deletions(-) diff --git a/timm/models/efficientnet.py b/timm/models/efficientnet.py index fbd7f420..47cd0b9d 100644 --- a/timm/models/efficientnet.py +++ b/timm/models/efficientnet.py @@ -24,9 +24,12 @@ An implementation of EfficienNet that covers variety of related models with effi Hacked together by Ross Wightman """ +import torch import torch.nn as nn import torch.nn.functional as F +from typing import List + from timm.data import IMAGENET_DEFAULT_MEAN, IMAGENET_DEFAULT_STD, IMAGENET_INCEPTION_MEAN, IMAGENET_INCEPTION_STD from .efficientnet_blocks import round_channels, resolve_bn_args, resolve_act_layer, BN_EPS_TF_DEFAULT from .efficientnet_builder import EfficientNetBuilder, decode_arch_def, efficientnet_init_weights @@ -471,7 +474,7 @@ class EfficientNetFeatures(nn.Module): return self._feature_info[idx] return [self._feature_info[i] for i in self.out_indices] - def forward(self, x): + def forward(self, x) -> List[torch.Tensor]: x = self.conv_stem(x) x = self.bn1(x) x = self.act1(x) diff --git a/timm/models/feature_hooks.py b/timm/models/feature_hooks.py index 8ffcda86..7b7b3da1 100644 --- a/timm/models/feature_hooks.py +++ b/timm/models/feature_hooks.py @@ -1,5 +1,8 @@ +import torch + from collections import defaultdict, 
OrderedDict from functools import partial +from typing import List class FeatureHooks: @@ -25,7 +28,7 @@ class FeatureHooks: x = x[0] # unwrap input tuple self._feature_outputs[x.device][name] = x - def get_output(self, device): - output = tuple(self._feature_outputs[device].values())[::-1] + def get_output(self, device) -> List[torch.tensor]: + output = list(self._feature_outputs[device].values()) self._feature_outputs[device] = OrderedDict() # clear after reading return output diff --git a/timm/models/mobilenetv3.py b/timm/models/mobilenetv3.py index e1a700b0..9c4a9af5 100644 --- a/timm/models/mobilenetv3.py +++ b/timm/models/mobilenetv3.py @@ -7,9 +7,12 @@ Paper: Searching for MobileNetV3 - https://arxiv.org/abs/1905.02244 Hacked together by Ross Wightman """ +import torch import torch.nn as nn import torch.nn.functional as F +from typing import List + from timm.data import IMAGENET_DEFAULT_MEAN, IMAGENET_DEFAULT_STD, IMAGENET_INCEPTION_MEAN, IMAGENET_INCEPTION_STD from .efficientnet_blocks import round_channels, resolve_bn_args, resolve_act_layer, BN_EPS_TF_DEFAULT from .efficientnet_builder import EfficientNetBuilder, decode_arch_def, efficientnet_init_weights @@ -206,7 +209,16 @@ class MobileNetV3Features(nn.Module): return self._feature_info[idx]['num_chs'] return [self._feature_info[i]['num_chs'] for i in self.out_indices] - def forward(self, x): + def feature_info(self, idx=None): + """ Feature Channel Shortcut + Returns feature channel count for each output index if idx == None. If idx is an integer, will + return feature channel count for that feature block index (independent of out_indices setting). + """ + if isinstance(idx, int): + return self._feature_info[idx] + return [self._feature_info[i] for i in self.out_indices] + + def forward(self, x) -> List[torch.Tensor]: x = self.conv_stem(x) x = self.bn1(x) x = self.act1(x) From e78daf586a427895516acf9997be60b74f9592fe Mon Sep 17 00:00:00 2001 From: Ross Wightman Date: Wed, 3 Jun 2020 13:30:03 -0700 Subject: [PATCH 10/19] better densenet121 and densenetblur121d weights --- timm/models/densenet.py | 36 ++++++++++++++++++++---------------- 1 file changed, 20 insertions(+), 16 deletions(-) diff --git a/timm/models/densenet.py b/timm/models/densenet.py index b4e31807..d8edacd7 100644 --- a/timm/models/densenet.py +++ b/timm/models/densenet.py @@ -30,13 +30,16 @@ def _cfg(url=''): default_cfgs = { - 'densenet121': _cfg(url='https://download.pytorch.org/models/densenet121-a639ec97.pth'), + 'densenet121': _cfg( + url='https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-weights/densenet121_ra-50efcf5c.pth'), 'densenet121d': _cfg(url=''), - 'densenet121tn': _cfg(url=''), + 'densenetblur121d': _cfg( + url='https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-weights/densenetblur121d_ra-100dcfbc.pth'), 'densenet169': _cfg(url='https://download.pytorch.org/models/densenet169-b2777c0a.pth'), 'densenet201': _cfg(url='https://download.pytorch.org/models/densenet201-c1103571.pth'), 'densenet161': _cfg(url='https://download.pytorch.org/models/densenet161-8d451a50.pth'), 'densenet264': _cfg(url=''), + 'tv_densenet121': _cfg(url='https://download.pytorch.org/models/densenet121-a639ec97.pth'), } @@ -160,7 +163,8 @@ class DenseNet(nn.Module): def __init__(self, growth_rate=32, block_config=(6, 12, 24, 16), bn_size=4, stem_type='', num_classes=1000, in_chans=3, global_pool='avg', - norm_layer=BatchNormAct2d, aa_layer=None, drop_rate=0, memory_efficient=False): + norm_layer=BatchNormAct2d, aa_layer=None, 
drop_rate=0, memory_efficient=False, + aa_stem_only=True): self.num_classes = num_classes self.drop_rate = drop_rate super(DenseNet, self).__init__() @@ -209,10 +213,11 @@ class DenseNet(nn.Module): ) self.features.add_module('denseblock%d' % (i + 1), block) num_features = num_features + num_layers * growth_rate + transition_aa_layer = None if aa_stem_only else aa_layer if i != len(block_config) - 1: trans = DenseTransition( num_input_features=num_features, num_output_features=num_features // 2, - norm_layer=norm_layer) + norm_layer=norm_layer, aa_layer=transition_aa_layer) self.features.add_module('transition%d' % (i + 1), trans) num_features = num_features // 2 @@ -310,7 +315,7 @@ def densenetblur121d(pretrained=False, **kwargs): `"Densely Connected Convolutional Networks" ` """ model = _densenet( - 'densenet121', growth_rate=32, block_config=(6, 12, 24, 16), pretrained=pretrained, stem_type='deep', + 'densenetblur121d', growth_rate=32, block_config=(6, 12, 24, 16), pretrained=pretrained, stem_type='deep', aa_layer=BlurPool2d, **kwargs) return model @@ -326,17 +331,6 @@ def densenet121d(pretrained=False, **kwargs): return model -@register_model -def densenet121tn(pretrained=False, **kwargs): - r"""Densenet-121 model from - `"Densely Connected Convolutional Networks" ` - """ - model = _densenet( - 'densenet121tn', growth_rate=32, block_config=(6, 12, 24, 16), stem_type='deep_tiered_narrow', - pretrained=pretrained, **kwargs) - return model - - @register_model def densenet121d_evob(pretrained=False, **kwargs): r"""Densenet-121 model from @@ -414,3 +408,13 @@ def densenet264(pretrained=False, **kwargs): model = _densenet( 'densenet264', growth_rate=48, block_config=(6, 12, 64, 48), pretrained=pretrained, **kwargs) return model + + +@register_model +def tv_densenet121(pretrained=False, **kwargs): + r"""Densenet-121 model with original Torchvision weights, from + `"Densely Connected Convolutional Networks" ` + """ + model = _densenet( + 'tv_densenet121', growth_rate=32, block_config=(6, 12, 24, 16), pretrained=pretrained, **kwargs) + return model From a7e8cadd1523b1e05f76e188dc97ed51983b529d Mon Sep 17 00:00:00 2001 From: Ross Wightman Date: Wed, 3 Jun 2020 17:13:52 -0700 Subject: [PATCH 11/19] Remove pointless densenet configs, add an iabn version of 264 as it makes more sense to try someday... 
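A minimal usage sketch for the experimental config added below (not part of the patch): the Inplace-ABN variant is registered like any other model, but it ships no pretrained weights, and the 'iabn' norm/act layer requires the optional inplace-abn package to be installed.

    from timm.models import create_model

    # deep-stem DenseNet-264 with Inplace-ABN norm/act layers; the pretrained URL
    # is empty, so this is a train-from-scratch configuration
    model = create_model('densenet264d_iabn', pretrained=False)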
--- timm/models/densenet.py | 52 +++++++++++------------------------------ 1 file changed, 13 insertions(+), 39 deletions(-) diff --git a/timm/models/densenet.py b/timm/models/densenet.py index d8edacd7..59a15a85 100644 --- a/timm/models/densenet.py +++ b/timm/models/densenet.py @@ -39,6 +39,7 @@ default_cfgs = { 'densenet201': _cfg(url='https://download.pytorch.org/models/densenet201-c1103571.pth'), 'densenet161': _cfg(url='https://download.pytorch.org/models/densenet161-8d451a50.pth'), 'densenet264': _cfg(url=''), + 'densenet264d_iabn': _cfg(url=''), 'tv_densenet121': _cfg(url='https://download.pytorch.org/models/densenet121-a639ec97.pth'), } @@ -331,45 +332,6 @@ def densenet121d(pretrained=False, **kwargs): return model -@register_model -def densenet121d_evob(pretrained=False, **kwargs): - r"""Densenet-121 model from - `"Densely Connected Convolutional Networks" ` - """ - def norm_act_fn(num_features, **kwargs): - return create_norm_act('EvoNormBatch', num_features, jit=True, **kwargs) - model = _densenet( - 'densenet121d', growth_rate=32, block_config=(6, 12, 24, 16), stem_type='deep', - norm_layer=norm_act_fn, pretrained=pretrained, **kwargs) - return model - - -@register_model -def densenet121d_evos(pretrained=False, **kwargs): - r"""Densenet-121 model from - `"Densely Connected Convolutional Networks" ` - """ - def norm_act_fn(num_features, **kwargs): - return create_norm_act('EvoNormSample', num_features, jit=True, **kwargs) - model = _densenet( - 'densenet121d', growth_rate=32, block_config=(6, 12, 24, 16), stem_type='deep', - norm_layer=norm_act_fn, pretrained=pretrained, **kwargs) - return model - - -@register_model -def densenet121d_iabn(pretrained=False, **kwargs): - r"""Densenet-121 model from - `"Densely Connected Convolutional Networks" ` - """ - def norm_act_fn(num_features, **kwargs): - return create_norm_act('iabn', num_features, **kwargs) - model = _densenet( - 'densenet121tn', growth_rate=32, block_config=(6, 12, 24, 16), stem_type='deep', - norm_layer=norm_act_fn, pretrained=pretrained, **kwargs) - return model - - @register_model def densenet169(pretrained=False, **kwargs): r"""Densenet-169 model from @@ -410,6 +372,18 @@ def densenet264(pretrained=False, **kwargs): return model +@register_model +def densenet264d_iabn(pretrained=False, **kwargs): + r"""Densenet-264 model with deep stem and Inplace-ABN + """ + def norm_act_fn(num_features, **kwargs): + return create_norm_act('iabn', num_features, **kwargs) + model = _densenet( + 'densenet264d_iabn', growth_rate=48, block_config=(6, 12, 64, 48), stem_type='deep', + norm_layer=norm_act_fn, pretrained=pretrained, **kwargs) + return model + + @register_model def tv_densenet121(pretrained=False, **kwargs): r"""Densenet-121 model with original Torchvision weights, from From 4ddde1d3a45ef7848a5f6fd4d87676ba1141246c Mon Sep 17 00:00:00 2001 From: Ross Wightman Date: Fri, 5 Jun 2020 11:04:51 -0700 Subject: [PATCH 12/19] Fix two regressions --- timm/models/layers/se.py | 2 +- timm/models/res2net.py | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/timm/models/layers/se.py b/timm/models/layers/se.py index 83389fc5..4e47cc96 100644 --- a/timm/models/layers/se.py +++ b/timm/models/layers/se.py @@ -5,7 +5,7 @@ from .create_act import get_act_fn class SEModule(nn.Module): def __init__(self, channels, reduction=16, act_layer=nn.ReLU, min_channels=8, reduction_channels=None, - gate_fn='hard_sigmoid'): + gate_fn='sigmoid'): super(SEModule, self).__init__() self.avg_pool = nn.AdaptiveAvgPool2d(1) reduction_channels 
= reduction_channels or max(channels // reduction, min_channels) diff --git a/timm/models/res2net.py b/timm/models/res2net.py index b095de30..c3773dd5 100644 --- a/timm/models/res2net.py +++ b/timm/models/res2net.py @@ -99,9 +99,9 @@ class Bottle2neck(nn.Module): spx = torch.split(out, self.width, 1) spo = [] - sp = spx[0] + sp = spx[0] # redundant, for torchscript for i, (conv, bn) in enumerate(zip(self.convs, self.bns)): - if self.is_first: + if i == 0 or self.is_first: sp = spx[i] else: sp = sp + spx[i] From 13c0a6290e29cb6c1b1ca7a8928f31a2c00faedd Mon Sep 17 00:00:00 2001 From: Ross Wightman Date: Sat, 6 Jun 2020 22:49:19 -0700 Subject: [PATCH 13/19] Add ESE-VovNet39 weights, 79.32 top-1 --- timm/models/vovnet.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/timm/models/vovnet.py b/timm/models/vovnet.py index bedff10c..70dbac12 100644 --- a/timm/models/vovnet.py +++ b/timm/models/vovnet.py @@ -149,7 +149,8 @@ default_cfgs = dict( ese_vovnet19b_slim_dw=_cfg(url=''), ese_vovnet19b_dw=_cfg(url=''), ese_vovnet19b_slim=_cfg(url=''), - ese_vovnet39b=_cfg(url=''), + ese_vovnet39b=_cfg( + url='https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-weights/ese_vovnet39b-f912fe73.pth'), ese_vovnet57b=_cfg(url=''), ese_vovnet99b=_cfg(url=''), eca_vovnet39b=_cfg(url=''), @@ -313,6 +314,8 @@ class VovNet(nn.Module): elif isinstance(m, nn.BatchNorm2d): nn.init.constant_(m.weight, 1.) nn.init.constant_(m.bias, 0.) + elif isinstance(m, nn.Linear): + nn.init.zeros_(m.bias) def get_classifier(self): return self.head.fc From b41b8d0108221ca4c8a1f04602dfa49da49f7b0d Mon Sep 17 00:00:00 2001 From: Ross Wightman Date: Tue, 9 Jun 2020 14:36:24 -0700 Subject: [PATCH 14/19] Update results csv files --- results/results-imagenet-a.csv | 53 ++++++++++++++--- results/results-imagenet.csv | 57 ++++++++++++++---- .../results-imagenetv2-matched-frequency.csv | 35 +++++++---- results/results-sketch.csv | 59 +++++++++++++++---- 4 files changed, 160 insertions(+), 44 deletions(-) diff --git a/results/results-imagenet-a.csv b/results/results-imagenet-a.csv index 9f54457c..14045ac8 100644 --- a/results/results-imagenet-a.csv +++ b/results/results-imagenet-a.csv @@ -14,7 +14,9 @@ tf_efficientnet_b8,29.3733,70.6267,57.0667,42.9333,87.41,672,0.954,bicubic ig_resnext101_32x8d,28.7067,71.2933,52.32,47.68,88.79,224,0.875,bilinear swsl_resnext101_32x16d,27.9467,72.0533,52.32,47.68,194.03,224,0.875,bilinear tf_efficientnet_b7_ap,27.8133,72.1867,54.7733,45.2267,66.35,600,0.949,bicubic +resnest269e,27.6133,72.3867,53.1067,46.8933,110.93,416,0.875,bilinear tresnet_xl_448,26.88,73.12,51.0933,48.9067,78.44,448,0.875,bilinear +resnest200e,26.4267,73.5733,51.9333,48.0667,70.2,320,0.875,bilinear swsl_resnext101_32x4d,25.3467,74.6533,49.6267,50.3733,44.18,224,0.875,bilinear tf_efficientnet_b7,25.2533,74.7467,51.6667,48.3333,66.35,600,0.949,bicubic tresnet_l_448,24.5733,75.4267,48.6,51.4,55.99,448,0.875,bilinear @@ -26,6 +28,7 @@ tf_efficientnet_b3_ns,19.4133,80.5867,44.6267,55.3733,12.23,300,0.904,bicubic swsl_resnext50_32x4d,18.0667,81.9333,41.8667,58.1333,25.03,224,0.875,bilinear ssl_resnext101_32x16d,17.2133,82.7867,39.9467,60.0533,194.03,224,0.875,bilinear tf_efficientnet_b5,17.0667,82.9333,41.9067,58.0933,30.39,456,0.934,bicubic +resnest101e,16.4933,83.5067,40.7467,59.2533,48.28,256,0.875,bilinear swsl_resnet50,15.9867,84.0133,38.8533,61.1467,25.56,224,0.875,bilinear ssl_resnext101_32x8d,15.12,84.88,37.72,62.28,88.79,224,0.875,bilinear 
tf_efficientnet_b4_ap,13.68,86.32,35.92,64.08,19.34,380,0.922,bicubic @@ -36,14 +39,16 @@ nasnetalarge,12.5733,87.4267,33.4133,66.5867,88.75,331,0.875,bicubic ssl_resnext101_32x4d,12.12,87.88,31.8933,68.1067,44.18,224,0.875,bilinear tf_efficientnet_b2_ns,11.7867,88.2133,32.96,67.04,9.11,260,0.89,bicubic gluon_senet154,9.9067,90.0933,26.4533,73.5467,115.09,224,0.875,bicubic +resnest50d_4s2x40d,9.7867,90.2133,29.1467,70.8533,30.42,224,0.875,bicubic ssl_resnext50_32x4d,9.6667,90.3333,28.4267,71.5733,25.03,224,0.875,bilinear senet154,9.4533,90.5467,26.44,73.56,115.09,224,0.875,bilinear tresnet_xl,9.3067,90.6933,28.4133,71.5867,78.44,224,0.875,bilinear -efficientnet_b3a,9.2667,90.7333,28.4267,71.5733,12.23,320,1,bicubic +efficientnet_b3a,9.2667,90.7333,28.4267,71.5733,12.23,320,1.0,bicubic efficientnet_b3,8.9467,91.0533,28.2133,71.7867,12.23,300,0.904,bicubic inception_v4,8.92,91.08,24.7067,75.2933,42.68,299,0.875,bicubic gluon_seresnext101_64x4d,8.8667,91.1333,27.32,72.68,88.23,224,0.875,bicubic tf_efficientnet_b1_ns,8.6133,91.3867,27.28,72.72,7.79,240,0.882,bicubic +resnest50d_1s4x24d,8.52,91.48,26.7867,73.2133,25.68,224,0.875,bicubic ecaresnet50d,8.5067,91.4933,26.2667,73.7333,25.58,224,0.875,bicubic gluon_xception65,8.4667,91.5333,25.1333,74.8667,39.92,299,0.875,bicubic gluon_resnet152_v1d,8.4133,91.5867,23.4533,76.5467,60.21,224,0.875,bicubic @@ -55,23 +60,30 @@ ens_adv_inception_resnet_v2,7.9867,92.0133,23.8267,76.1733,55.84,299,0.8975,bicu tf_efficientnet_lite4,7.9333,92.0667,25.56,74.44,13.01,380,0.92,bilinear tresnet_l,7.88,92.12,25.1867,74.8133,55.99,224,0.875,bilinear gluon_resnet152_v1s,7.8667,92.1333,23.1733,76.8267,60.32,224,0.875,bicubic +resnest50d,7.7467,92.2533,25.2933,74.7067,27.48,224,0.875,bilinear gluon_resnext101_64x4d,7.7067,92.2933,23.24,76.76,83.46,224,0.875,bicubic skresnext50_32x4d,7.08,92.92,23.0267,76.9733,27.48,224,0.875,bicubic -ssl_resnet50,7,93,23.92,76.08,25.56,224,0.875,bilinear +ssl_resnet50,7.0,93.0,23.92,76.08,25.56,224,0.875,bilinear +regnety_320,6.92,93.08,23.04,76.96,145.05,224,0.875,bicubic ecaresnet101d_pruned,6.8,93.2,24.2,75.8,24.88,224,0.875,bicubic ecaresnetlight,6.76,93.24,22.56,77.44,30.16,224,0.875,bicubic -efficientnet_b2a,6.76,93.24,23.4933,76.5067,9.11,288,1,bicubic +efficientnet_b2a,6.76,93.24,23.4933,76.5067,9.11,288,1.0,bicubic seresnext101_32x4d,6.4133,93.5867,21.52,78.48,48.96,224,0.875,bilinear efficientnet_b2,6.0933,93.9067,21.9333,78.0667,9.11,260,0.875,bicubic gluon_resnext101_32x4d,6.04,93.96,21.1333,78.8667,44.18,224,0.875,bicubic +regnetx_320,5.9867,94.0133,19.88,80.12,107.81,224,0.875,bicubic +ese_vovnet39b,5.9733,94.0267,21.2933,78.7067,24.57,224,0.875,bicubic gluon_resnet101_v1d,5.92,94.08,19.9467,80.0533,44.57,224,0.875,bicubic gluon_seresnext50_32x4d,5.7867,94.2133,21.4267,78.5733,27.56,224,0.875,bicubic efficientnet_b3_pruned,5.7333,94.2667,21.36,78.64,9.86,300,0.904,bicubic +regnety_160,5.64,94.36,19.3467,80.6533,83.59,224,0.875,bicubic gluon_inception_v3,5.5067,94.4933,19.9467,80.0533,23.83,299,0.875,bicubic mixnet_xl,5.48,94.52,21.0933,78.9067,11.9,224,0.875,bicubic tresnet_m,5.44,94.56,19.96,80.04,31.39,224,0.875,bilinear +regnety_120,5.4133,94.5867,19.8533,80.1467,51.82,224,0.875,bicubic gluon_resnet101_v1s,5.28,94.72,19.5467,80.4533,44.67,224,0.875,bicubic hrnet_w64,5.1333,94.8667,19.4533,80.5467,128.06,224,0.875,bilinear +regnety_080,5.0,95.0,18.6,81.4,39.18,224,0.875,bicubic efficientnet_b2_pruned,4.9467,95.0533,19.3467,80.6533,8.31,260,0.89,bicubic dpn107,4.88,95.12,17.6133,82.3867,86.92,224,0.875,bicubic 
gluon_resnet152_v1c,4.8667,95.1333,17.7733,82.2267,60.21,224,0.875,bicubic @@ -84,38 +96,45 @@ gluon_resnet152_v1b,4.5867,95.4133,16.5333,83.4667,60.19,224,0.875,bicubic ecaresnet50d_pruned,4.5467,95.4533,18.5467,81.4533,19.94,224,0.875,bicubic dpn92,4.4933,95.5067,18.2,81.8,37.67,224,0.875,bicubic hrnet_w44,4.4933,95.5067,17.3467,82.6533,67.06,224,0.875,bilinear +regnetx_160,4.3733,95.6267,17.0933,82.9067,54.28,224,0.875,bicubic resnext50d_32x4d,4.3467,95.6533,17.7733,82.2267,25.05,224,0.875,bicubic xception,4.3467,95.6533,16.76,83.24,22.86,299,0.8975,bicubic seresnext50_32x4d,4.28,95.72,17.8133,82.1867,27.56,224,0.875,bilinear resnext50_32x4d,4.2533,95.7467,18.3867,81.6133,25.03,224,0.875,bicubic tf_efficientnet_cc_b1_8e,4.24,95.76,15.9467,84.0533,39.72,240,0.882,bicubic +regnety_064,4.2267,95.7733,17.1867,82.8133,30.58,224,0.875,bicubic tf_efficientnet_el,4.2267,95.7733,18.1733,81.8267,10.59,300,0.904,bicubic -inception_v3,4.2,95.8,16.2933,83.7067,27.16,299,0.875,bicubic +inception_v3,4.1867,95.8133,16.2933,83.7067,23.83,299,0.875,bicubic tf_efficientnet_b2_ap,4.1733,95.8267,18.32,81.68,9.11,260,0.89,bicubic seresnet152,4.1467,95.8533,15.8933,84.1067,66.82,224,0.875,bilinear resnext101_32x8d,4.1333,95.8667,16.9867,83.0133,88.79,224,0.875,bilinear tf_efficientnet_b0_ns,4.1333,95.8667,17.68,82.32,5.29,224,0.875,bicubic dpn98,4.08,95.92,15.9467,84.0533,61.57,224,0.875,bicubic -res2net101_26w_4s,4,96,14.8267,85.1733,45.21,224,0.875,bilinear +res2net101_26w_4s,4.0,96.0,14.8267,85.1733,45.21,224,0.875,bilinear efficientnet_b1,3.9733,96.0267,15.76,84.24,7.79,240,0.875,bicubic tf_efficientnet_lite3,3.9333,96.0667,16.52,83.48,8.2,300,0.904,bilinear tf_efficientnet_b2,3.7733,96.2267,16.6133,83.3867,9.11,260,0.89,bicubic +regnety_040,3.7467,96.2533,16.4,83.6,20.65,224,0.875,bicubic hrnet_w30,3.68,96.32,15.5733,84.4267,37.71,224,0.875,bilinear hrnet_w32,3.6533,96.3467,14.7867,85.2133,41.23,224,0.875,bilinear hrnet_w40,3.6533,96.3467,15.44,84.56,57.56,224,0.875,bilinear +regnetx_120,3.6267,96.3733,15.9733,84.0267,46.11,224,0.875,bicubic seresnext26t_32x4d,3.6133,96.3867,15.8933,84.1067,16.82,224,0.875,bicubic tf_efficientnet_b1_ap,3.5467,96.4533,15.0667,84.9333,7.79,240,0.882,bicubic seresnext26tn_32x4d,3.5067,96.4933,15.76,84.24,16.81,224,0.875,bicubic +resnest26d,3.4933,96.5067,15.6667,84.3333,17.07,224,0.875,bilinear dla169,3.4667,96.5333,15.3333,84.6667,53.99,224,0.875,bilinear gluon_resnext50_32x4d,3.4533,96.5467,16.12,83.88,25.03,224,0.875,bicubic mixnet_l,3.44,96.56,15.3067,84.6933,7.33,224,0.875,bicubic seresnext26d_32x4d,3.4,96.6,16.16,83.84,16.81,224,0.875,bicubic -resnetblur50,3.3333,96.6667,15.5867,84.4133,25.56,224,0.875,bicubic res2net50_26w_8s,3.3333,96.6667,14.04,85.96,48.4,224,0.875,bilinear +resnetblur50,3.3333,96.6667,15.5867,84.4133,25.56,224,0.875,bicubic dla102x,3.3067,96.6933,15.12,84.88,26.77,224,0.875,bilinear gluon_resnet101_v1c,3.3067,96.6933,14.12,85.88,44.57,224,0.875,bicubic seresnet101,3.2533,96.7467,15.4533,84.5467,49.33,224,0.875,bilinear +densenetblur121d,3.0667,96.9333,14.28,85.72,8.0,224,0.875,bicubic dla60_res2next,3.04,96.96,14.4533,85.5467,17.33,224,0.875,bilinear +regnety_032,3.0267,96.9733,14.24,85.76,19.44,224,0.875,bicubic gluon_resnet50_v1d,3.0133,96.9867,14.6267,85.3733,25.58,224,0.875,bicubic wide_resnet101_2,2.96,97.04,13.9467,86.0533,126.89,224,0.875,bilinear efficientnet_b1_pruned,2.9333,97.0667,14.4133,85.5867,6.33,240,0.882,bicubic @@ -124,6 +143,7 @@ tf_efficientnet_b1,2.8667,97.1333,13.5067,86.4933,7.79,240,0.882,bicubic 
res2net50_26w_6s,2.84,97.16,12.6,87.4,37.05,224,0.875,bilinear efficientnet_b0,2.8133,97.1867,13.9067,86.0933,5.29,224,0.875,bicubic tf_mixnet_l,2.8133,97.1867,13.04,86.96,7.33,224,0.875,bicubic +regnetx_064,2.7867,97.2133,13.88,86.12,26.21,224,0.875,bicubic dpn68b,2.7067,97.2933,12.64,87.36,12.61,224,0.875,bicubic selecsls60b,2.6933,97.3067,13.1733,86.8267,32.77,224,0.875,bicubic tf_efficientnet_cc_b0_8e,2.68,97.32,12.7733,87.2267,24.01,224,0.875,bicubic @@ -134,24 +154,30 @@ mixnet_m,2.5467,97.4533,12.4267,87.5733,5.01,224,0.875,bicubic skresnet34,2.52,97.48,12.7733,87.2267,22.28,224,0.875,bicubic efficientnet_es,2.3733,97.6267,13.88,86.12,5.44,224,0.875,bicubic resnet152,2.36,97.64,12.2,87.8,60.19,224,0.875,bilinear +regnetx_080,2.3467,97.6533,12.6933,87.3067,39.57,224,0.875,bicubic swsl_resnet18,2.3333,97.6667,11.2133,88.7867,11.69,224,0.875,bilinear wide_resnet50_2,2.32,97.68,11.8,88.2,68.88,224,0.875,bilinear seresnext26_32x4d,2.2933,97.7067,12.44,87.56,16.79,224,0.875,bicubic hrnet_w18,2.2667,97.7333,11.8533,88.1467,21.3,224,0.875,bilinear dla102,2.2533,97.7467,12.12,87.88,33.73,224,0.875,bilinear resnet50,2.2267,97.7733,11.3333,88.6667,25.56,224,0.875,bicubic +regnety_016,2.1733,97.8267,11.44,88.56,11.2,224,0.875,bicubic +regnetx_040,2.16,97.84,11.8,88.2,22.12,224,0.875,bicubic +resnest14d,2.1467,97.8533,10.4,89.6,10.61,224,0.875,bilinear selecsls60,2.08,97.92,12.84,87.16,30.67,224,0.875,bicubic tf_efficientnet_cc_b0_4e,2.08,97.92,10.9733,89.0267,13.31,224,0.875,bicubic res2next50,2.0667,97.9333,11.4533,88.5467,24.67,224,0.875,bilinear seresnet50,2.0667,97.9333,12.2667,87.7333,28.09,224,0.875,bilinear densenet161,1.9733,98.0267,10.5867,89.4133,28.68,224,0.875,bicubic tf_efficientnet_b0_ap,1.96,98.04,10.8,89.2,5.29,224,0.875,bicubic +regnetx_032,1.92,98.08,10.9467,89.0533,15.3,224,0.875,bicubic tf_efficientnet_em,1.8133,98.1867,11.6267,88.3733,6.9,240,0.882,bicubic tf_mixnet_m,1.8133,98.1867,10.5467,89.4533,5.01,224,0.875,bicubic tf_efficientnet_lite2,1.8,98.2,11.1467,88.8533,6.09,260,0.89,bicubic res2net50_14w_8s,1.7867,98.2133,10.3467,89.6533,25.06,224,0.875,bilinear res2net50_26w_4s,1.7733,98.2267,10.44,89.56,25.7,224,0.875,bilinear mobilenetv3_large_100,1.76,98.24,10.2933,89.7067,5.48,224,0.875,bicubic +densenet121,1.7333,98.2667,10.8533,89.1467,7.98,224,0.875,bicubic tf_efficientnet_b0,1.6933,98.3067,9.7333,90.2667,5.29,224,0.875,bicubic tv_resnext50_32x4d,1.68,98.32,10.6,89.4,25.03,224,0.875,bilinear mobilenetv3_rw,1.6667,98.3333,10.7333,89.2667,5.48,224,0.875,bicubic @@ -163,6 +189,7 @@ gluon_resnet50_v1c,1.5467,98.4533,10.6133,89.3867,25.58,224,0.875,bicubic semnasnet_100,1.5467,98.4533,9.32,90.68,3.89,224,0.875,bicubic selecsls42b,1.4667,98.5333,10.44,89.56,32.46,224,0.875,bicubic tf_efficientnet_lite1,1.4533,98.5467,9.7067,90.2933,5.42,240,0.882,bicubic +regnety_008,1.4267,98.5733,8.9467,91.0533,6.26,224,0.875,bicubic ssl_resnet18,1.3867,98.6133,8.16,91.84,11.69,224,0.875,bilinear dla60,1.3467,98.6533,9.4667,90.5333,22.33,224,0.875,bilinear dpn68,1.3467,98.6533,8.8133,91.1867,12.61,224,0.875,bicubic @@ -178,19 +205,26 @@ seresnet34,1.12,98.88,7.4,92.6,21.96,224,0.875,bilinear tf_efficientnet_es,1.12,98.88,8.6,91.4,5.44,224,0.875,bicubic spnasnet_100,1.1067,98.8933,8.2533,91.7467,4.42,224,0.875,bilinear tf_efficientnet_lite0,1.1067,98.8933,7.4933,92.5067,4.65,224,0.875,bicubic +regnetx_016,1.0933,98.9067,8.6267,91.3733,9.19,224,0.875,bicubic dla34,1.08,98.92,7.6933,92.3067,15.78,224,0.875,bilinear +regnety_006,1.0533,98.9467,8.4,91.6,6.06,224,0.875,bicubic 
+regnety_004,1.0133,98.9867,7.3333,92.6667,4.34,224,0.875,bicubic resnet34,0.9867,99.0133,7.5333,92.4667,21.8,224,0.875,bilinear mobilenetv2_110d,0.9333,99.0667,8.1067,91.8933,4.52,224,0.875,bicubic gluon_resnet34_v1b,0.8933,99.1067,6.6,93.4,21.8,224,0.875,bicubic hrnet_w18_small_v2,0.8933,99.1067,7.3867,92.6133,15.6,224,0.875,bilinear +regnetx_008,0.8933,99.1067,6.9067,93.0933,7.26,224,0.875,bicubic skresnet18,0.88,99.12,7.3867,92.6133,11.96,224,0.875,bicubic mnasnet_100,0.8667,99.1333,7.8667,92.1333,4.38,224,0.875,bicubic tf_mobilenetv3_large_075,0.8667,99.1333,6.72,93.28,3.99,224,0.875,bilinear +regnetx_006,0.76,99.24,6.4933,93.5067,6.2,224,0.875,bicubic tf_mobilenetv3_small_100,0.7467,99.2533,4.6667,95.3333,2.54,224,0.875,bilinear seresnet18,0.72,99.28,6.0267,93.9733,11.78,224,0.875,bicubic -densenet121,0.68,99.32,6.9067,93.0933,7.98,224,0.875,bicubic +regnetx_004,0.6933,99.3067,5.5067,94.4933,5.16,224,0.875,bicubic +tv_densenet121,0.68,99.32,6.9067,93.0933,7.98,224,0.875,bicubic +regnety_002,0.6667,99.3333,5.5333,94.4667,3.16,224,0.875,bicubic tf_mobilenetv3_small_075,0.6267,99.3733,4.1733,95.8267,2.04,224,0.875,bilinear -resnet26,0.6,99.4,6.88,93.12,16,224,0.875,bicubic +resnet26,0.6,99.4,6.88,93.12,16.0,224,0.875,bicubic tv_resnet34,0.6,99.4,5.52,94.48,21.8,224,0.875,bilinear mobilenetv2_100,0.5333,99.4667,6.1867,93.8133,3.5,224,0.875,bicubic dla46_c,0.52,99.48,4.1867,95.8133,1.31,224,0.875,bilinear @@ -201,4 +235,5 @@ dla46x_c,0.4133,99.5867,4.44,95.56,1.08,224,0.875,bilinear gluon_resnet18_v1b,0.3867,99.6133,4.7867,95.2133,11.69,224,0.875,bicubic tf_mobilenetv3_small_minimal_100,0.36,99.64,2.8667,97.1333,2.04,224,0.875,bilinear resnet18,0.2933,99.7067,4.04,95.96,11.69,224,0.875,bilinear -tv_resnet50,0,100,2.8933,97.1067,25.56,224,0.875,bilinear +regnetx_002,0.2267,99.7733,3.9867,96.0133,2.68,224,0.875,bicubic +tv_resnet50,0.0,100.0,2.8933,97.1067,25.56,224,0.875,bilinear diff --git a/results/results-imagenet.csv b/results/results-imagenet.csv index 75310e45..d7daaf8a 100644 --- a/results/results-imagenet.csv +++ b/results/results-imagenet.csv @@ -14,15 +14,18 @@ tf_efficientnet_b7,84.932,15.068,97.208,2.792,66.35,600,0.949,bicubic tf_efficientnet_b6_ap,84.786,15.214,97.138,2.862,43.04,528,0.942,bicubic swsl_resnext101_32x8d,84.294,15.706,97.174,2.826,88.79,224,0.875,bilinear tf_efficientnet_b5_ap,84.254,15.746,96.976,3.024,30.39,456,0.934,bicubic +resnest269e,84.186,15.814,96.922,3.078,110.93,416,0.875,bilinear ig_resnext101_32x16d,84.176,15.824,97.196,2.804,194.03,224,0.875,bilinear tf_efficientnet_b6,84.112,15.888,96.884,3.116,43.04,528,0.942,bicubic tf_efficientnet_b3_ns,84.054,15.946,96.912,3.088,12.23,300,0.904,bicubic +resnest200e,83.834,16.166,96.838,3.162,70.2,320,0.875,bilinear tf_efficientnet_b5,83.816,16.184,96.75,3.25,30.39,456,0.934,bicubic swsl_resnext101_32x16d,83.338,16.662,96.852,3.148,194.03,224,0.875,bilinear tf_efficientnet_b4_ap,83.248,16.752,96.388,3.612,19.34,380,0.922,bicubic swsl_resnext101_32x4d,83.234,16.766,96.756,3.244,44.18,224,0.875,bilinear tresnet_xl_448,83.048,16.952,96.174,3.826,78.44,448,0.875,bilinear tf_efficientnet_b4,83.016,16.984,96.298,3.702,19.34,380,0.922,bicubic +resnest101e,82.89,17.11,96.324,3.676,48.28,256,0.875,bilinear pnasnet5large,82.74,17.26,96.04,3.96,86.06,331,0.875,bicubic ig_resnext101_32x8d,82.688,17.312,96.632,3.368,88.79,224,0.875,bilinear nasnetalarge,82.558,17.442,96.036,3.964,88.75,331,0.875,bicubic @@ -31,7 +34,7 @@ tresnet_l_448,82.268,17.732,95.978,4.022,55.99,448,0.875,bilinear 
swsl_resnext50_32x4d,82.18,17.82,96.228,3.772,25.03,224,0.875,bilinear ecaresnet101d,82.166,17.834,96.052,3.948,44.57,224,0.875,bicubic tresnet_xl,82.07,17.93,95.928,4.072,78.44,224,0.875,bilinear -efficientnet_b3a,81.874,18.126,95.84,4.16,12.23,320,1,bicubic +efficientnet_b3a,81.874,18.126,95.84,4.16,12.23,320,1.0,bicubic ssl_resnext101_32x16d,81.836,18.164,96.094,3.906,194.03,224,0.875,bilinear tf_efficientnet_b3_ap,81.828,18.172,95.624,4.376,12.23,300,0.904,bicubic tresnet_m_448,81.712,18.288,95.57,4.43,31.39,448,0.875,bilinear @@ -44,14 +47,18 @@ tf_efficientnet_b1_ns,81.386,18.614,95.738,4.262,7.79,240,0.882,bicubic senet154,81.304,18.696,95.498,4.502,115.09,224,0.875,bilinear gluon_senet154,81.224,18.776,95.356,4.644,115.09,224,0.875,bicubic swsl_resnet50,81.18,18.82,95.986,4.014,25.56,224,0.875,bilinear +resnest50d_4s2x40d,81.114,18.886,95.568,4.432,30.42,224,0.875,bicubic gluon_resnet152_v1s,81.012,18.988,95.416,4.584,60.32,224,0.875,bicubic +resnest50d_1s4x24d,80.99,19.01,95.322,4.678,25.68,224,0.875,bicubic +resnest50d,80.958,19.042,95.382,4.618,27.48,224,0.875,bilinear ssl_resnext101_32x4d,80.928,19.072,95.728,4.272,44.18,224,0.875,bilinear gluon_seresnext101_32x4d,80.902,19.098,95.294,4.706,48.96,224,0.875,bicubic gluon_seresnext101_64x4d,80.89,19.11,95.304,4.696,88.23,224,0.875,bicubic efficientnet_b3_pruned,80.856,19.144,95.24,4.76,9.86,300,0.904,bicubic +regnety_320,80.814,19.186,95.24,4.76,145.05,224,0.875,bicubic ecaresnet101d_pruned,80.808,19.192,95.628,4.372,24.88,224,0.875,bicubic tresnet_m,80.796,19.204,94.856,5.144,31.39,224,0.875,bilinear -efficientnet_b2a,80.608,19.392,95.31,4.69,9.11,288,1,bicubic +efficientnet_b2a,80.608,19.392,95.31,4.69,9.11,288,1.0,bicubic ecaresnet50d,80.604,19.396,95.322,4.678,25.58,224,0.875,bicubic gluon_resnext101_64x4d,80.602,19.398,94.994,5.006,83.46,224,0.875,bicubic mixnet_xl,80.478,19.522,94.932,5.068,11.9,224,0.875,bicubic @@ -61,10 +68,13 @@ ecaresnetlight,80.454,19.546,95.256,4.744,30.16,224,0.875,bicubic tf_efficientnet_el,80.448,19.552,95.16,4.84,10.59,300,0.904,bicubic gluon_resnet101_v1d,80.424,19.576,95.02,4.98,44.57,224,0.875,bicubic efficientnet_b2,80.402,19.598,95.076,4.924,9.11,260,0.875,bicubic +regnety_120,80.382,19.618,95.128,4.872,51.82,224,0.875,bicubic gluon_resnext101_32x4d,80.334,19.666,94.926,5.074,44.18,224,0.875,bicubic ssl_resnext50_32x4d,80.328,19.672,95.404,4.596,25.03,224,0.875,bilinear tf_efficientnet_b2_ap,80.306,19.694,95.028,4.972,9.11,260,0.89,bicubic gluon_resnet101_v1s,80.3,19.7,95.15,4.85,44.67,224,0.875,bicubic +regnety_160,80.3,19.7,94.962,5.038,83.59,224,0.875,bicubic +regnetx_320,80.246,19.754,95.022,4.978,107.81,224,0.875,bicubic seresnext101_32x4d,80.236,19.764,95.028,4.972,48.96,224,0.875,bilinear dpn107,80.164,19.836,94.912,5.088,86.92,224,0.875,bicubic inception_v4,80.156,19.844,94.974,5.026,42.68,299,0.875,bicubic @@ -75,18 +85,23 @@ ens_adv_inception_resnet_v2,79.976,20.024,94.946,5.054,55.84,299,0.8975,bicubic efficientnet_b2_pruned,79.918,20.082,94.848,5.152,8.31,260,0.89,bicubic gluon_resnet152_v1c,79.916,20.084,94.842,5.158,60.21,224,0.875,bicubic gluon_seresnext50_32x4d,79.912,20.088,94.818,5.182,27.56,224,0.875,bicubic +regnety_080,79.868,20.132,94.832,5.168,39.18,224,0.875,bicubic +regnetx_160,79.866,20.134,94.828,5.172,54.28,224,0.875,bicubic dpn131,79.828,20.172,94.704,5.296,79.25,224,0.875,bicubic tf_efficientnet_lite3,79.812,20.188,94.914,5.086,8.2,300,0.904,bilinear resnext50_32x4d,79.762,20.238,94.6,5.4,25.03,224,0.875,bicubic 
ecaresnet50d_pruned,79.718,20.282,94.89,5.11,19.94,224,0.875,bicubic +regnety_064,79.712,20.288,94.774,5.226,30.58,224,0.875,bicubic gluon_resnet152_v1b,79.692,20.308,94.738,5.262,60.19,224,0.875,bicubic resnext50d_32x4d,79.674,20.326,94.868,5.132,25.05,224,0.875,bicubic dpn98,79.636,20.364,94.594,5.406,61.57,224,0.875,bicubic gluon_xception65,79.604,20.396,94.748,5.252,39.92,299,0.875,bicubic +regnetx_120,79.59,20.41,94.74,5.26,46.11,224,0.875,bicubic gluon_resnet101_v1c,79.544,20.456,94.586,5.414,44.57,224,0.875,bicubic hrnet_w64,79.472,20.528,94.65,5.35,128.06,224,0.875,bilinear dla102x2,79.452,20.548,94.644,5.356,41.75,224,0.875,bilinear gluon_resnext50_32x4d,79.356,20.644,94.424,5.576,25.03,224,0.875,bicubic +ese_vovnet39b,79.32,20.68,94.71,5.29,24.57,224,0.875,bicubic resnext101_32x8d,79.312,20.688,94.526,5.474,88.79,224,0.875,bilinear hrnet_w48,79.31,20.69,94.518,5.482,77.47,224,0.875,bilinear gluon_resnet101_v1b,79.304,20.696,94.524,5.476,44.55,224,0.875,bicubic @@ -94,15 +109,19 @@ tf_efficientnet_cc_b1_8e,79.298,20.702,94.364,5.636,39.72,240,0.882,bicubic resnetblur50,79.29,20.71,94.632,5.368,25.56,224,0.875,bicubic tf_efficientnet_b1_ap,79.278,20.722,94.308,5.692,7.79,240,0.882,bicubic ssl_resnet50,79.228,20.772,94.832,5.168,25.56,224,0.875,bilinear +regnety_040,79.222,20.778,94.656,5.344,20.65,224,0.875,bicubic res2net50_26w_8s,79.21,20.79,94.362,5.638,48.4,224,0.875,bilinear +regnetx_080,79.198,20.802,94.558,5.442,39.57,224,0.875,bicubic res2net101_26w_4s,79.196,20.804,94.44,5.56,45.21,224,0.875,bilinear seresnext50_32x4d,79.076,20.924,94.434,5.566,27.56,224,0.875,bilinear gluon_resnet50_v1d,79.074,20.926,94.476,5.524,25.58,224,0.875,bicubic +regnetx_064,79.066,20.934,94.456,5.544,26.21,224,0.875,bicubic xception,79.048,20.952,94.392,5.608,22.86,299,0.8975,bicubic resnet50,79.032,20.968,94.384,5.616,25.56,224,0.875,bicubic mixnet_l,78.976,21.024,94.184,5.816,7.33,224,0.875,bicubic hrnet_w40,78.934,21.066,94.466,5.534,57.56,224,0.875,bilinear hrnet_w44,78.894,21.106,94.37,5.63,67.06,224,0.875,bilinear +regnety_032,78.87,21.13,94.402,5.598,19.44,224,0.875,bicubic wide_resnet101_2,78.846,21.154,94.284,5.716,126.89,224,0.875,bilinear tf_efficientnet_b1,78.832,21.168,94.196,5.804,7.79,240,0.882,bicubic gluon_inception_v3,78.804,21.196,94.38,5.62,23.83,299,0.875,bicubic @@ -115,6 +134,8 @@ seresnet152,78.658,21.342,94.374,5.626,66.82,224,0.875,bilinear tf_efficientnet_b0_ns,78.652,21.348,94.368,5.632,5.29,224,0.875,bicubic res2net50_26w_6s,78.574,21.426,94.126,5.874,37.05,224,0.875,bilinear dla102x,78.508,21.492,94.234,5.766,26.77,224,0.875,bilinear +regnetx_040,78.486,21.514,94.242,5.758,22.12,224,0.875,bicubic +resnest26d,78.482,21.518,94.29,5.71,17.07,224,0.875,bilinear dla60_res2net,78.472,21.528,94.204,5.796,21.15,224,0.875,bilinear wide_resnet50_2,78.468,21.532,94.086,5.914,68.88,224,0.875,bilinear dla60_res2next,78.448,21.552,94.144,5.856,17.33,224,0.875,bilinear @@ -122,10 +143,11 @@ hrnet_w32,78.448,21.552,94.188,5.812,41.23,224,0.875,bilinear selecsls60b,78.418,21.582,94.166,5.834,32.77,224,0.875,bicubic seresnet101,78.396,21.604,94.258,5.742,49.33,224,0.875,bilinear resnet152,78.312,21.688,94.046,5.954,60.19,224,0.875,bilinear -efficientnet_b1_pruned,78.242,21.758,93.832,6.168,6.33,240,0.882,bicubic dla60x,78.242,21.758,94.022,5.978,17.65,224,0.875,bilinear +efficientnet_b1_pruned,78.242,21.758,93.832,6.168,6.33,240,0.882,bicubic res2next50,78.242,21.758,93.892,6.108,24.67,224,0.875,bilinear -hrnet_w30,78.196,21.804,94.218,5.782,37.71,224,0.875,bilinear 
+hrnet_w30,78.196,21.804,94.22,5.78,37.71,224,0.875,bilinear +regnetx_032,78.166,21.834,94.08,5.92,15.3,224,0.875,bicubic res2net50_14w_8s,78.152,21.848,93.842,6.158,25.06,224,0.875,bilinear efficientnet_es,78.054,21.946,93.93,6.07,5.44,224,0.875,bicubic dla102,78.026,21.974,93.95,6.05,33.73,224,0.875,bilinear @@ -136,16 +158,17 @@ selecsls60,77.982,22.018,93.832,6.168,30.67,224,0.875,bicubic res2net50_26w_4s,77.946,22.054,93.852,6.148,25.7,224,0.875,bilinear tf_efficientnet_cc_b0_8e,77.908,22.092,93.656,6.344,24.01,224,0.875,bicubic tf_inception_v3,77.856,22.144,93.644,6.356,23.83,299,0.875,bicubic +regnety_016,77.852,22.148,93.716,6.284,11.2,224,0.875,bicubic efficientnet_b0,77.692,22.308,93.532,6.468,5.29,224,0.875,bicubic seresnet50,77.636,22.364,93.752,6.248,28.09,224,0.875,bilinear tv_resnext50_32x4d,77.618,22.382,93.698,6.302,25.03,224,0.875,bilinear seresnext26d_32x4d,77.604,22.396,93.612,6.388,16.81,224,0.875,bicubic +adv_inception_v3,77.58,22.42,93.724,6.276,23.83,299,0.875,bicubic gluon_resnet50_v1b,77.578,22.422,93.718,6.282,25.56,224,0.875,bicubic -adv_inception_v3,77.576,22.424,93.724,6.276,23.83,299,0.875,bicubic dpn68b,77.514,22.486,93.822,6.178,12.61,224,0.875,bicubic res2net50_48w_2s,77.514,22.486,93.548,6.452,25.29,224,0.875,bilinear tf_efficientnet_lite2,77.46,22.54,93.746,6.254,6.09,260,0.89,bicubic -inception_v3,77.434,22.566,93.478,6.522,27.16,299,0.875,bicubic +inception_v3,77.436,22.564,93.476,6.524,23.83,299,0.875,bicubic resnet101,77.374,22.626,93.546,6.454,44.55,224,0.875,bilinear densenet161,77.348,22.652,93.648,6.352,28.68,224,0.875,bicubic tf_efficientnet_cc_b0_4e,77.304,22.696,93.332,6.668,13.31,224,0.875,bicubic @@ -156,46 +179,58 @@ mixnet_m,77.256,22.744,93.418,6.582,5.01,224,0.875,bicubic selecsls42b,77.176,22.824,93.392,6.608,32.46,224,0.875,bicubic seresnext26_32x4d,77.1,22.9,93.31,6.69,16.79,224,0.875,bicubic tf_efficientnet_b0_ap,77.084,22.916,93.254,6.746,5.29,224,0.875,bicubic -dla60,77.022,22.978,93.308,6.692,22.33,224,0.875,bilinear +dla60,77.024,22.976,93.308,6.692,22.33,224,0.875,bilinear tf_mixnet_m,76.95,23.05,93.156,6.844,5.01,224,0.875,bicubic +regnetx_016,76.93,23.07,93.418,6.582,9.19,224,0.875,bicubic skresnet34,76.91,23.09,93.316,6.684,22.28,224,0.875,bicubic tf_efficientnet_b0,76.84,23.16,93.226,6.774,5.29,224,0.875,bicubic hrnet_w18,76.756,23.244,93.442,6.558,21.3,224,0.875,bilinear resnet26d,76.68,23.32,93.166,6.834,16.01,224,0.875,bicubic tf_efficientnet_lite1,76.638,23.362,93.232,6.768,5.42,240,0.882,bicubic +densenetblur121d,76.576,23.424,93.19,6.81,8.0,224,0.875,bicubic mobilenetv2_140,76.524,23.476,92.99,7.01,6.11,224,0.875,bicubic +regnety_008,76.314,23.686,93.062,6.938,6.26,224,0.875,bicubic dpn68,76.306,23.694,92.97,7.03,12.61,224,0.875,bicubic tv_resnet50,76.13,23.87,92.862,7.138,25.56,224,0.875,bilinear mixnet_s,75.988,24.012,92.794,7.206,4.13,224,0.875,bicubic densenet169,75.912,24.088,93.024,6.976,14.15,224,0.875,bicubic mobilenetv3_large_100,75.768,24.232,92.54,7.46,5.48,224,0.875,bicubic tf_mixnet_s,75.648,24.352,92.636,7.364,4.13,224,0.875,bicubic -mobilenetv3_rw,75.628,24.372,92.708,7.292,5.48,224,0.875,bicubic +mobilenetv3_rw,75.628,24.372,92.71,7.29,5.48,224,0.875,bicubic +densenet121,75.574,24.426,92.656,7.344,7.98,224,0.875,bicubic tf_mobilenetv3_large_100,75.516,24.484,92.6,7.4,5.48,224,0.875,bilinear +resnest14d,75.504,24.496,92.514,7.486,10.61,224,0.875,bilinear semnasnet_100,75.456,24.544,92.592,7.408,3.89,224,0.875,bicubic -resnet26,75.292,24.708,92.57,7.43,16,224,0.875,bicubic 
+resnet26,75.292,24.708,92.57,7.43,16.0,224,0.875,bicubic +regnety_006,75.26,24.74,92.528,7.472,6.06,224,0.875,bicubic hrnet_w18_small_v2,75.126,24.874,92.416,7.584,15.6,224,0.875,bilinear fbnetc_100,75.12,24.88,92.386,7.614,5.57,224,0.875,bilinear resnet34,75.112,24.888,92.288,7.712,21.8,224,0.875,bilinear mobilenetv2_110d,75.052,24.948,92.18,7.82,4.52,224,0.875,bicubic +regnetx_008,75.022,24.978,92.344,7.656,7.26,224,0.875,bicubic tf_efficientnet_lite0,74.842,25.158,92.17,7.83,4.65,224,0.875,bicubic seresnet34,74.808,25.192,92.126,7.874,21.96,224,0.875,bilinear -densenet121,74.752,25.248,92.152,7.848,7.98,224,0.875,bicubic +tv_densenet121,74.752,25.248,92.152,7.848,7.98,224,0.875,bicubic mnasnet_100,74.656,25.344,92.126,7.874,4.38,224,0.875,bicubic dla34,74.636,25.364,92.064,7.936,15.78,224,0.875,bilinear gluon_resnet34_v1b,74.58,25.42,91.988,8.012,21.8,224,0.875,bicubic spnasnet_100,74.08,25.92,91.832,8.168,4.42,224,0.875,bilinear +regnety_004,74.026,25.974,91.748,8.252,4.34,224,0.875,bicubic +regnetx_006,73.862,26.138,91.68,8.32,6.2,224,0.875,bicubic tf_mobilenetv3_large_075,73.442,26.558,91.352,8.648,3.99,224,0.875,bilinear tv_resnet34,73.314,26.686,91.42,8.58,21.8,224,0.875,bilinear swsl_resnet18,73.286,26.714,91.732,8.268,11.69,224,0.875,bilinear skresnet18,73.044,26.956,91.178,8.822,11.96,224,0.875,bicubic mobilenetv2_100,72.978,27.022,91.016,8.984,3.5,224,0.875,bicubic -ssl_resnet18,72.6,27.4,91.418,8.582,11.69,224,0.875,bilinear +ssl_resnet18,72.6,27.4,91.416,8.584,11.69,224,0.875,bilinear +regnetx_004,72.406,27.594,90.83,9.17,5.16,224,0.875,bicubic hrnet_w18_small,72.342,27.658,90.672,9.328,13.19,224,0.875,bilinear tf_mobilenetv3_large_minimal_100,72.244,27.756,90.636,9.364,3.92,224,0.875,bilinear seresnet18,71.758,28.242,90.334,9.666,11.78,224,0.875,bicubic gluon_resnet18_v1b,70.83,29.17,89.756,10.244,11.69,224,0.875,bicubic +regnety_002,70.282,29.718,89.54,10.46,3.16,224,0.875,bicubic resnet18,69.758,30.242,89.078,10.922,11.69,224,0.875,bilinear +regnetx_002,68.754,31.246,88.548,11.452,2.68,224,0.875,bicubic tf_mobilenetv3_small_100,67.918,32.082,87.662,12.338,2.54,224,0.875,bilinear dla60x_c,67.908,32.092,88.434,11.566,1.34,224,0.875,bilinear dla46x_c,65.98,34.02,86.98,13.02,1.08,224,0.875,bilinear diff --git a/results/results-imagenetv2-matched-frequency.csv b/results/results-imagenetv2-matched-frequency.csv index 55e5cd95..de63b292 100644 --- a/results/results-imagenetv2-matched-frequency.csv +++ b/results/results-imagenetv2-matched-frequency.csv @@ -17,12 +17,15 @@ tf_efficientnet_b7,74.72,25.28,92.22,7.78,66.35,600,0.949,bicubic tf_efficientnet_b5_ap,74.59,25.41,91.99,8.01,30.39,456,0.934,bicubic swsl_resnext101_32x4d,74.15,25.85,91.99,8.01,44.18,224,0.875,bilinear swsl_resnext101_32x16d,74.01,25.99,92.17,7.83,194.03,224,0.875,bilinear +resnest200e,73.93,26.07,91.58,8.42,70.2,320,0.875,bilinear tf_efficientnet_b6,73.9,26.1,91.75,8.25,43.04,528,0.942,bicubic tf_efficientnet_b3_ns,73.87,26.13,91.86,8.14,12.23,300,0.904,bicubic ig_resnext101_32x8d,73.66,26.34,92.15,7.85,88.79,224,0.875,bilinear tf_efficientnet_b5,73.54,26.46,91.46,8.54,30.39,456,0.934,bicubic +resnest269e,73.46,26.54,91.68,8.32,110.93,416,0.875,bilinear tf_efficientnet_b4_ap,72.89,27.11,90.98,9.02,19.34,380,0.922,bicubic swsl_resnext50_32x4d,72.58,27.42,90.84,9.16,25.03,224,0.875,bilinear +resnest101e,72.55,27.45,90.81,9.19,48.28,256,0.875,bilinear tresnet_xl_448,72.55,27.45,90.31,9.69,78.44,448,0.875,bilinear pnasnet5large,72.37,27.63,90.26,9.74,86.06,331,0.875,bicubic 
nasnetalarge,72.31,27.69,90.51,9.49,88.75,331,0.875,bicubic @@ -34,9 +37,10 @@ tresnet_l_448,71.6,28.4,90.06,9.94,55.99,448,0.875,bilinear ecaresnet101d,71.5,28.5,90.31,9.69,44.57,224,0.875,bicubic ssl_resnext101_32x8d,71.49,28.51,90.47,9.53,88.79,224,0.875,bilinear ssl_resnext101_32x16d,71.4,28.6,90.55,9.45,194.03,224,0.875,bilinear -tresnet_m_448,71,29,88.68,11.32,31.39,448,0.875,bilinear +tresnet_m_448,71.0,29.0,88.68,11.32,31.39,448,0.875,bilinear +resnest50d_4s2x40d,70.94,29.06,89.71,10.29,30.42,224,0.875,bicubic tf_efficientnet_b3_ap,70.92,29.08,89.43,10.57,12.23,300,0.904,bicubic -efficientnet_b3a,70.87,29.13,89.72,10.28,12.23,320,1,bicubic +efficientnet_b3a,70.87,29.13,89.72,10.28,12.23,320,1.0,bicubic tf_efficientnet_b1_ns,70.85,29.15,90.11,9.89,7.79,240,0.882,bicubic tresnet_l,70.83,29.17,89.61,10.39,55.99,224,0.875,bilinear efficientnet_b3,70.76,29.24,89.84,10.16,12.23,300,0.904,bicubic @@ -45,7 +49,9 @@ gluon_senet154,70.6,29.4,88.92,11.08,115.09,224,0.875,bicubic ssl_resnext101_32x4d,70.5,29.5,89.76,10.24,44.18,224,0.875,bilinear senet154,70.48,29.52,88.99,11.01,115.09,224,0.875,bilinear gluon_seresnext101_64x4d,70.44,29.56,89.35,10.65,88.23,224,0.875,bicubic +resnest50d_1s4x24d,70.43,29.57,89.24,10.76,25.68,224,0.875,bicubic tf_efficientnet_lite4,70.43,29.57,89.12,10.88,13.01,380,0.92,bilinear +resnest50d,70.42,29.58,88.76,11.24,27.48,224,0.875,bilinear gluon_resnet152_v1s,70.32,29.68,88.87,11.13,60.32,224,0.875,bicubic ecaresnet101d_pruned,70.12,29.88,89.58,10.42,24.88,224,0.875,bicubic inception_resnet_v2,70.12,29.88,88.68,11.32,55.84,299,0.8975,bicubic @@ -54,16 +60,16 @@ gluon_resnet152_v1d,69.95,30.05,88.47,11.53,60.21,224,0.875,bicubic ecaresnet50d,69.83,30.17,89.37,10.63,25.58,224,0.875,bicubic gluon_resnext101_64x4d,69.69,30.31,88.26,11.74,83.46,224,0.875,bicubic ssl_resnext50_32x4d,69.69,30.31,89.42,10.58,25.03,224,0.875,bilinear -tresnet_m,69.65,30.35,88,12,31.39,224,0.875,bilinear +tresnet_m,69.65,30.35,88.0,12.0,31.39,224,0.875,bilinear efficientnet_b3_pruned,69.58,30.42,88.97,11.03,9.86,300,0.904,bicubic ens_adv_inception_resnet_v2,69.52,30.48,88.5,11.5,55.84,299,0.8975,bicubic -efficientnet_b2a,69.49,30.51,88.68,11.32,9.11,288,1,bicubic +efficientnet_b2a,69.49,30.51,88.68,11.32,9.11,288,1.0,bicubic inception_v4,69.35,30.65,88.78,11.22,42.68,299,0.875,bicubic seresnext101_32x4d,69.34,30.66,88.05,11.95,48.96,224,0.875,bilinear ecaresnetlight,69.33,30.67,89.22,10.78,30.16,224,0.875,bicubic gluon_resnet152_v1c,69.13,30.87,87.89,12.11,60.21,224,0.875,bicubic mixnet_xl,69.08,30.92,88.31,11.69,11.9,224,0.875,bicubic -efficientnet_b2,69,31,88.62,11.38,9.11,260,0.875,bicubic +efficientnet_b2,69.0,31.0,88.62,11.38,9.11,260,0.875,bicubic gluon_resnet101_v1d,68.99,31.01,88.08,11.92,44.57,224,0.875,bicubic gluon_xception65,68.98,31.02,88.32,11.68,39.92,299,0.875,bicubic gluon_resnext101_32x4d,68.96,31.04,88.34,11.66,44.18,224,0.875,bicubic @@ -85,6 +91,7 @@ dla102x2,68.34,31.66,87.87,12.13,41.75,224,0.875,bilinear efficientnet_b2_pruned,68.3,31.7,88.1,11.9,8.31,260,0.89,bicubic gluon_resnext50_32x4d,68.28,31.72,87.32,12.68,25.03,224,0.875,bicubic tf_efficientnet_lite3,68.23,31.77,87.72,12.28,8.2,300,0.904,bilinear +ese_vovnet39b,68.19,31.81,88.26,11.74,24.57,224,0.875,bicubic tf_efficientnet_el,68.18,31.82,88.35,11.65,10.59,300,0.904,bicubic dpn92,68.01,31.99,87.59,12.41,37.67,224,0.875,bicubic gluon_resnet50_v1d,67.91,32.09,87.12,12.88,25.58,224,0.875,bicubic @@ -104,15 +111,16 @@ tf_efficientnet_b1_ap,67.52,32.48,87.77,12.23,7.79,240,0.882,bicubic 
tf_efficientnet_cc_b1_8e,67.48,32.52,87.31,12.69,39.72,240,0.882,bicubic gluon_resnet101_v1b,67.45,32.55,87.23,12.77,44.55,224,0.875,bicubic res2net101_26w_4s,67.45,32.55,87.01,12.99,45.21,224,0.875,bilinear -resnetblur50,67.44,32.56,87.43,12.57,25.56,224,0.875,bicubic resnet50,67.44,32.56,87.42,12.58,25.56,224,0.875,bicubic +resnetblur50,67.44,32.56,87.43,12.57,25.56,224,0.875,bicubic +resnest26d,67.21,32.79,87.18,12.82,17.07,224,0.875,bilinear efficientnet_b1,67.16,32.84,87.15,12.85,7.79,240,0.875,bicubic seresnet101,67.15,32.85,87.05,12.95,49.33,224,0.875,bilinear gluon_resnet50_v1s,67.1,32.9,86.86,13.14,25.68,224,0.875,bicubic dla60x,67.08,32.92,87.17,12.83,17.65,224,0.875,bilinear dla60_res2net,67.03,32.97,87.14,12.86,21.15,224,0.875,bilinear resnet152,67.02,32.98,87.57,12.43,60.19,224,0.875,bilinear -dla102x,67,33,86.77,13.23,26.77,224,0.875,bilinear +dla102x,67.0,33.0,86.77,13.23,26.77,224,0.875,bilinear mixnet_l,66.97,33.03,86.94,13.06,7.33,224,0.875,bicubic res2net50_26w_6s,66.91,33.09,86.9,13.1,37.05,224,0.875,bilinear efficientnet_es,66.89,33.11,86.73,13.27,5.44,224,0.875,bicubic @@ -128,14 +136,14 @@ dla60_res2next,66.64,33.36,87.02,12.98,17.33,224,0.875,bilinear adv_inception_v3,66.6,33.4,86.56,13.44,23.83,299,0.875,bicubic dla102,66.55,33.45,86.91,13.09,33.73,224,0.875,bilinear gluon_resnet50_v1c,66.54,33.46,86.16,13.84,25.58,224,0.875,bicubic -tf_inception_v3,66.41,33.59,86.68,13.32,23.83,299,0.875,bicubic +tf_inception_v3,66.42,33.58,86.68,13.32,23.83,299,0.875,bicubic efficientnet_b0,66.25,33.75,85.95,14.05,5.29,224,0.875,bicubic seresnet50,66.24,33.76,86.33,13.67,28.09,224,0.875,bilinear selecsls60,66.22,33.78,86.33,13.67,30.67,224,0.875,bicubic tf_efficientnet_cc_b0_8e,66.21,33.79,86.22,13.78,24.01,224,0.875,bicubic tv_resnext50_32x4d,66.18,33.82,86.04,13.96,25.03,224,0.875,bilinear res2net50_26w_4s,66.17,33.83,86.6,13.4,25.7,224,0.875,bilinear -inception_v3,66.12,33.88,86.34,13.66,27.16,299,0.875,bicubic +inception_v3,66.12,33.88,86.34,13.66,23.83,299,0.875,bicubic efficientnet_b1_pruned,66.08,33.92,86.58,13.42,6.33,240,0.882,bicubic gluon_resnet50_v1b,66.04,33.96,86.27,13.73,25.56,224,0.875,bicubic res2net50_14w_8s,66.02,33.98,86.24,13.76,25.06,224,0.875,bilinear @@ -151,6 +159,7 @@ tf_efficientnet_b0_ap,65.49,34.51,85.55,14.45,5.29,224,0.875,bicubic seresnext26d_32x4d,65.42,34.58,85.97,14.03,16.81,224,0.875,bicubic tf_efficientnet_lite2,65.39,34.61,86.03,13.97,6.09,260,0.89,bicubic res2net50_48w_2s,65.32,34.68,85.96,14.04,25.29,224,0.875,bilinear +densenetblur121d,65.3,34.7,85.71,14.29,8.0,224,0.875,bicubic densenet201,65.28,34.72,85.67,14.33,20.01,224,0.875,bicubic tf_efficientnet_es,65.24,34.76,85.54,14.46,5.44,224,0.875,bicubic dla60,65.22,34.78,85.75,14.25,22.33,224,0.875,bilinear @@ -166,21 +175,23 @@ tf_efficientnet_b0,64.29,35.71,85.25,14.75,5.29,224,0.875,bicubic tf_mixnet_m,64.27,35.73,85.09,14.91,5.01,224,0.875,bicubic dpn68,64.22,35.78,85.18,14.82,12.61,224,0.875,bicubic mobilenetv2_140,64.05,35.95,85.02,14.98,6.11,224,0.875,bicubic +densenet121,63.74,36.26,84.63,15.37,7.98,224,0.875,bicubic +resnest14d,63.6,36.4,84.22,15.78,10.61,224,0.875,bilinear tf_mixnet_s,63.59,36.41,84.27,15.73,4.13,224,0.875,bicubic -resnet26,63.45,36.55,84.27,15.73,16,224,0.875,bicubic +resnet26,63.45,36.55,84.27,15.73,16.0,224,0.875,bicubic mixnet_s,63.38,36.62,84.71,15.29,4.13,224,0.875,bicubic mobilenetv3_large_100,63.36,36.64,84.08,15.92,5.48,224,0.875,bicubic tv_resnet50,63.33,36.67,84.65,15.35,25.56,224,0.875,bilinear 
mobilenetv3_rw,63.23,36.77,84.52,15.48,5.48,224,0.875,bicubic semnasnet_100,63.12,36.88,84.53,15.47,3.89,224,0.875,bicubic -densenet121,62.94,37.06,84.26,15.74,7.98,224,0.875,bicubic +tv_densenet121,62.94,37.06,84.26,15.74,7.98,224,0.875,bicubic seresnet34,62.89,37.11,84.22,15.78,21.96,224,0.875,bilinear hrnet_w18_small_v2,62.83,37.17,83.97,16.03,15.6,224,0.875,bilinear mobilenetv2_110d,62.82,37.18,84.48,15.52,4.52,224,0.875,bicubic resnet34,62.82,37.18,84.12,15.88,21.8,224,0.875,bilinear swsl_resnet18,62.73,37.27,84.3,15.7,11.69,224,0.875,bilinear tf_efficientnet_lite0,62.58,37.42,84.25,15.75,4.65,224,0.875,bicubic -gluon_resnet34_v1b,62.56,37.44,84,16,21.8,224,0.875,bicubic +gluon_resnet34_v1b,62.56,37.44,84.0,16.0,21.8,224,0.875,bicubic dla34,62.51,37.49,83.92,16.08,15.78,224,0.875,bilinear tf_mobilenetv3_large_100,62.47,37.53,83.96,16.04,5.48,224,0.875,bilinear fbnetc_100,62.43,37.57,83.39,16.61,5.57,224,0.875,bilinear diff --git a/results/results-sketch.csv b/results/results-sketch.csv index 078659b2..e040b75a 100644 --- a/results/results-sketch.csv +++ b/results/results-sketch.csv @@ -24,16 +24,22 @@ tf_efficientnet_b4_ap,40.4763,59.5237,61.7127,38.2873,19.34,380,0.922,bicubic tf_efficientnet_b3_ns,39.5822,60.4178,61.4632,38.5368,12.23,300,0.904,bicubic tf_efficientnet_b5,38.3285,61.6715,59.9285,40.0715,30.39,456,0.934,bicubic tf_efficientnet_b3_ap,37.0611,62.9389,57.2363,42.7637,12.23,300,0.904,bicubic +resnest269e,36.67,63.33,56.8099,43.1901,110.93,416,0.875,bilinear tf_efficientnet_b2_ns,36.1768,63.8232,57.5547,42.4453,9.11,260,0.89,bicubic ecaresnet101d,36.0058,63.9942,56.1536,43.8464,44.57,224,0.875,bicubic -swsl_resnet18,35.8604,64.1396,58.439,41.561,11.69,224,0.875,bilinear +swsl_resnet18,35.8604,64.1396,58.437,41.563,11.69,224,0.875,bilinear +resnest200e,35.8466,64.1534,55.8903,44.1097,70.2,320,0.875,bilinear +resnest101e,35.3652,64.6348,55.7861,44.2139,48.28,256,0.875,bilinear ssl_resnext101_32x16d,34.6087,65.3913,55.9139,44.0861,194.03,224,0.875,bilinear +resnest50d_4s2x40d,34.3611,65.6389,54.7112,45.2888,30.42,224,0.875,bicubic tf_efficientnet_b1_ns,34.1528,65.8472,55.4894,44.5106,7.79,240,0.882,bicubic tf_efficientnet_b4,34.0624,65.9376,54.216,45.784,19.34,380,0.922,bicubic ssl_resnext101_32x8d,34.0211,65.9789,55.5935,44.4065,88.79,224,0.875,bilinear tf_efficientnet_b6,34.0054,65.9946,54.5403,45.4597,43.04,528,0.942,bicubic efficientnet_b3_pruned,33.9956,66.0044,54.1099,45.8901,9.86,300,0.904,bicubic tresnet_xl,33.2587,66.7413,52.2962,47.7038,78.44,224,0.875,bilinear +resnest50d_1s4x24d,33.1388,66.8612,52.8307,47.1693,25.68,224,0.875,bicubic +resnest50d,32.9678,67.0322,52.701,47.299,27.48,224,0.875,bilinear tf_efficientnet_b3,32.8637,67.1363,52.9623,47.0377,12.23,300,0.904,bicubic inception_resnet_v2,32.736,67.264,50.6396,49.3604,55.84,299,0.8975,bicubic gluon_resnet152_v1d,32.7301,67.2699,51.0837,48.9163,60.21,224,0.875,bicubic @@ -45,7 +51,7 @@ ens_adv_inception_resnet_v2,32.3705,67.6295,50.4274,49.5726,55.84,299,0.8975,bic gluon_resnet152_v1s,32.3312,67.6688,50.5394,49.4606,60.32,224,0.875,bicubic gluon_seresnext101_64x4d,32.1936,67.8064,50.3272,49.6728,88.23,224,0.875,bicubic gluon_seresnext101_32x4d,32.115,67.885,51.2409,48.7591,48.96,224,0.875,bicubic -efficientnet_b3a,31.7279,68.2721,51.3215,48.6785,12.23,320,1,bicubic +efficientnet_b3a,31.7279,68.2721,51.3215,48.6785,12.23,320,1.0,bicubic efficientnet_b3,31.5648,68.4352,51.2724,48.7276,12.23,300,0.904,bicubic resnet50,31.5451,68.4549,50.1719,49.8281,25.56,224,0.875,bicubic 
ssl_resnext101_32x4d,31.4331,68.5669,52.1154,47.8846,44.18,224,0.875,bilinear @@ -62,11 +68,13 @@ ecaresnet101d_pruned,30.8947,69.1053,50.001,49.999,24.88,224,0.875,bicubic gluon_resnext101_32x4d,30.8809,69.1191,48.537,51.463,44.18,224,0.875,bicubic tf_efficientnet_lite4,30.8397,69.1603,50.3979,49.6021,13.01,380,0.92,bilinear dpn107,30.6805,69.3195,48.8062,51.1938,86.92,224,0.875,bicubic +ese_vovnet39b,30.6766,69.3234,49.8929,50.1071,24.57,224,0.875,bicubic tresnet_xl_448,30.6196,69.3804,49.0715,50.9285,78.44,448,0.875,bilinear -gluon_resnet152_v1b,30.6176,69.3824,48.5311,51.4689,60.19,224,0.875,bicubic +gluon_resnet152_v1b,30.6176,69.3824,48.5292,51.4708,60.19,224,0.875,bicubic ssl_resnext50_32x4d,30.594,69.406,50.6534,49.3466,25.03,224,0.875,bilinear gluon_resnet101_v1d,30.5095,69.4905,47.975,52.025,44.57,224,0.875,bicubic -efficientnet_b2a,30.4231,69.5769,49.6748,50.3252,9.11,288,1,bicubic +resnest26d,30.4997,69.5003,50.677,49.323,17.07,224,0.875,bilinear +efficientnet_b2a,30.4231,69.5769,49.6748,50.3252,9.11,288,1.0,bicubic tf_efficientnet_b1_ap,30.4191,69.5809,49.5529,50.4471,7.79,240,0.882,bicubic dpn98,30.0576,69.9424,48.2403,51.7597,61.57,224,0.875,bicubic tf_efficientnet_b2,30.0202,69.9798,49.5903,50.4097,9.11,260,0.89,bicubic @@ -75,14 +83,14 @@ senet154,30.0006,69.9994,48.032,51.968,115.09,224,0.875,bilinear dpn92,29.9691,70.0309,49.1599,50.8401,37.67,224,0.875,bicubic gluon_senet154,29.8866,70.1134,47.8728,52.1272,115.09,224,0.875,bicubic xception,29.8493,70.1507,48.6903,51.3097,22.86,299,0.8975,bicubic -adv_inception_v3,29.8237,70.1763,47.8689,52.1311,23.83,299,0.875,bicubic +adv_inception_v3,29.8237,70.1763,47.8669,52.1331,23.83,299,0.875,bicubic resnetblur50,29.6233,70.3767,48.2501,51.7499,25.56,224,0.875,bicubic efficientnet_b2,29.6174,70.3826,48.7728,51.2272,9.11,260,0.875,bicubic gluon_xception65,29.5545,70.4455,47.523,52.477,39.92,299,0.875,bicubic resnext101_32x8d,29.4347,70.5653,48.482,51.518,88.79,224,0.875,bilinear ssl_resnet50,29.4229,70.5771,49.773,50.227,25.56,224,0.875,bilinear resnext50_32x4d,29.3285,70.6715,47.3953,52.6047,25.03,224,0.875,bicubic -ecaresnet50d_pruned,29.2165,70.7835,48.4604,51.5396,19.94,224,0.875,bicubic +ecaresnet50d_pruned,29.2165,70.7835,48.4584,51.5416,19.94,224,0.875,bicubic tresnet_l_448,29.1674,70.8326,47.2342,52.7658,55.99,448,0.875,bilinear gluon_inception_v3,29.1143,70.8857,46.9433,53.0567,23.83,299,0.875,bicubic hrnet_w64,28.9866,71.0134,47.1399,52.8601,128.06,224,0.875,bilinear @@ -105,34 +113,49 @@ tf_efficientnet_cc_b0_4e,28.3106,71.6894,47.3639,52.6361,13.31,224,0.875,bicubic mixnet_xl,28.293,71.707,46.7174,53.2826,11.9,224,0.875,bicubic gluon_resnet50_v1d,28.236,71.764,45.8763,54.1237,25.58,224,0.875,bicubic wide_resnet101_2,28.1063,71.8937,46.4246,53.5754,126.89,224,0.875,bilinear +gluon_resnet101_v1c,28.1023,71.8977,45.953,54.047,44.57,224,0.875,bicubic densenet161,28.1004,71.8996,46.6506,53.3494,28.68,224,0.875,bicubic -gluon_resnet101_v1c,28.1004,71.8996,45.953,54.047,44.57,224,0.875,bicubic +regnetx_320,28.0788,71.9212,45.1198,54.8802,107.81,224,0.875,bicubic +regnety_320,28.0709,71.9291,45.4597,54.5403,145.05,224,0.875,bicubic dpn68b,27.8842,72.1158,47.4602,52.5398,12.61,224,0.875,bicubic -tf_inception_v3,27.784,72.216,45.7132,54.2868,23.83,299,0.875,bicubic +regnetx_160,27.8253,72.1747,45.6307,54.3693,54.28,224,0.875,bicubic +tf_inception_v3,27.786,72.214,45.7113,54.2887,23.83,299,0.875,bicubic res2net101_26w_4s,27.7742,72.2258,45.1709,54.8291,45.21,224,0.875,bilinear 
+regnety_160,27.6386,72.3614,45.5344,54.4656,83.59,224,0.875,bicubic hrnet_w44,27.6248,72.3752,45.8311,54.1689,67.06,224,0.875,bilinear -inception_v3,27.5698,72.4302,45.2632,54.7368,27.16,299,0.875,bicubic +inception_v3,27.5698,72.4302,45.2613,54.7387,23.83,299,0.875,bicubic +regnetx_080,27.4106,72.5894,45.0215,54.9785,39.57,224,0.875,bicubic hrnet_w30,27.3851,72.6149,46.5425,53.4575,37.71,224,0.875,bilinear hrnet_w32,27.3772,72.6228,45.9903,54.0097,41.23,224,0.875,bilinear -gluon_resnet50_v1s,27.3281,72.6719,45.2141,54.7859,25.68,224,0.875,bicubic +gluon_resnet50_v1s,27.3261,72.6739,45.2141,54.7859,25.68,224,0.875,bicubic densenet201,27.2613,72.7387,46.2241,53.7759,20.01,224,0.875,bicubic +regnety_064,27.2279,72.7721,44.8506,55.1494,30.58,224,0.875,bicubic +densenetblur121d,27.224,72.776,46.3067,53.6933,8.0,224,0.875,bicubic efficientnet_b1_pruned,27.1945,72.8055,45.8724,54.1276,6.33,240,0.882,bicubic res2net50_26w_8s,27.0726,72.9274,44.432,55.568,48.4,224,0.875,bilinear dla102x,27.0235,72.9765,45.4951,54.5049,26.77,224,0.875,bilinear resnet101,26.9685,73.0315,45.2357,54.7643,44.55,224,0.875,bilinear resnext50d_32x4d,26.8742,73.1258,44.43,55.57,25.05,224,0.875,bicubic +regnetx_120,26.8644,73.1356,44.6816,55.3184,46.11,224,0.875,bicubic seresnext101_32x4d,26.8192,73.1808,43.5084,56.4916,48.96,224,0.875,bilinear densenet169,26.8113,73.1887,45.3752,54.6248,14.15,224,0.875,bicubic +regnetx_064,26.8015,73.1985,44.9036,55.0964,26.21,224,0.875,bicubic +regnety_120,26.7818,73.2182,44.4399,55.5601,51.82,224,0.875,bicubic +regnetx_032,26.7071,73.2929,45.2259,54.7741,15.3,224,0.875,bicubic +densenet121,26.6757,73.3243,45.8999,54.1001,7.98,224,0.875,bicubic seresnet152,26.6718,73.3282,43.9447,56.0553,66.82,224,0.875,bilinear tf_efficientnet_el,26.6226,73.3774,44.6364,55.3636,10.59,300,0.904,bicubic efficientnet_es,26.6168,73.3832,45.106,54.894,5.44,224,0.875,bicubic res2net50_26w_6s,26.5873,73.4127,43.9781,56.0219,37.05,224,0.875,bilinear dla60x,26.5637,73.4363,45.0392,54.9608,17.65,224,0.875,bilinear +regnety_080,26.5146,73.4854,44.3554,55.6446,39.18,224,0.875,bicubic tf_efficientnet_b0,26.491,73.509,45.6562,54.3438,5.29,224,0.875,bicubic res2net50_14w_8s,26.4713,73.5287,44.3691,55.6309,25.06,224,0.875,bilinear gluon_resnet50_v1b,26.432,73.568,44.0331,55.9669,25.56,224,0.875,bicubic +regnetx_040,26.2395,73.7605,44.4241,55.5759,22.12,224,0.875,bicubic dpn68,26.1216,73.8784,44.2335,55.7665,12.61,224,0.875,bicubic hrnet_w18,25.9761,74.0239,44.8093,55.1907,21.3,224,0.875,bilinear +regnety_040,25.9133,74.0867,43.8543,56.1457,20.65,224,0.875,bicubic resnet34,25.8838,74.1162,43.9899,56.0101,21.8,224,0.875,bilinear res2net50_26w_4s,25.87,74.13,43.1606,56.8394,25.7,224,0.875,bilinear tresnet_m_448,25.8504,74.1496,42.8678,57.1322,31.39,448,0.875,bilinear @@ -148,21 +171,25 @@ tf_mixnet_l,25.42,74.58,42.5436,57.4564,7.33,224,0.875,bicubic res2next50,25.3945,74.6055,42.4925,57.5075,24.67,224,0.875,bilinear selecsls60b,25.3277,74.6723,43.5536,56.4464,32.77,224,0.875,bicubic seresnet101,25.3277,74.6723,42.8285,57.1715,49.33,224,0.875,bilinear +regnety_032,25.3237,74.6763,42.9071,57.0929,19.44,224,0.875,bicubic dla102,25.3139,74.6861,43.8366,56.1634,33.73,224,0.875,bilinear wide_resnet50_2,25.31,74.69,42.1781,57.8219,68.88,224,0.875,bilinear +resnest14d,25.2825,74.7175,44.1215,55.8785,10.61,224,0.875,bilinear seresnext50_32x4d,25.2176,74.7824,41.9383,58.0617,27.56,224,0.875,bilinear res2net50_48w_2s,25.0231,74.9769,42.2017,57.7983,25.29,224,0.875,bilinear 
efficientnet_b0,25.0152,74.9848,42.7853,57.2147,5.29,224,0.875,bicubic gluon_resnet34_v1b,24.9484,75.0516,42.237,57.763,21.8,224,0.875,bicubic mobilenetv2_120d,24.9327,75.0673,43.0643,56.9357,5.83,224,0.875,bicubic dla60,24.9268,75.0732,43.3021,56.6979,22.33,224,0.875,bilinear +regnety_016,24.8187,75.1813,42.6261,57.3739,11.2,224,0.875,bicubic tf_efficientnet_em,24.5338,75.4662,42.41,57.59,6.9,240,0.882,bicubic tf_efficientnet_lite2,24.5299,75.4701,42.292,57.708,6.09,260,0.89,bicubic skresnet18,24.4945,75.5055,42.5377,57.4623,11.96,224,0.875,bicubic +regnetx_016,24.4768,75.5232,42.5023,57.4977,9.19,224,0.875,bicubic tf_efficientnet_lite0,24.3707,75.6293,42.5102,57.4898,4.65,224,0.875,bicubic tv_resnet50,24.0917,75.9083,41.3095,58.6905,25.56,224,0.875,bilinear seresnet34,24.0366,75.9634,41.8951,58.1049,21.96,224,0.875,bilinear -densenet121,23.846,76.154,41.9207,58.0793,7.98,224,0.875,bicubic +tv_densenet121,23.846,76.154,41.9207,58.0793,7.98,224,0.875,bicubic tf_efficientnet_es,23.8244,76.1756,41.3193,58.6807,5.44,224,0.875,bicubic mobilenetv2_140,23.7104,76.2896,41.4687,58.5313,6.11,224,0.875,bicubic mixnet_m,23.7085,76.2915,41.1386,58.8614,5.01,224,0.875,bicubic @@ -176,26 +203,34 @@ mobilenetv3_large_100,22.665,77.335,40.7848,59.2152,5.48,224,0.875,bicubic mobilenetv3_rw,22.6257,77.3743,40.3702,59.6298,5.48,224,0.875,bicubic tf_mobilenetv3_large_100,22.5707,77.4293,39.7591,60.2409,5.48,224,0.875,bilinear hrnet_w18_small_v2,22.3408,77.6592,39.8475,60.1525,15.6,224,0.875,bilinear +regnety_008,22.1128,77.8872,38.8964,61.1036,6.26,224,0.875,bicubic seresnext26tn_32x4d,22.0028,77.9972,38.4916,61.5084,16.81,224,0.875,bicubic seresnext26t_32x4d,21.9871,78.0129,38.5663,61.4337,16.82,224,0.875,bicubic +regnety_006,21.9733,78.0267,38.9534,61.0466,6.06,224,0.875,bicubic +regnetx_008,21.9517,78.0483,38.9298,61.0702,7.26,224,0.875,bicubic resnet26d,21.9144,78.0856,38.6174,61.3826,16.01,224,0.875,bicubic semnasnet_100,21.8967,78.1033,38.6036,61.3964,3.89,224,0.875,bicubic +regnetx_006,21.7434,78.2566,38.9043,61.0957,6.2,224,0.875,bicubic gluon_resnet18_v1b,21.5449,78.4551,38.8728,61.1272,11.69,224,0.875,bicubic fbnetc_100,21.4919,78.5081,38.1654,61.8346,5.57,224,0.875,bilinear mnasnet_100,21.3504,78.6496,37.7154,62.2846,4.38,224,0.875,bicubic -resnet26,21.2954,78.7046,38.0161,61.9839,16,224,0.875,bicubic +resnet26,21.2954,78.7046,38.0161,61.9839,16.0,224,0.875,bicubic ssl_resnet18,21.2777,78.7223,39.1145,60.8855,11.69,224,0.875,bilinear mixnet_s,21.258,78.742,38.1929,61.8071,4.13,224,0.875,bicubic seresnext26d_32x4d,21.2541,78.7459,37.2851,62.7149,16.81,224,0.875,bicubic seresnext26_32x4d,21.093,78.907,37.6388,62.3612,16.79,224,0.875,bicubic +regnetx_004,20.8866,79.1134,37.5484,62.4516,5.16,224,0.875,bicubic spnasnet_100,20.867,79.133,37.8923,62.1077,4.42,224,0.875,bilinear seresnet18,20.8395,79.1605,37.6447,62.3553,11.78,224,0.875,bicubic mobilenetv2_100,20.7609,79.2391,37.7508,62.2492,3.5,224,0.875,bicubic tf_mixnet_s,20.4779,79.5221,36.6268,63.3732,4.13,224,0.875,bicubic +regnety_004,20.417,79.583,37.0296,62.9704,4.34,224,0.875,bicubic tf_mobilenetv3_large_075,20.3718,79.6282,36.7702,63.2298,3.99,224,0.875,bilinear hrnet_w18_small,20.3659,79.6341,37.0945,62.9055,13.19,224,0.875,bilinear resnet18,20.2283,79.7717,37.2595,62.7405,11.69,224,0.875,bilinear tf_mobilenetv3_large_minimal_100,20.1163,79.8837,36.9038,63.0962,3.92,224,0.875,bilinear +regnety_002,17.4596,82.5404,32.4432,67.5568,3.16,224,0.875,bicubic +regnetx_002,16.9506,83.0494,32.2349,67.7651,2.68,224,0.875,bicubic 
dla60x_c,16.3257,83.6743,31.775,68.225,1.34,224,0.875,bilinear tf_mobilenetv3_small_100,16.2334,83.7666,31.2229,68.7771,2.54,224,0.875,bilinear tf_mobilenetv3_small_075,14.9404,85.0596,29.5722,70.4278,2.04,224,0.875,bilinear From 0aca08384f33e773195971d00e6029efe85db3ed Mon Sep 17 00:00:00 2001 From: Ross Wightman Date: Tue, 9 Jun 2020 14:37:45 -0700 Subject: [PATCH 15/19] Update regnet cfg keys to match model names so registry works properly --- timm/models/regnet.py | 144 +++++++++++++++++++++--------------------- 1 file changed, 72 insertions(+), 72 deletions(-) diff --git a/timm/models/regnet.py b/timm/models/regnet.py index 65ba2cc6..c8961bc2 100644 --- a/timm/models/regnet.py +++ b/timm/models/regnet.py @@ -31,30 +31,30 @@ def _mcfg(**kwargs): # Model FLOPS = three trailing digits * 10^8 model_cfgs = dict( - x_002=_mcfg(w0=24, wa=36.44, wm=2.49, group_w=8, depth=13), - x_004=_mcfg(w0=24, wa=24.48, wm=2.54, group_w=16, depth=22), - x_006=_mcfg(w0=48, wa=36.97, wm=2.24, group_w=24, depth=16), - x_008=_mcfg(w0=56, wa=35.73, wm=2.28, group_w=16, depth=16), - x_016=_mcfg(w0=80, wa=34.01, wm=2.25, group_w=24, depth=18), - x_032=_mcfg(w0=88, wa=26.31, wm=2.25, group_w=48, depth=25), - x_040=_mcfg(w0=96, wa=38.65, wm=2.43, group_w=40, depth=23), - x_064=_mcfg(w0=184, wa=60.83, wm=2.07, group_w=56, depth=17), - x_080=_mcfg(w0=80, wa=49.56, wm=2.88, group_w=120, depth=23), - x_120=_mcfg(w0=168, wa=73.36, wm=2.37, group_w=112, depth=19), - x_160=_mcfg(w0=216, wa=55.59, wm=2.1, group_w=128, depth=22), - x_320=_mcfg(w0=320, wa=69.86, wm=2.0, group_w=168, depth=23), - y_002=_mcfg(w0=24, wa=36.44, wm=2.49, group_w=8, depth=13, se_ratio=0.25), - y_004=_mcfg(w0=48, wa=27.89, wm=2.09, group_w=8, depth=16, se_ratio=0.25), - y_006=_mcfg(w0=48, wa=32.54, wm=2.32, group_w=16, depth=15, se_ratio=0.25), - y_008=_mcfg(w0=56, wa=38.84, wm=2.4, group_w=16, depth=14, se_ratio=0.25), - y_016=_mcfg(w0=48, wa=20.71, wm=2.65, group_w=24, depth=27, se_ratio=0.25), - y_032=_mcfg(w0=80, wa=42.63, wm=2.66, group_w=24, depth=21, se_ratio=0.25), - y_040=_mcfg(w0=96, wa=31.41, wm=2.24, group_w=64, depth=22, se_ratio=0.25), - y_064=_mcfg(w0=112, wa=33.22, wm=2.27, group_w=72, depth=25, se_ratio=0.25), - y_080=_mcfg(w0=192, wa=76.82, wm=2.19, group_w=56, depth=17, se_ratio=0.25), - y_120=_mcfg(w0=168, wa=73.36, wm=2.37, group_w=112, depth=19, se_ratio=0.25), - y_160=_mcfg(w0=200, wa=106.23, wm=2.48, group_w=112, depth=18, se_ratio=0.25), - y_320=_mcfg(w0=232, wa=115.89, wm=2.53, group_w=232, depth=20, se_ratio=0.25), + regnetx_002=_mcfg(w0=24, wa=36.44, wm=2.49, group_w=8, depth=13), + regnetx_004=_mcfg(w0=24, wa=24.48, wm=2.54, group_w=16, depth=22), + regnetx_006=_mcfg(w0=48, wa=36.97, wm=2.24, group_w=24, depth=16), + regnetx_008=_mcfg(w0=56, wa=35.73, wm=2.28, group_w=16, depth=16), + regnetx_016=_mcfg(w0=80, wa=34.01, wm=2.25, group_w=24, depth=18), + regnetx_032=_mcfg(w0=88, wa=26.31, wm=2.25, group_w=48, depth=25), + regnetx_040=_mcfg(w0=96, wa=38.65, wm=2.43, group_w=40, depth=23), + regnetx_064=_mcfg(w0=184, wa=60.83, wm=2.07, group_w=56, depth=17), + regnetx_080=_mcfg(w0=80, wa=49.56, wm=2.88, group_w=120, depth=23), + regnetx_120=_mcfg(w0=168, wa=73.36, wm=2.37, group_w=112, depth=19), + regnetx_160=_mcfg(w0=216, wa=55.59, wm=2.1, group_w=128, depth=22), + regnetx_320=_mcfg(w0=320, wa=69.86, wm=2.0, group_w=168, depth=23), + regnety_002=_mcfg(w0=24, wa=36.44, wm=2.49, group_w=8, depth=13, se_ratio=0.25), + regnety_004=_mcfg(w0=48, wa=27.89, wm=2.09, group_w=8, depth=16, se_ratio=0.25), + regnety_006=_mcfg(w0=48, 
wa=32.54, wm=2.32, group_w=16, depth=15, se_ratio=0.25), + regnety_008=_mcfg(w0=56, wa=38.84, wm=2.4, group_w=16, depth=14, se_ratio=0.25), + regnety_016=_mcfg(w0=48, wa=20.71, wm=2.65, group_w=24, depth=27, se_ratio=0.25), + regnety_032=_mcfg(w0=80, wa=42.63, wm=2.66, group_w=24, depth=21, se_ratio=0.25), + regnety_040=_mcfg(w0=96, wa=31.41, wm=2.24, group_w=64, depth=22, se_ratio=0.25), + regnety_064=_mcfg(w0=112, wa=33.22, wm=2.27, group_w=72, depth=25, se_ratio=0.25), + regnety_080=_mcfg(w0=192, wa=76.82, wm=2.19, group_w=56, depth=17, se_ratio=0.25), + regnety_120=_mcfg(w0=168, wa=73.36, wm=2.37, group_w=112, depth=19, se_ratio=0.25), + regnety_160=_mcfg(w0=200, wa=106.23, wm=2.48, group_w=112, depth=18, se_ratio=0.25), + regnety_320=_mcfg(w0=232, wa=115.89, wm=2.53, group_w=232, depth=20, se_ratio=0.25), ) @@ -68,30 +68,30 @@ def _cfg(url=''): default_cfgs = dict( - x_002=_cfg(url='https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-regnet/regnetx_002-e7e85e5c.pth'), - x_004=_cfg(url='https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-regnet/regnetx_004-7d0e9424.pth'), - x_006=_cfg(url='https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-regnet/regnetx_006-85ec1baa.pth'), - x_008=_cfg(url='https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-regnet/regnetx_008-d8b470eb.pth'), - x_016=_cfg(url='https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-regnet/regnetx_016-65ca972a.pth'), - x_032=_cfg(url='https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-regnet/regnetx_032-ed0c7f7e.pth'), - x_040=_cfg(url='https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-regnet/regnetx_040-73c2a654.pth'), - x_064=_cfg(url='https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-regnet/regnetx_064-29278baa.pth'), - x_080=_cfg(url='https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-regnet/regnetx_080-7c7fcab1.pth'), - x_120=_cfg(url='https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-regnet/regnetx_120-65d5521e.pth'), - x_160=_cfg(url='https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-regnet/regnetx_160-c98c4112.pth'), - x_320=_cfg(url='https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-regnet/regnetx_320-8ea38b93.pth'), - y_002=_cfg(url='https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-regnet/regnety_002-e68ca334.pth'), - y_004=_cfg(url='https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-regnet/regnety_004-0db870e6.pth'), - y_006=_cfg(url='https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-regnet/regnety_006-c67e57ec.pth'), - y_008=_cfg(url='https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-regnet/regnety_008-dc900dbe.pth'), - y_016=_cfg(url='https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-regnet/regnety_016-54367f74.pth'), - y_032=_cfg(url='https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-regnet/regnety_032-62b47782.pth'), - y_040=_cfg(url='https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-regnet/regnety_040-f0d569f9.pth'), - y_064=_cfg(url='https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-regnet/regnety_064-0a48325c.pth'), - y_080=_cfg(url='https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-regnet/regnety_080-e7f3eb93.pth'), - 
y_120=_cfg(url='https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-regnet/regnety_120-721ba79a.pth'), - y_160=_cfg(url='https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-regnet/regnety_160-d64013cd.pth'), - y_320=_cfg(url='https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-regnet/regnety_320-ba464b29.pth'), + regnetx_002=_cfg(url='https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-regnet/regnetx_002-e7e85e5c.pth'), + regnetx_004=_cfg(url='https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-regnet/regnetx_004-7d0e9424.pth'), + regnetx_006=_cfg(url='https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-regnet/regnetx_006-85ec1baa.pth'), + regnetx_008=_cfg(url='https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-regnet/regnetx_008-d8b470eb.pth'), + regnetx_016=_cfg(url='https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-regnet/regnetx_016-65ca972a.pth'), + regnetx_032=_cfg(url='https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-regnet/regnetx_032-ed0c7f7e.pth'), + regnetx_040=_cfg(url='https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-regnet/regnetx_040-73c2a654.pth'), + regnetx_064=_cfg(url='https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-regnet/regnetx_064-29278baa.pth'), + regnetx_080=_cfg(url='https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-regnet/regnetx_080-7c7fcab1.pth'), + regnetx_120=_cfg(url='https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-regnet/regnetx_120-65d5521e.pth'), + regnetx_160=_cfg(url='https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-regnet/regnetx_160-c98c4112.pth'), + regnetx_320=_cfg(url='https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-regnet/regnetx_320-8ea38b93.pth'), + regnety_002=_cfg(url='https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-regnet/regnety_002-e68ca334.pth'), + regnety_004=_cfg(url='https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-regnet/regnety_004-0db870e6.pth'), + regnety_006=_cfg(url='https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-regnet/regnety_006-c67e57ec.pth'), + regnety_008=_cfg(url='https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-regnet/regnety_008-dc900dbe.pth'), + regnety_016=_cfg(url='https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-regnet/regnety_016-54367f74.pth'), + regnety_032=_cfg(url='https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-regnet/regnety_032-62b47782.pth'), + regnety_040=_cfg(url='https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-regnet/regnety_040-f0d569f9.pth'), + regnety_064=_cfg(url='https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-regnet/regnety_064-0a48325c.pth'), + regnety_080=_cfg(url='https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-regnet/regnety_080-e7f3eb93.pth'), + regnety_120=_cfg(url='https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-regnet/regnety_120-721ba79a.pth'), + regnety_160=_cfg(url='https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-regnet/regnety_160-d64013cd.pth'), + regnety_320=_cfg(url='https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-regnet/regnety_320-ba464b29.pth'), ) @@ 
-344,142 +344,142 @@ def _regnet(variant, pretrained, **kwargs): @register_model def regnetx_002(pretrained=False, **kwargs): """RegNetX-200MF""" - return _regnet('x_002', pretrained, **kwargs) + return _regnet('regnetx_002', pretrained, **kwargs) @register_model def regnetx_004(pretrained=False, **kwargs): """RegNetX-400MF""" - return _regnet('x_004', pretrained, **kwargs) + return _regnet('regnetx_004', pretrained, **kwargs) @register_model def regnetx_006(pretrained=False, **kwargs): """RegNetX-600MF""" - return _regnet('x_006', pretrained, **kwargs) + return _regnet('regnetx_006', pretrained, **kwargs) @register_model def regnetx_008(pretrained=False, **kwargs): """RegNetX-800MF""" - return _regnet('x_008', pretrained, **kwargs) + return _regnet('regnetx_008', pretrained, **kwargs) @register_model def regnetx_016(pretrained=False, **kwargs): """RegNetX-1.6GF""" - return _regnet('x_016', pretrained, **kwargs) + return _regnet('regnetx_016', pretrained, **kwargs) @register_model def regnetx_032(pretrained=False, **kwargs): """RegNetX-3.2GF""" - return _regnet('x_032', pretrained, **kwargs) + return _regnet('regnetx_032', pretrained, **kwargs) @register_model def regnetx_040(pretrained=False, **kwargs): """RegNetX-4.0GF""" - return _regnet('x_040', pretrained, **kwargs) + return _regnet('regnetx_040', pretrained, **kwargs) @register_model def regnetx_064(pretrained=False, **kwargs): """RegNetX-6.4GF""" - return _regnet('x_064', pretrained, **kwargs) + return _regnet('regnetx_064', pretrained, **kwargs) @register_model def regnetx_080(pretrained=False, **kwargs): """RegNetX-8.0GF""" - return _regnet('x_080', pretrained, **kwargs) + return _regnet('regnetx_080', pretrained, **kwargs) @register_model def regnetx_120(pretrained=False, **kwargs): """RegNetX-12GF""" - return _regnet('x_120', pretrained, **kwargs) + return _regnet('regnetx_120', pretrained, **kwargs) @register_model def regnetx_160(pretrained=False, **kwargs): """RegNetX-16GF""" - return _regnet('x_160', pretrained, **kwargs) + return _regnet('regnetx_160', pretrained, **kwargs) @register_model def regnetx_320(pretrained=False, **kwargs): """RegNetX-32GF""" - return _regnet('x_320', pretrained, **kwargs) + return _regnet('regnetx_320', pretrained, **kwargs) @register_model def regnety_002(pretrained=False, **kwargs): """RegNetY-200MF""" - return _regnet('y_002', pretrained, **kwargs) + return _regnet('regnety_002', pretrained, **kwargs) @register_model def regnety_004(pretrained=False, **kwargs): """RegNetY-400MF""" - return _regnet('y_004', pretrained, **kwargs) + return _regnet('regnety_004', pretrained, **kwargs) @register_model def regnety_006(pretrained=False, **kwargs): """RegNetY-600MF""" - return _regnet('y_006', pretrained, **kwargs) + return _regnet('regnety_006', pretrained, **kwargs) @register_model def regnety_008(pretrained=False, **kwargs): """RegNetY-800MF""" - return _regnet('y_008', pretrained, **kwargs) + return _regnet('regnety_008', pretrained, **kwargs) @register_model def regnety_016(pretrained=False, **kwargs): """RegNetY-1.6GF""" - return _regnet('y_016', pretrained, **kwargs) + return _regnet('regnety_016', pretrained, **kwargs) @register_model def regnety_032(pretrained=False, **kwargs): """RegNetY-3.2GF""" - return _regnet('y_032', pretrained, **kwargs) + return _regnet('regnety_032', pretrained, **kwargs) @register_model def regnety_040(pretrained=False, **kwargs): """RegNetY-4.0GF""" - return _regnet('y_040', pretrained, **kwargs) + return _regnet('regnety_040', pretrained, **kwargs) @register_model 
def regnety_064(pretrained=False, **kwargs): """RegNetY-6.4GF""" - return _regnet('y_064', pretrained, **kwargs) + return _regnet('regnety_064', pretrained, **kwargs) @register_model def regnety_080(pretrained=False, **kwargs): """RegNetY-8.0GF""" - return _regnet('y_080', pretrained, **kwargs) + return _regnet('regnety_080', pretrained, **kwargs) @register_model def regnety_120(pretrained=False, **kwargs): """RegNetY-12GF""" - return _regnet('y_120', pretrained, **kwargs) + return _regnet('regnety_120', pretrained, **kwargs) @register_model def regnety_160(pretrained=False, **kwargs): """RegNetY-16GF""" - return _regnet('y_160', pretrained, **kwargs) + return _regnet('regnety_160', pretrained, **kwargs) @register_model def regnety_320(pretrained=False, **kwargs): """RegNetY-32GF""" - return _regnet('y_320', pretrained, **kwargs) + return _regnet('regnety_320', pretrained, **kwargs) From d3ee3de96a21a96fff0e2f3c3a93b6c3b12306bc Mon Sep 17 00:00:00 2001 From: Ross Wightman Date: Thu, 11 Jun 2020 13:34:21 -0700 Subject: [PATCH 16/19] Update validation script first batch prime and clear cuda cache between multi-model runs --- validate.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/validate.py b/validate.py index 50010cce..ebd4d849 100755 --- a/validate.py +++ b/validate.py @@ -145,7 +145,8 @@ def validate(args): model.eval() with torch.no_grad(): # warmup, reduce variability of first batch time, especially for comparing torchscript vs non - model(torch.randn((args.batch_size,) + data_config['input_size']).cuda()) + input = torch.randn((args.batch_size,) + data_config['input_size']).cuda() + model(input) end = time.time() for i, (input, target) in enumerate(loader): if args.no_prefetcher: @@ -238,6 +239,7 @@ def main(): raise e batch_size = max(batch_size // 2, args.num_gpu) print("Validation failed, reducing batch size by 50%") + torch.cuda.empty_cache() result.update(r) if args.checkpoint: result['checkpoint'] = args.checkpoint From 6c7932fe75e541112c065d5ef603525bb0572a9a Mon Sep 17 00:00:00 2001 From: Ross Wightman Date: Thu, 11 Jun 2020 14:34:25 -0700 Subject: [PATCH 17/19] Update sotabench.py, tweak VovNet cfg --- sotabench.py | 34 ++++++++++++++++ timm/models/vovnet.py | 95 ++++++++++++++++++++++--------------------- 2 files changed, 83 insertions(+), 46 deletions(-) diff --git a/sotabench.py b/sotabench.py index c394d062..1d7a0590 100644 --- a/sotabench.py +++ b/sotabench.py @@ -135,6 +135,12 @@ model_list = [ _entry('resnetblur50', 'ResNet-Blur-50', '1904.11486'), + _entry('densenet121', 'DenseNet-121', '1608.06993'), + _entry('densenetblur121d', 'DenseNet-Blur-121D', '1904.11486', + model_desc='DenseNet with blur pooling and deep stem'), + + _entry('ese_vovnet39b', 'VoVNet-39-V2', '1911.06667'), + _entry('tf_efficientnet_b0', 'EfficientNet-B0 (AutoAugment)', '1905.11946', model_desc='Ported from official Google AI Tensorflow weights'), _entry('tf_efficientnet_b1', 'EfficientNet-B1 (AutoAugment)', '1905.11946', @@ -389,6 +395,34 @@ model_list = [ model_desc='Originally from https://github.com/mehtadushy/SelecSLS-Pytorch'), _entry('selecsls60b', 'SelecSLS-60_B', '1907.00837', model_desc='Originally from https://github.com/mehtadushy/SelecSLS-Pytorch'), + + ## RegNet official impl weighs + _entry('regnetx_002', 'RegNetX-200MF', '2003.13678'), + _entry('regnetx_004', 'RegNetX-400MF', '2003.13678'), + _entry('regnetx_006', 'RegNetX-600MF', '2003.13678'), + _entry('regnetx_008', 'RegNetX-800MF', '2003.13678'), + _entry('regnetx_016', 'RegNetX-1.6GF', '2003.13678'), 
+ _entry('regnetx_032', 'RegNetX-3.2GF', '2003.13678'), + _entry('regnetx_040', 'RegNetX-4.0GF', '2003.13678'), + _entry('regnetx_064', 'RegNetX-6.4GF', '2003.13678'), + _entry('regnetx_080', 'RegNetX-8.0GF', '2003.13678'), + _entry('regnetx_120', 'RegNetX-12GF', '2003.13678'), + _entry('regnetx_160', 'RegNetX-16GF', '2003.13678'), + _entry('regnetx_320', 'RegNetX-32GF', '2003.13678', batch_size=BATCH_SIZE // 2), + + _entry('regnety_002', 'RegNetY-200MF', '2003.13678'), + _entry('regnety_004', 'RegNetY-400MF', '2003.13678'), + _entry('regnety_006', 'RegNetY-600MF', '2003.13678'), + _entry('regnety_008', 'RegNetY-800MF', '2003.13678'), + _entry('regnety_016', 'RegNetY-1.6GF', '2003.13678'), + _entry('regnety_032', 'RegNetY-3.2GF', '2003.13678'), + _entry('regnety_040', 'RegNetY-4.0GF', '2003.13678'), + _entry('regnety_064', 'RegNetY-6.4GF', '2003.13678'), + _entry('regnety_080', 'RegNetY-8.0GF', '2003.13678'), + _entry('regnety_120', 'RegNetY-12GF', '2003.13678'), + _entry('regnety_160', 'RegNetY-16GF', '2003.13678'), + _entry('regnety_320', 'RegNetY-32GF', '2003.13678', batch_size=BATCH_SIZE // 2), + ] for m in model_list: diff --git a/timm/models/vovnet.py b/timm/models/vovnet.py index 70dbac12..efeda955 100644 --- a/timm/models/vovnet.py +++ b/timm/models/vovnet.py @@ -28,9 +28,9 @@ from .layers import ConvBnAct, SeparableConvBnAct, BatchNormAct2d, SelectAdaptiv # https://github.com/stigma0617/VoVNet.pytorch/blob/master/models_vovnet/vovnet.py model_cfgs = dict( vovnet39a=dict( - stem_ch=[64, 64, 128], - stage_conv_ch=[128, 160, 192, 224], - stage_out_ch=[256, 512, 768, 1024], + stem_chs=[64, 64, 128], + stage_conv_chs=[128, 160, 192, 224], + stage_out_chs=[256, 512, 768, 1024], layer_per_block=5, block_per_stage=[1, 1, 2, 2], residual=False, @@ -38,9 +38,9 @@ model_cfgs = dict( attn='', ), vovnet57a=dict( - stem_ch=[64, 64, 128], - stage_conv_ch=[128, 160, 192, 224], - stage_out_ch=[256, 512, 768, 1024], + stem_chs=[64, 64, 128], + stage_conv_chs=[128, 160, 192, 224], + stage_out_chs=[256, 512, 768, 1024], layer_per_block=5, block_per_stage=[1, 1, 4, 3], residual=False, @@ -49,9 +49,9 @@ model_cfgs = dict( ), ese_vovnet19b_slim_dw=dict( - stem_ch=[64, 64, 64], - stage_conv_ch=[64, 80, 96, 112], - stage_out_ch=[112, 256, 384, 512], + stem_chs=[64, 64, 64], + stage_conv_chs=[64, 80, 96, 112], + stage_out_chs=[112, 256, 384, 512], layer_per_block=3, block_per_stage=[1, 1, 1, 1], residual=True, @@ -60,9 +60,9 @@ model_cfgs = dict( ), ese_vovnet19b_dw=dict( - stem_ch=[64, 64, 64], - stage_conv_ch=[128, 160, 192, 224], - stage_out_ch=[256, 512, 768, 1024], + stem_chs=[64, 64, 64], + stage_conv_chs=[128, 160, 192, 224], + stage_out_chs=[256, 512, 768, 1024], layer_per_block=3, block_per_stage=[1, 1, 1, 1], residual=True, @@ -70,9 +70,9 @@ model_cfgs = dict( attn='ese', ), ese_vovnet19b_slim=dict( - stem_ch=[64, 64, 128], - stage_conv_ch=[64, 80, 96, 112], - stage_out_ch=[112, 256, 384, 512], + stem_chs=[64, 64, 128], + stage_conv_chs=[64, 80, 96, 112], + stage_out_chs=[112, 256, 384, 512], layer_per_block=3, block_per_stage=[1, 1, 1, 1], residual=True, @@ -80,9 +80,9 @@ model_cfgs = dict( attn='ese', ), ese_vovnet19b=dict( - stem_ch=[64, 64, 128], - stage_conv_ch=[128, 160, 192, 224], - stage_out_ch=[256, 512, 768, 1024], + stem_chs=[64, 64, 128], + stage_conv_chs=[128, 160, 192, 224], + stage_out_chs=[256, 512, 768, 1024], layer_per_block=3, block_per_stage=[1, 1, 1, 1], residual=True, @@ -91,9 +91,9 @@ model_cfgs = dict( ), ese_vovnet39b=dict( - stem_ch=[64, 64, 128], - stage_conv_ch=[128, 
160, 192, 224], - stage_out_ch=[256, 512, 768, 1024], + stem_chs=[64, 64, 128], + stage_conv_chs=[128, 160, 192, 224], + stage_out_chs=[256, 512, 768, 1024], layer_per_block=5, block_per_stage=[1, 1, 2, 2], residual=True, @@ -101,9 +101,9 @@ model_cfgs = dict( attn='ese', ), ese_vovnet57b=dict( - stem_ch=[64, 64, 128], - stage_conv_ch=[128, 160, 192, 224], - stage_out_ch=[256, 512, 768, 1024], + stem_chs=[64, 64, 128], + stage_conv_chs=[128, 160, 192, 224], + stage_out_chs=[256, 512, 768, 1024], layer_per_block=5, block_per_stage=[1, 1, 4, 3], residual=True, @@ -112,9 +112,9 @@ model_cfgs = dict( ), ese_vovnet99b=dict( - stem_ch=[64, 64, 128], - stage_conv_ch=[128, 160, 192, 224], - stage_out_ch=[256, 512, 768, 1024], + stem_chs=[64, 64, 128], + stage_conv_chs=[128, 160, 192, 224], + stage_out_chs=[256, 512, 768, 1024], layer_per_block=5, block_per_stage=[1, 3, 9, 3], residual=True, @@ -122,9 +122,9 @@ model_cfgs = dict( attn='ese', ), eca_vovnet39b=dict( - stem_ch=[64, 64, 128], - stage_conv_ch=[128, 160, 192, 224], - stage_out_ch=[256, 512, 768, 1024], + stem_chs=[64, 64, 128], + stage_conv_chs=[128, 160, 192, 224], + stage_out_chs=[256, 512, 768, 1024], layer_per_block=5, block_per_stage=[1, 1, 2, 2], residual=True, @@ -132,6 +132,8 @@ model_cfgs = dict( attn='eca', ), ) +model_cfgs['ese_vovnet39b_evos'] = model_cfgs['ese_vovnet39b'] +model_cfgs['ese_vovnet99b_iabn'] = model_cfgs['ese_vovnet99b'] def _cfg(url=''): @@ -154,6 +156,8 @@ default_cfgs = dict( ese_vovnet57b=_cfg(url=''), ese_vovnet99b=_cfg(url=''), eca_vovnet39b=_cfg(url=''), + ese_vovnet39b_evos=_cfg(url=''), + eee_vovnet99b_iabn=_cfg(url=''), ) @@ -277,9 +281,9 @@ class VovNet(nn.Module): self.drop_rate = drop_rate assert stem_stride in (4, 2) - stem_ch = cfg["stem_ch"] - stage_conv_ch = cfg["stage_conv_ch"] - stage_out_ch = cfg["stage_out_ch"] + stem_chs = cfg["stem_chs"] + stage_conv_chs = cfg["stage_conv_chs"] + stage_out_chs = cfg["stage_out_chs"] block_per_stage = cfg["block_per_stage"] layer_per_block = cfg["layer_per_block"] @@ -287,23 +291,23 @@ class VovNet(nn.Module): last_stem_stride = stem_stride // 2 conv_type = SeparableConvBnAct if cfg["depthwise"] else ConvBnAct self.stem = nn.Sequential(*[ - ConvBnAct(in_chans, stem_ch[0], 3, stride=2, norm_layer=norm_layer), - conv_type(stem_ch[0], stem_ch[1], 3, stride=1, norm_layer=norm_layer), - conv_type(stem_ch[1], stem_ch[2], 3, stride=last_stem_stride, norm_layer=norm_layer), + ConvBnAct(in_chans, stem_chs[0], 3, stride=2, norm_layer=norm_layer), + conv_type(stem_chs[0], stem_chs[1], 3, stride=1, norm_layer=norm_layer), + conv_type(stem_chs[1], stem_chs[2], 3, stride=last_stem_stride, norm_layer=norm_layer), ]) # OSA stages - in_ch_list = stem_ch[-1:] + stage_out_ch[:-1] + in_ch_list = stem_chs[-1:] + stage_out_chs[:-1] stage_args = dict( residual=cfg["residual"], depthwise=cfg["depthwise"], attn=cfg["attn"], norm_layer=norm_layer) stages = [] for i in range(4): # num_stages downsample = stem_stride == 2 or i > 0 # first stage has no stride/downsample if stem_stride is 4 stages += [OsaStage( - in_ch_list[i], stage_conv_ch[i], stage_out_ch[i], block_per_stage[i], layer_per_block, + in_ch_list[i], stage_conv_chs[i], stage_out_chs[i], block_per_stage[i], layer_per_block, downsample=downsample, **stage_args) ] - self.num_features = stage_out_ch[i] + self.num_features = stage_out_chs[i] self.stages = nn.Sequential(*stages) self.head = ClassifierHead(self.num_features, num_classes, pool_type=global_pool, drop_rate=drop_rate) @@ -398,14 +402,13 @@ def 
eca_vovnet39b(pretrained=False, **kwargs): # Experimental Models -@register_model -def ese_vovnet39b_iabn(pretrained=False, **kwargs): - norm_layer = get_norm_act_layer('iabn') - return _vovnet('ese_vovnet39b', pretrained=pretrained, norm_layer=norm_layer, **kwargs) - - @register_model def ese_vovnet39b_evos(pretrained=False, **kwargs): def norm_act_fn(num_features, **kwargs): return create_norm_act('EvoNormSample', num_features, jit=False, **kwargs) - return _vovnet('ese_vovnet39b', pretrained=pretrained, norm_layer=norm_act_fn, **kwargs) + return _vovnet('ese_vovnet39b_evos', pretrained=pretrained, norm_layer=norm_act_fn, **kwargs) + +@register_model +def ese_vovnet99b_iabn(pretrained=False, **kwargs): + norm_layer = get_norm_act_layer('iabn') + return _vovnet('ese_vovnet99b_iabn', pretrained=pretrained, norm_layer=norm_layer, **kwargs) From 151679c2f16a81ee2417976e77c1d5f58026c0aa Mon Sep 17 00:00:00 2001 From: Ross Wightman Date: Thu, 11 Jun 2020 14:49:23 -0700 Subject: [PATCH 18/19] Add custom grad tests, fix cut & paste error with hard_mish ME, add a few more pytorch act fns to factory --- tests/test_layers.py | 71 ++++++++++++++++++++++++++++ timm/models/layers/activations_me.py | 4 +- timm/models/layers/create_act.py | 11 +++++ 3 files changed, 84 insertions(+), 2 deletions(-) create mode 100644 tests/test_layers.py diff --git a/tests/test_layers.py b/tests/test_layers.py new file mode 100644 index 00000000..714cb444 --- /dev/null +++ b/tests/test_layers.py @@ -0,0 +1,71 @@ +import pytest +import torch +import torch.nn as nn +import platform +import os + +from timm.models.layers import create_act_layer, get_act_layer, set_layer_config + + +class MLP(nn.Module): + def __init__(self, act_layer="relu"): + super(MLP, self).__init__() + self.fc1 = nn.Linear(1000, 100) + self.act = create_act_layer(act_layer, inplace=True) + self.fc2 = nn.Linear(100, 10) + + def forward(self, x): + x = self.fc1(x) + x = self.act(x) + x = self.fc2(x) + return x + + +def _run_act_layer_grad(act_type): + x = torch.rand(10, 1000) * 10 + m = MLP(act_layer=act_type) + + def _run(x, act_layer=''): + if act_layer: + # replace act layer if set + m.act = create_act_layer(act_layer, inplace=True) + out = m(x) + l = (out - 0).pow(2).sum() + return l + + out_me = _run(x) + + with set_layer_config(scriptable=True): + out_jit = _run(x, act_type) + + assert torch.isclose(out_jit, out_me) + + with set_layer_config(no_jit=True): + out_basic = _run(x, act_type) + + assert torch.isclose(out_basic, out_jit) + + +def test_swish_grad(): + for _ in range(100): + _run_act_layer_grad('swish') + + +def test_mish_grad(): + for _ in range(100): + _run_act_layer_grad('mish') + + +def test_hard_sigmoid_grad(): + for _ in range(100): + _run_act_layer_grad('hard_sigmoid') + + +def test_hard_swish_grad(): + for _ in range(100): + _run_act_layer_grad('hard_swish') + + +def test_hard_mish_grad(): + for _ in range(100): + _run_act_layer_grad('hard_mish') diff --git a/timm/models/layers/activations_me.py b/timm/models/layers/activations_me.py index 9c492f1e..b81f7165 100644 --- a/timm/models/layers/activations_me.py +++ b/timm/models/layers/activations_me.py @@ -185,12 +185,12 @@ class HardMishJitAutoFn(torch.autograd.Function): @staticmethod def forward(ctx, x): ctx.save_for_backward(x) - return mish_jit_fwd(x) + return hard_mish_jit_fwd(x) @staticmethod def backward(ctx, grad_output): x = ctx.saved_tensors[0] - return mish_jit_bwd(x, grad_output) + return hard_mish_jit_bwd(x, grad_output) def hard_mish_me(x, inplace: bool = False): diff 
--git a/timm/models/layers/create_act.py b/timm/models/layers/create_act.py index 66ab1e84..6404d62f 100644 --- a/timm/models/layers/create_act.py +++ b/timm/models/layers/create_act.py @@ -9,6 +9,12 @@ _ACT_FN_DEFAULT = dict( mish=mish, relu=F.relu, relu6=F.relu6, + leaky_relu=F.leaky_relu, + elu=F.elu, + prelu=F.prelu, + celu=F.celu, + selu=F.selu, + gelu=F.gelu, sigmoid=sigmoid, tanh=tanh, hard_sigmoid=hard_sigmoid, @@ -37,6 +43,11 @@ _ACT_LAYER_DEFAULT = dict( mish=Mish, relu=nn.ReLU, relu6=nn.ReLU6, + elu=nn.ELU, + prelu=nn.PReLU, + celu=nn.CELU, + selu=nn.SELU, + gelu=nn.GELU, sigmoid=Sigmoid, tanh=Tanh, hard_sigmoid=HardSigmoid, From 39f27c1add8ad39d6d91c7b9a01282c3faf5a6c4 Mon Sep 17 00:00:00 2001 From: Ross Wightman Date: Thu, 11 Jun 2020 15:41:58 -0700 Subject: [PATCH 19/19] Almost ready to merge, vovnet typo, version bump, readme addition --- README.md | 13 +++++++++++++ timm/models/vovnet.py | 2 +- timm/version.py | 2 +- 3 files changed, 15 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 3cd5c223..7ebe9341 100644 --- a/README.md +++ b/README.md @@ -2,6 +2,19 @@ ## What's New +### June 11, 2020 +Bunch of changes: +* DenseNet models updated with memory efficient addition from torchvision (fixed a bug), blur pooling and deep stem additions +* VoVNet V1 and V2 models added, 39 V2 variant (ese_vovnet_39b) trained to 79.3 top-1 +* Activation factory added along with new activations: + * select act at model creation time for more flexibility in using activations compatible with scripting or tracing (ONNX export) + * hard_mish (experimental) added with memory-efficient grad, along with ME hard_swish + * context mgr for setting exportable/scriptable/no_jit states +* Norm + Activation combo layers added with initial trial support in DenseNet and VoVNet along with impl of EvoNorm and InplaceAbn wrapper that fit the interface +* Torchscript works for all but two of the model types as long as using Pytorch 1.5+, tests added for this +* Some import cleanup and classifier reset changes, all models will have classifier reset to nn.Identity on reset_classifer(0) call +* Prep for 0.1.28 pip release + ### May 12, 2020 * Add ResNeSt models (code adapted from https://github.com/zhanghang1989/ResNeSt, paper https://arxiv.org/abs/2004.08955)) diff --git a/timm/models/vovnet.py b/timm/models/vovnet.py index efeda955..94e0e2e8 100644 --- a/timm/models/vovnet.py +++ b/timm/models/vovnet.py @@ -157,7 +157,7 @@ default_cfgs = dict( ese_vovnet99b=_cfg(url=''), eca_vovnet39b=_cfg(url=''), ese_vovnet39b_evos=_cfg(url=''), - eee_vovnet99b_iabn=_cfg(url=''), + ese_vovnet99b_iabn=_cfg(url=''), ) diff --git a/timm/version.py b/timm/version.py index 641e1a8b..13b519b0 100644 --- a/timm/version.py +++ b/timm/version.py @@ -1 +1 @@ -__version__ = '0.1.26' +__version__ = '0.1.28'
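
A minimal usage sketch of the behaviour described in the notes above, assuming a checkout with this patch series applied (roughly timm 0.1.28); the model names, input shape and activation choice are illustrative only, not mandated by the patches:

    import torch
    from timm.models import create_model, list_models
    from timm.models.layers import create_act_layer, set_layer_config

    # PATCH 15/19: cfg keys now match the registered model names, so wildcard
    # registry lookups return the RegNet variants directly.
    print(list_models('regnety_*'))

    # reset_classifier(0) now installs nn.Identity() instead of None, so the
    # forward pass still runs and yields pooled features (model/input chosen
    # arbitrarily for illustration).
    model = create_model('densenet121', pretrained=False)
    model.reset_classifier(0)
    print(model.get_classifier())            # Identity()
    with torch.no_grad():
        feats = model(torch.randn(1, 3, 224, 224))
    print(feats.shape)                       # pooled feature vector, no classifier applied

    # Activation factory: pick an activation at layer-creation time, optionally
    # under a scriptable layer config ('elu' chosen arbitrarily here).
    with set_layer_config(scriptable=True):
        act = create_act_layer('elu', inplace=True)
    print(act)

Keeping the classifier slot a real module (nn.Identity() rather than None) means forward() needs no None-checks, which is also what keeps the reset models torchscript-friendly as noted in the README hunk above.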