From b87d98b2384aa3e6e522f04a052e94bb23322c13 Mon Sep 17 00:00:00 2001 From: Mike <60082702+lmk123568@users.noreply.github.com> Date: Sun, 6 Jun 2021 17:58:31 +0800 Subject: [PATCH 01/31] Update convit.py Cut out the duplicates --- timm/models/convit.py | 1 - 1 file changed, 1 deletion(-) diff --git a/timm/models/convit.py b/timm/models/convit.py index b15b46d8..60ba59fc 100644 --- a/timm/models/convit.py +++ b/timm/models/convit.py @@ -64,7 +64,6 @@ class GPSA(nn.Module): self.dim = dim head_dim = dim // num_heads self.scale = qk_scale or head_dim ** -0.5 - self.locality_strength = locality_strength self.qk = nn.Linear(dim, dim * 2, bias=qkv_bias) self.v = nn.Linear(dim, dim, bias=qkv_bias) From ded167148336f8a3cc0059f6d26779255013b7f0 Mon Sep 17 00:00:00 2001 From: Dongyoon Han Date: Mon, 7 Jun 2021 23:08:55 +0900 Subject: [PATCH 02/31] Fix stochastic depth working only with a shortcut --- timm/models/rexnet.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/timm/models/rexnet.py b/timm/models/rexnet.py index 7ab8d659..462ad8fe 100644 --- a/timm/models/rexnet.py +++ b/timm/models/rexnet.py @@ -89,10 +89,11 @@ class LinearBottleneck(nn.Module): x = self.se(x) x = self.act_dw(x) x = self.conv_pwl(x) - if self.drop_path is not None: - x = self.drop_path(x) if self.use_shortcut: - x[:, 0:self.in_channels] += shortcut + if self.drop_path is not None: + x = self.drop_path(x) + + x[:, 0:self.in_channels] += shortcut return x From 2a63d0246b87cdab3ccdece645b6bd2aa102221c Mon Sep 17 00:00:00 2001 From: Ross Wightman Date: Mon, 7 Jun 2021 14:38:30 -0700 Subject: [PATCH 03/31] Post merge cleanup --- timm/models/convit.py | 2 +- timm/models/rexnet.py | 3 +-- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/timm/models/convit.py b/timm/models/convit.py index 60ba59fc..695c7c4f 100644 --- a/timm/models/convit.py +++ b/timm/models/convit.py @@ -64,6 +64,7 @@ class GPSA(nn.Module): self.dim = dim head_dim = dim // num_heads self.scale = qk_scale or head_dim ** -0.5 + self.locality_strength = locality_strength self.qk = nn.Linear(dim, dim * 2, bias=qkv_bias) self.v = nn.Linear(dim, dim, bias=qkv_bias) @@ -72,7 +73,6 @@ class GPSA(nn.Module): self.proj = nn.Linear(dim, dim) self.pos_proj = nn.Linear(3, num_heads) self.proj_drop = nn.Dropout(proj_drop) - self.locality_strength = locality_strength self.gating_param = nn.Parameter(torch.ones(self.num_heads)) self.rel_indices: torch.Tensor = torch.zeros(1, 1, 1, 3) # silly torchscript hack, won't work with None diff --git a/timm/models/rexnet.py b/timm/models/rexnet.py index 462ad8fe..279780be 100644 --- a/timm/models/rexnet.py +++ b/timm/models/rexnet.py @@ -92,8 +92,7 @@ class LinearBottleneck(nn.Module): if self.use_shortcut: if self.drop_path is not None: x = self.drop_path(x) - - x[:, 0:self.in_channels] += shortcut + x[:, 0:self.in_channels] += shortcut return x From 8e4ac3549f65eefa6b094cd04876b19ed3ca7506 Mon Sep 17 00:00:00 2001 From: Ross Wightman Date: Mon, 7 Jun 2021 17:14:19 -0700 Subject: [PATCH 04/31] All ScaledStdConv and StdConv uses default to using F.layernorm so that they work with PyTorch XLA. eps value tweaking is a WIP. 
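A minimal standalone check of the equivalence the NOTE comments below describe; the only difference between the two paths is where eps enters (inside vs. outside the sqrt), which is the eps tweaking still in progress:

    import torch
    import torch.nn.functional as F

    w = torch.randn(8, 16, 3, 3)  # (out_ch, in_ch, kh, kw), shapes arbitrary
    eps = 1e-5
    # fused path: per-filter (w - mean) / sqrt(var + eps) in a single op
    w_ln = F.layer_norm(w, w.shape[1:], eps=eps)
    # reference path: two ops, with eps added outside the sqrt
    std, mean = torch.std_mean(w, dim=[1, 2, 3], keepdim=True, unbiased=False)
    w_ref = (w - mean) / (std + eps)
    print((w_ln - w_ref).abs().max())  # small, but not exactly zero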
--- timm/models/layers/std_conv.py | 52 +++++++++++++----------- timm/models/nfnet.py | 7 +++- timm/models/vision_transformer_hybrid.py | 2 +- 3 files changed, 34 insertions(+), 27 deletions(-) diff --git a/timm/models/layers/std_conv.py b/timm/models/layers/std_conv.py index b0cb1eeb..a1afc653 100644 --- a/timm/models/layers/std_conv.py +++ b/timm/models/layers/std_conv.py @@ -19,17 +19,22 @@ class StdConv2d(nn.Conv2d): """ def __init__( self, in_channel, out_channels, kernel_size, stride=1, padding=None, dilation=1, - groups=1, bias=False, eps=1e-5): + groups=1, bias=False, eps=1e-5, use_layernorm=True): if padding is None: padding = get_padding(kernel_size, stride, dilation) super().__init__( in_channel, out_channels, kernel_size, stride=stride, padding=padding, dilation=dilation, groups=groups, bias=bias) self.eps = eps + self.use_layernorm = use_layernorm def get_weight(self): - std, mean = torch.std_mean(self.weight, dim=[1, 2, 3], keepdim=True, unbiased=False) - weight = (self.weight - mean) / (std + self.eps) + if self.use_layernorm: + # NOTE F.layer_norm is being used to compute (self.weight - mean) / (sqrt(var) + self.eps) in one op + weight = F.layer_norm(self.weight, self.weight.shape[1:], eps=self.eps) + else: + std, mean = torch.std_mean(self.weight, dim=[1, 2, 3], keepdim=True, unbiased=False) + weight = (self.weight - mean) / (std + self.eps) return weight def forward(self, x): @@ -45,17 +50,22 @@ class StdConv2dSame(nn.Conv2d): """ def __init__( self, in_channel, out_channels, kernel_size, stride=1, padding='SAME', dilation=1, - groups=1, bias=False, eps=1e-5): + groups=1, bias=False, eps=1e-5, use_layernorm=True): padding, is_dynamic = get_padding_value(padding, kernel_size, stride=stride, dilation=dilation) super().__init__( in_channel, out_channels, kernel_size, stride=stride, padding=padding, dilation=dilation, groups=groups, bias=bias) self.same_pad = is_dynamic self.eps = eps + self.use_layernorm = use_layernorm def get_weight(self): - std, mean = torch.std_mean(self.weight, dim=[1, 2, 3], keepdim=True, unbiased=False) - weight = (self.weight - mean) / (std + self.eps) + if self.use_layernorm: + # NOTE F.layer_norm is being used to compute (self.weight - mean) / (sqrt(var) + self.eps) in one op + weight = F.layer_norm(self.weight, self.weight.shape[1:], eps=self.eps) + else: + std, mean = torch.std_mean(self.weight, dim=[1, 2, 3], keepdim=True, unbiased=False) + weight = (self.weight - mean) / (std + self.eps) return weight def forward(self, x): @@ -76,7 +86,7 @@ class ScaledStdConv2d(nn.Conv2d): def __init__( self, in_channels, out_channels, kernel_size, stride=1, padding=None, dilation=1, groups=1, - bias=True, gamma=1.0, eps=1e-5, gain_init=1.0, use_layernorm=False): + bias=True, gamma=1.0, eps=1e-5, gain_init=1.0, use_layernorm=True): if padding is None: padding = get_padding(kernel_size, stride, dilation) super().__init__( @@ -84,16 +94,17 @@ class ScaledStdConv2d(nn.Conv2d): groups=groups, bias=bias) self.gain = nn.Parameter(torch.full((self.out_channels, 1, 1, 1), gain_init)) self.scale = gamma * self.weight[0].numel() ** -0.5 # gamma * 1 / sqrt(fan-in) - self.eps = eps ** 2 if use_layernorm else eps + self.eps = eps self.use_layernorm = use_layernorm # experimental, slightly faster/less GPU memory to hijack LN kernel def get_weight(self): if self.use_layernorm: - weight = self.scale * F.layer_norm(self.weight, self.weight.shape[1:], eps=self.eps) + # NOTE F.layer_norm is being used to compute (self.weight - mean) / (sqrt(var) + self.eps) in one op + weight = 
F.layer_norm(self.weight, self.weight.shape[1:], eps=self.eps) else: std, mean = torch.std_mean(self.weight, dim=[1, 2, 3], keepdim=True, unbiased=False) - weight = self.scale * (self.weight - mean) / (std + self.eps) - return self.gain * weight + weight = (self.weight - mean) / (std + self.eps) + return weight.mul_(self.gain * self.scale) def forward(self, x): return F.conv2d(x, self.get_weight(), self.bias, self.stride, self.padding, self.dilation, self.groups) @@ -110,7 +121,7 @@ class ScaledStdConv2dSame(nn.Conv2d): def __init__( self, in_channels, out_channels, kernel_size, stride=1, padding='SAME', dilation=1, groups=1, - bias=True, gamma=1.0, eps=1e-5, gain_init=1.0, use_layernorm=False): + bias=True, gamma=1.0, eps=1e-5, gain_init=1.0, use_layernorm=True): padding, is_dynamic = get_padding_value(padding, kernel_size, stride=stride, dilation=dilation) super().__init__( in_channels, out_channels, kernel_size, stride=stride, padding=padding, dilation=dilation, @@ -118,24 +129,17 @@ class ScaledStdConv2dSame(nn.Conv2d): self.gain = nn.Parameter(torch.full((self.out_channels, 1, 1, 1), gain_init)) self.scale = gamma * self.weight[0].numel() ** -0.5 self.same_pad = is_dynamic - self.eps = eps ** 2 if use_layernorm else eps + self.eps = eps self.use_layernorm = use_layernorm # experimental, slightly faster/less GPU memory to hijack LN kernel - # NOTE an alternate formulation to consider, closer to DeepMind Haiku impl but doesn't seem - # to make much numerical difference (+/- .002 to .004) in top-1 during eval. - # def get_weight(self): - # var, mean = torch.var_mean(self.weight, dim=[1, 2, 3], keepdim=True, unbiased=False) - # scale = torch.rsqrt((self.weight[0].numel() * var).clamp_(self.eps)) * self.gain - # weight = (self.weight - mean) * scale - # return self.gain * weight - def get_weight(self): if self.use_layernorm: - weight = self.scale * F.layer_norm(self.weight, self.weight.shape[1:], eps=self.eps) + # NOTE F.layer_norm is being used to compute (self.weight - mean) / (sqrt(var) + self.eps) in one op + weight = F.layer_norm(self.weight, self.weight.shape[1:], eps=self.eps) else: std, mean = torch.std_mean(self.weight, dim=[1, 2, 3], keepdim=True, unbiased=False) - weight = self.scale * (self.weight - mean) / (std + self.eps) - return self.gain * weight + weight = (self.weight - mean) / (std + self.eps) + return weight.mul_(self.gain * self.scale) def forward(self, x): if self.same_pad: diff --git a/timm/models/nfnet.py b/timm/models/nfnet.py index 593796a5..584495c3 100644 --- a/timm/models/nfnet.py +++ b/timm/models/nfnet.py @@ -166,6 +166,8 @@ class NfCfg: extra_conv: bool = False # extra 3x3 bottleneck convolution for NFNet models gamma_in_act: bool = False same_padding: bool = False + std_conv_eps: float = 1e-5 + std_conv_ln: bool = True # use layer-norm impl to normalize in std-conv, works in PyTorch XLA, slightly faster skipinit: bool = False # disabled by default, non-trivial performance impact zero_init_fc: bool = False act_layer: str = 'silu' @@ -482,10 +484,11 @@ class NormFreeNet(nn.Module): conv_layer = ScaledStdConv2dSame if cfg.same_padding else ScaledStdConv2d if cfg.gamma_in_act: act_layer = act_with_gamma(cfg.act_layer, gamma=_nonlin_gamma[cfg.act_layer]) - conv_layer = partial(conv_layer, eps=1e-4) # DM weights better with higher eps + conv_layer = partial(conv_layer, eps=cfg.std_conv_eps, use_layernorm=cfg.std_conv_ln) else: act_layer = get_act_layer(cfg.act_layer) - conv_layer = partial(conv_layer, gamma=_nonlin_gamma[cfg.act_layer]) + conv_layer = partial( + 
conv_layer, gamma=_nonlin_gamma[cfg.act_layer], eps=cfg.std_conv_eps, use_layernorm=cfg.std_conv_ln) attn_layer = partial(get_attn(cfg.attn_layer), **cfg.attn_kwargs) if cfg.attn_layer else None stem_chs = make_divisible((cfg.stem_chs or cfg.channels[0]) * cfg.width_factor, cfg.ch_div) diff --git a/timm/models/vision_transformer_hybrid.py b/timm/models/vision_transformer_hybrid.py index 9e5a62b2..a32ce019 100644 --- a/timm/models/vision_transformer_hybrid.py +++ b/timm/models/vision_transformer_hybrid.py @@ -118,7 +118,7 @@ def _resnetv2(layers=(3, 4, 9), **kwargs): padding_same = kwargs.get('padding_same', True) if padding_same: stem_type = 'same' - conv_layer = StdConv2dSame + conv_layer = partial(StdConv2dSame, eps=1e-5) else: stem_type = '' conv_layer = StdConv2d From 2f5ed2dec1e5b020bfd0c4271845e2288a223624 Mon Sep 17 00:00:00 2001 From: Ross Wightman Date: Mon, 7 Jun 2021 17:15:04 -0700 Subject: [PATCH 05/31] Update `init_values` const for 24 and 36 layer ResMLP models --- timm/models/mlp_mixer.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/timm/models/mlp_mixer.py b/timm/models/mlp_mixer.py index 5a6dce6f..6f53264a 100644 --- a/timm/models/mlp_mixer.py +++ b/timm/models/mlp_mixer.py @@ -422,7 +422,8 @@ def resmlp_24_224(pretrained=False, **kwargs): Paper: `ResMLP: Feedforward networks for image classification...` - https://arxiv.org/abs/2105.03404 """ model_args = dict( - patch_size=16, num_blocks=24, hidden_dim=384, mlp_ratio=4, block_layer=ResBlock, norm_layer=Affine, **kwargs) + patch_size=16, num_blocks=24, hidden_dim=384, mlp_ratio=4, + block_layer=partial(ResBlock, init_values=1e-5), norm_layer=Affine, **kwargs) model = _create_mixer('resmlp_24_224', pretrained=pretrained, **model_args) return model @@ -433,7 +434,8 @@ def resmlp_36_224(pretrained=False, **kwargs): Paper: `ResMLP: Feedforward networks for image classification...` - https://arxiv.org/abs/2105.03404 """ model_args = dict( - patch_size=16, num_blocks=36, hidden_dim=384, mlp_ratio=4, block_layer=ResBlock, norm_layer=Affine, **kwargs) + patch_size=16, num_blocks=36, hidden_dim=384, mlp_ratio=4, + block_layer=partial(ResBlock, init_values=1e-5), norm_layer=Affine, **kwargs) model = _create_mixer('resmlp_36_224', pretrained=pretrained, **model_args) return model From 10d8fa46202dd28551d730369e4ea5c0532d10a2 Mon Sep 17 00:00:00 2001 From: Ross Wightman Date: Tue, 8 Jun 2021 14:21:07 -0700 Subject: [PATCH 06/31] Add gc and bat attention resnext26ts variants to byob for test. 
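A quick smoke-test sketch, assuming this patch is applied to a local checkout (no pretrained weights exist for either variant yet):

    import torch
    import timm

    model = timm.create_model('bat_resnext26ts', pretrained=False).eval()
    x = torch.randn(1, 3, 256, 256)  # matches the 256x256 default_cfg added here
    with torch.no_grad():
        print(model(x).shape)  # torch.Size([1, 1000])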
--- timm/models/byobnet.py | 50 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 50 insertions(+) diff --git a/timm/models/byobnet.py b/timm/models/byobnet.py index d41245f5..750a43de 100644 --- a/timm/models/byobnet.py +++ b/timm/models/byobnet.py @@ -98,6 +98,11 @@ default_cfgs = { first_conv='stem.conv1.conv', input_size=(3, 256, 256), pool_size=(8, 8), interpolation='bicubic'), 'gcresnet50t': _cfg( first_conv='stem.conv1.conv', input_size=(3, 256, 256), pool_size=(8, 8), interpolation='bicubic'), + + 'gcresnext26ts': _cfg( + first_conv='stem.conv1.conv', input_size=(3, 256, 256), pool_size=(8, 8), interpolation='bicubic'), + 'bat_resnext26ts': _cfg( + first_conv='stem.conv1.conv', input_size=(3, 256, 256), pool_size=(8, 8), interpolation='bicubic'), } @@ -307,6 +312,37 @@ model_cfgs = dict( stem_pool=None, attn_layer='gc' ), + + gcresnext26ts=ByoModelCfg( + blocks=( + ByoBlockCfg(type='bottle', d=3, c=256, s=1, gs=32, br=0.25), + ByoBlockCfg(type='bottle', d=4, c=512, s=2, gs=32, br=0.25), + ByoBlockCfg(type='bottle', d=6, c=1024, s=2, gs=32, br=0.25), + ByoBlockCfg(type='bottle', d=3, c=2048, s=2, gs=32, br=0.25), + ), + stem_chs=64, + stem_type='tiered', + stem_pool='maxpool', + num_features=0, + act_layer='silu', + attn_layer='gc', + ), + + bat_resnext26ts=ByoModelCfg( + blocks=( + ByoBlockCfg(type='bottle', d=2, c=256, s=1, gs=32, br=0.25), + ByoBlockCfg(type='bottle', d=2, c=512, s=2, gs=32, br=0.25), + ByoBlockCfg(type='bottle', d=2, c=1024, s=2, gs=32, br=0.25), + ByoBlockCfg(type='bottle', d=2, c=2048, s=2, gs=32, br=0.25), + ), + stem_chs=64, + stem_type='tiered', + stem_pool='maxpool', + num_features=0, + act_layer='silu', + attn_layer='bat', + attn_kwargs=dict(block_size=8) + ), ) @@ -426,6 +462,20 @@ def gcresnet50t(pretrained=False, **kwargs): return _create_byobnet('gcresnet50t', pretrained=pretrained, **kwargs) +@register_model +def gcresnext26ts(pretrained=False, **kwargs): + """ + """ + return _create_byobnet('gcresnext26ts', pretrained=pretrained, **kwargs) + + +@register_model +def bat_resnext26ts(pretrained=False, **kwargs): + """ + """ + return _create_byobnet('bat_resnext26ts', pretrained=pretrained, **kwargs) + + def expand_blocks_cfg(stage_blocks_cfg: Union[ByoBlockCfg, Sequence[ByoBlockCfg]]) -> List[ByoBlockCfg]: if not isinstance(stage_blocks_cfg, Sequence): stage_blocks_cfg = (stage_blocks_cfg,) From d413eef1bf6bced1aa34be959679da7d4ad9c648 Mon Sep 17 00:00:00 2001 From: Ross Wightman Date: Tue, 8 Jun 2021 14:22:05 -0700 Subject: [PATCH 07/31] Add ResMLP-24 model weights that I trained in PyTorch XLA on TPU-VM. 79.2 top-1. 
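Usage is unchanged; a minimal sketch of pulling the new checkpoint (note the 0.89 eval crop_pct set below, vs the usual 0.875):

    import timm

    model = timm.create_model('resmlp_24_224', pretrained=True)
    print(model.default_cfg['crop_pct'])  # 0.89 for these weights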
--- timm/models/mlp_mixer.py | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/timm/models/mlp_mixer.py b/timm/models/mlp_mixer.py index 5a6dce6f..3a7eecd0 100644 --- a/timm/models/mlp_mixer.py +++ b/timm/models/mlp_mixer.py @@ -80,21 +80,24 @@ default_cfgs = dict( url='https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-vitjx/jx_mixer_l16_224_in21k-846aa33c.pth', num_classes=21843 ), + # Mixer ImageNet-21K-P pretraining mixer_b16_224_miil_in21k=_cfg( - url='https://miil-public-eu.oss-eu-central-1.aliyuncs.com/model-zoo/ImageNet_21K_P/models/timm/mixer_b16_224_miil_in21k.pth', - mean=(0, 0, 0), std=(1, 1, 1), crop_pct=0.875, interpolation='bilinear', num_classes=11221, + url='https://miil-public-eu.oss-eu-central-1.aliyuncs.com/model-zoo/ImageNet_21K_P/models/timm/mixer_b16_224_miil_in21k.pth', + mean=(0, 0, 0), std=(1, 1, 1), crop_pct=0.875, interpolation='bilinear', num_classes=11221, ), mixer_b16_224_miil=_cfg( - url='https://miil-public-eu.oss-eu-central-1.aliyuncs.com/model-zoo/ImageNet_21K_P/models/timm/mixer_b16_224_miil.pth', - mean=(0, 0, 0), std=(1, 1, 1), crop_pct=0.875, interpolation='bilinear', + url='https://miil-public-eu.oss-eu-central-1.aliyuncs.com/model-zoo/ImageNet_21K_P/models/timm/mixer_b16_224_miil.pth', + mean=(0, 0, 0), std=(1, 1, 1), crop_pct=0.875, interpolation='bilinear', ), gmixer_12_224=_cfg(mean=IMAGENET_DEFAULT_MEAN, std=IMAGENET_DEFAULT_STD), gmixer_24_224=_cfg(mean=IMAGENET_DEFAULT_MEAN, std=IMAGENET_DEFAULT_STD), resmlp_12_224=_cfg(mean=IMAGENET_DEFAULT_MEAN, std=IMAGENET_DEFAULT_STD), - resmlp_24_224=_cfg(mean=IMAGENET_DEFAULT_MEAN, std=IMAGENET_DEFAULT_STD), + resmlp_24_224=_cfg( + url='https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-weights/resmlp_24_224_raa-a8256759.pth', + mean=IMAGENET_DEFAULT_MEAN, std=IMAGENET_DEFAULT_STD, crop_pct=0.89), resmlp_36_224=_cfg(mean=IMAGENET_DEFAULT_MEAN, std=IMAGENET_DEFAULT_STD), gmlp_ti16_224=_cfg(), From 758c4438a7a9ad9309ea930a52cf2fc95a2edb85 Mon Sep 17 00:00:00 2001 From: Ross Wightman Date: Tue, 8 Jun 2021 15:19:11 -0700 Subject: [PATCH 08/31] Update README.md --- README.md | 17 +++---- docs/archived_changes.md | 12 +++++ docs/changes.md | 107 +++------------------------------------ 3 files changed, 24 insertions(+), 112 deletions(-) diff --git a/README.md b/README.md index 0b878a0a..704bc32c 100644 --- a/README.md +++ b/README.md @@ -23,6 +23,12 @@ I'm fortunate to be able to dedicate significant time and money of my own suppor ## What's New +### June 8, 2021 +* Add first ResMLP weights, trained in PyTorch XLA on TPU-VM w/ my XLA branch. 24 block variant, 79.2 top-1. +* Add ResNet51-Q model w/ pretrained weights at 82.36 top-1. 
+ * NFNet inspired block layout with quad layer stem and no maxpool + * Same param count (35.7M) and throughput as ResNetRS-50 but +1.5 top-1 @ 224x224 and +2.5 top-1 at 288x288 + ### May 25, 2021 * Add LeViT, Visformer, ConViT (PR by Aman Arora), Twins (PR by paper authors) transformer models * Add ResMLP and gMLP MLP vision models to the existing MLP Mixer impl @@ -163,17 +169,6 @@ I'm fortunate to be able to dedicate significant time and money of my own suppor * 320x320 val, 1.0 crop - 84.36 * Update [results files](results/) -### Dec 18, 2020 -* Add ResNet-101D, ResNet-152D, and ResNet-200D weights trained @ 256x256 - * 256x256 val, 0.94 crop (top-1) - 101D (82.33), 152D (83.08), 200D (83.25) - * 288x288 val, 1.0 crop - 101D (82.64), 152D (83.48), 200D (83.76) - * 320x320 val, 1.0 crop - 101D (83.00), 152D (83.66), 200D (84.01) - -### Dec 7, 2020 -* Simplify EMA module (ModelEmaV2), compatible with fully torchscripted models -* Misc fixes for SiLU ONNX export, default_cfg missing from Feature extraction models, Linear layer w/ AMP + torchscript -* PyPi release @ 0.3.2 (needed by EfficientDet) - ## Introduction diff --git a/docs/archived_changes.md b/docs/archived_changes.md index 56ee706f..f8d88fd7 100644 --- a/docs/archived_changes.md +++ b/docs/archived_changes.md @@ -1,5 +1,17 @@ # Archived Changes +### Dec 18, 2020 +* Add ResNet-101D, ResNet-152D, and ResNet-200D weights trained @ 256x256 + * 256x256 val, 0.94 crop (top-1) - 101D (82.33), 152D (83.08), 200D (83.25) + * 288x288 val, 1.0 crop - 101D (82.64), 152D (83.48), 200D (83.76) + * 320x320 val, 1.0 crop - 101D (83.00), 152D (83.66), 200D (84.01) + +### Dec 7, 2020 +* Simplify EMA module (ModelEmaV2), compatible with fully torchscripted models +* Misc fixes for SiLU ONNX export, default_cfg missing from Feature extraction models, Linear layer w/ AMP + torchscript +* PyPi release @ 0.3.2 (needed by EfficientDet) + + ### Oct 30, 2020 * Test with PyTorch 1.7 and fix a small top-n metric view vs reshape issue. * Convert newly added 224x224 Vision Transformer weights from official JAX repo. 81.8 top-1 for B/16, 83.1 L/16. diff --git a/docs/changes.md b/docs/changes.md index 9719dd65..6ff50756 100644 --- a/docs/changes.md +++ b/docs/changes.md @@ -1,5 +1,11 @@ # Recent Changes +### June 8, 2021 +* Add first ResMLP weights, trained in PyTorch XLA on TPU-VM w/ my XLA branch. 24 block variant, 79.2 top-1. +* Add ResNet51-Q model w/ pretrained weights at 82.36 top-1. 
+ * NFNet inspired block layout with quad layer stem and no maxpool + * Same param count (35.7M) and throughput as ResNetRS-50 but +1.5 top-1 @ 224x224 and +2.5 top-1 at 288x288 + ### May 25, 2021 * Add LeViT, Visformer, Convit (PR by Aman Arora), Twins (PR by paper authors) transformer models * Cleanup input_size/img_size override handling and testing for all vision transformer models @@ -122,104 +128,3 @@ * 256x256 val, 0.94 crop top-1 - 83.75 * 320x320 val, 1.0 crop - 84.36 * Update results files - -### Dec 18, 2020 -* Add ResNet-101D, ResNet-152D, and ResNet-200D weights trained @ 256x256 - * 256x256 val, 0.94 crop (top-1) - 101D (82.33), 152D (83.08), 200D (83.25) - * 288x288 val, 1.0 crop - 101D (82.64), 152D (83.48), 200D (83.76) - * 320x320 val, 1.0 crop - 101D (83.00), 152D (83.66), 200D (84.01) - -### Dec 7, 2020 -* Simplify EMA module (ModelEmaV2), compatible with fully torchscripted models -* Misc fixes for SiLU ONNX export, default_cfg missing from Feature extraction models, Linear layer w/ AMP + torchscript -* PyPi release @ 0.3.2 (needed by EfficientDet) - -### Oct 30, 2020 -* Test with PyTorch 1.7 and fix a small top-n metric view vs reshape issue. -* Convert newly added 224x224 Vision Transformer weights from official JAX repo. 81.8 top-1 for B/16, 83.1 L/16. -* Support PyTorch 1.7 optimized, native SiLU (aka Swish) activation. Add mapping to 'silu' name, custom swish will eventually be deprecated. -* Fix regression for loading pretrained classifier via direct model entrypoint functions. Didn't impact create_model() factory usage. -* PyPi release @ 0.3.0 version! - -### Oct 26, 2020 -* Update Vision Transformer models to be compatible with official code release at https://github.com/google-research/vision_transformer -* Add Vision Transformer weights (ImageNet-21k pretrain) for 384x384 base and large models converted from official jax impl - * ViT-B/16 - 84.2 - * ViT-B/32 - 81.7 - * ViT-L/16 - 85.2 - * ViT-L/32 - 81.5 - -### Oct 21, 2020 -* Weights added for Vision Transformer (ViT) models. 77.86 top-1 for 'small' and 79.35 for 'base'. Thanks to [Christof](https://www.kaggle.com/christofhenkel) for training the base model w/ lots of GPUs. - -### Oct 13, 2020 -* Initial impl of Vision Transformer models. Both patch and hybrid (CNN backbone) variants. Currently trying to train... -* Adafactor and AdaHessian (FP32 only, no AMP) optimizers -* EdgeTPU-M (`efficientnet_em`) model trained in PyTorch, 79.3 top-1 -* Pip release, doc updates pending a few more changes... - -### Sept 18, 2020 -* New ResNet 'D' weights. 72.7 (top-1) ResNet-18-D, 77.1 ResNet-34-D, 80.5 ResNet-50-D -* Added a few untrained defs for other ResNet models (66D, 101D, 152D, 200/200D) - -### Sept 3, 2020 -* New weights - * Wide-ResNet50 - 81.5 top-1 (vs 78.5 torchvision) - * SEResNeXt50-32x4d - 81.3 top-1 (vs 79.1 cadene) -* Support for native Torch AMP and channels_last memory format added to train/validate scripts (`--channels-last`, `--native-amp` vs `--apex-amp`) -* Models tested with channels_last on latest NGC 20.08 container. AdaptiveAvgPool in attn layers changed to mean((2,3)) to work around bug with NHWC kernel. - -### Aug 12, 2020 -* New/updated weights from training experiments - * EfficientNet-B3 - 82.1 top-1 (vs 81.6 for official with AA and 81.9 for AdvProp) - * RegNetY-3.2GF - 82.0 top-1 (78.9 from official ver) - * CSPResNet50 - 79.6 top-1 (76.6 from official ver) -* Add CutMix integrated w/ Mixup. 
See [pull request](https://github.com/rwightman/pytorch-image-models/pull/218) for some usage examples -* Some fixes for using pretrained weights with `in_chans` != 3 on several models. - -### Aug 5, 2020 -Universal feature extraction, new models, new weights, new test sets. - -* All models support the `features_only=True` argument for `create_model` call to return a network that extracts features from the deepest layer at each stride. -* New models - * CSPResNet, CSPResNeXt, CSPDarkNet, DarkNet - * ReXNet - * (Modified Aligned) Xception41/65/71 (a proper port of TF models) -* New trained weights - * SEResNet50 - 80.3 top-1 - * CSPDarkNet53 - 80.1 top-1 - * CSPResNeXt50 - 80.0 top-1 - * DPN68b - 79.2 top-1 - * EfficientNet-Lite0 (non-TF ver) - 75.5 (submitted by [@hal-314](https://github.com/hal-314)) -* Add 'real' labels for ImageNet and ImageNet-Renditions test set, see [`results/README.md`](results/README.md) -* Test set ranking/top-n diff script by [@KushajveerSingh](https://github.com/KushajveerSingh) -* Train script and loader/transform tweaks to punch through more aug arguments -* README and documentation overhaul. See initial (WIP) documentation at https://rwightman.github.io/pytorch-image-models/ -* adamp and sgdp optimizers added by [@hellbell](https://github.com/hellbell) - -### June 11, 2020 -Bunch of changes: - -* DenseNet models updated with memory efficient addition from torchvision (fixed a bug), blur pooling and deep stem additions -* VoVNet V1 and V2 models added, 39 V2 variant (ese_vovnet_39b) trained to 79.3 top-1 -* Activation factory added along with new activations: - * select act at model creation time for more flexibility in using activations compatible with scripting or tracing (ONNX export) - * hard_mish (experimental) added with memory-efficient grad, along with ME hard_swish - * context mgr for setting exportable/scriptable/no_jit states -* Norm + Activation combo layers added with initial trial support in DenseNet and VoVNet along with impl of EvoNorm and InplaceAbn wrapper that fit the interface -* Torchscript works for all but two of the model types as long as using Pytorch 1.5+, tests added for this -* Some import cleanup and classifier reset changes, all models will have classifier reset to nn.Identity on reset_classifer(0) call -* Prep for 0.1.28 pip release - -### May 12, 2020 -* Add ResNeSt models (code adapted from https://github.com/zhanghang1989/ResNeSt, paper https://arxiv.org/abs/2004.08955)) - -### May 3, 2020 -* Pruned EfficientNet B1, B2, and B3 (https://arxiv.org/abs/2002.08258) contributed by [Yonathan Aflalo](https://github.com/yoniaflalo) - -### May 1, 2020 -* Merged a number of execellent contributions in the ResNet model family over the past month - * BlurPool2D and resnetblur models initiated by [Chris Ha](https://github.com/VRandme), I trained resnetblur50 to 79.3. - * TResNet models and SpaceToDepth, AntiAliasDownsampleLayer layers by [mrT23](https://github.com/mrT23) - * ecaresnet (50d, 101d, light) models and two pruned variants using pruning as per (https://arxiv.org/abs/2002.08258) by [Yonathan Aflalo](https://github.com/yoniaflalo) -* 200 pretrained models in total now with updated results csv in results folder From b3b90d944d7c77c34428225ecd25c4d55761c20e Mon Sep 17 00:00:00 2001 From: Ross Wightman Date: Tue, 8 Jun 2021 17:32:08 -0700 Subject: [PATCH 09/31] Add min_input_size to bat_resnext to prevent test breakage. 
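The 'bat' attn block pools features onto a block_size x block_size (8x8) grid, so very small test inputs leave the deepest stride-32 stage with fewer spatial positions than the grid. A sketch of how the floor surfaces to consumers, which should prefer min_input_size over input_size when present:

    import timm

    model = timm.create_model('bat_resnext26ts')
    cfg = model.default_cfg
    print(cfg.get('min_input_size', cfg['input_size']))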
--- timm/models/byobnet.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/timm/models/byobnet.py b/timm/models/byobnet.py index 750a43de..692cea8c 100644 --- a/timm/models/byobnet.py +++ b/timm/models/byobnet.py @@ -102,7 +102,8 @@ default_cfgs = { 'gcresnext26ts': _cfg( first_conv='stem.conv1.conv', input_size=(3, 256, 256), pool_size=(8, 8), interpolation='bicubic'), 'bat_resnext26ts': _cfg( - first_conv='stem.conv1.conv', input_size=(3, 256, 256), pool_size=(8, 8), interpolation='bicubic'), + first_conv='stem.conv1.conv', input_size=(3, 256, 256), pool_size=(8, 8), interpolation='bicubic', + min_input_size=(3, 128, 128)), } From d17b374f0fe3c86ab69ccd92104825344ef1f66a Mon Sep 17 00:00:00 2001 From: Ross Wightman Date: Tue, 8 Jun 2021 21:31:39 -0700 Subject: [PATCH 10/31] Minimum input_size needed to be higher --- timm/models/byobnet.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/timm/models/byobnet.py b/timm/models/byobnet.py index 692cea8c..38ff6615 100644 --- a/timm/models/byobnet.py +++ b/timm/models/byobnet.py @@ -103,7 +103,7 @@ default_cfgs = { first_conv='stem.conv1.conv', input_size=(3, 256, 256), pool_size=(8, 8), interpolation='bicubic'), 'bat_resnext26ts': _cfg( first_conv='stem.conv1.conv', input_size=(3, 256, 256), pool_size=(8, 8), interpolation='bicubic', - min_input_size=(3, 128, 128)), + min_input_size=(3, 256, 256)), } From b7a568f06504310381733cba0cf8cc54a557442c Mon Sep 17 00:00:00 2001 From: Ross Wightman Date: Tue, 8 Jun 2021 23:19:51 -0700 Subject: [PATCH 11/31] Fix torchscript issue in bat --- timm/models/layers/non_local_attn.py | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/timm/models/layers/non_local_attn.py b/timm/models/layers/non_local_attn.py index d20a5f3e..a537d60e 100644 --- a/timm/models/layers/non_local_attn.py +++ b/timm/models/layers/non_local_attn.py @@ -81,7 +81,7 @@ class BilinearAttnTransform(nn.Module): self.groups = groups self.in_channels = in_channels - def resize_mat(self, x, t): + def resize_mat(self, x, t: int): B, C, block_size, block_size1 = x.shape assert block_size == block_size1 if t <= 1: @@ -100,10 +100,8 @@ class BilinearAttnTransform(nn.Module): out = self.conv1(x) rp = F.adaptive_max_pool2d(out, (self.block_size, 1)) cp = F.adaptive_max_pool2d(out, (1, self.block_size)) - p = self.conv_p(rp).view(B, self.groups, self.block_size, self.block_size) - q = self.conv_q(cp).view(B, self.groups, self.block_size, self.block_size) - p = F.sigmoid(p) - q = F.sigmoid(q) + p = self.conv_p(rp).view(B, self.groups, self.block_size, self.block_size).sigmoid() + q = self.conv_q(cp).view(B, self.groups, self.block_size, self.block_size).sigmoid() p = p / p.sum(dim=3, keepdim=True) q = q / q.sum(dim=2, keepdim=True) p = p.view(B, self.groups, 1, self.block_size, self.block_size).expand(x.size( From 7c19c35d9f1185a2cf017b33a98f1cd67ab30785 Mon Sep 17 00:00:00 2001 From: SamuelGabriel Date: Wed, 9 Jun 2021 19:11:58 +0200 Subject: [PATCH 12/31] Global instead of local rank. 
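local_rank restarts at 0 on every node, so gating one-time work (experiment dir, checkpoint saver) on it would run once per node in multi-node jobs; the global rank is 0 on exactly one process. A minimal sketch of the distinction, assuming a standard torch.distributed setup:

    import torch.distributed as dist

    if dist.is_initialized() and dist.get_rank() == 0:
        # exactly one process across all nodes lands here
        print('global primary: safe place for output dirs and checkpoints')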
--- train.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/train.py b/train.py index 4264a164..f1c1581e 100755 --- a/train.py +++ b/train.py @@ -561,7 +561,7 @@ def main(): best_epoch = None saver = None output_dir = None - if args.local_rank == 0: + if args.rank == 0: if args.experiment: exp_name = args.experiment else: From 07fb05cc3d5c6f99d5b74a88e9a14db42e8e2e4f Mon Sep 17 00:00:00 2001 From: Ross Wightman Date: Wed, 9 Jun 2021 22:33:05 -0700 Subject: [PATCH 13/31] Update results csv files --- results/results-imagenet-a-clean.csv | 67 +- results/results-imagenet-a.csv | 623 ++++++++--------- results/results-imagenet-r-clean.csv | 107 +-- results/results-imagenet-r.csv | 629 +++++++++--------- results/results-imagenet-real.csv | 383 ++++++----- results/results-imagenet.csv | 45 +- .../results-imagenetv2-matched-frequency.csv | 387 ++++++----- results/results-sketch.csv | 609 +++++++++-------- 8 files changed, 1549 insertions(+), 1301 deletions(-) diff --git a/results/results-imagenet-a-clean.csv b/results/results-imagenet-a-clean.csv index 57baa83e..ab87c4ee 100644 --- a/results/results-imagenet-a-clean.csv +++ b/results/results-imagenet-a-clean.csv @@ -5,6 +5,7 @@ swin_large_patch4_window12_384,98.040,1.960,99.690,0.310,196.74,384,1.000,bicubi tf_efficientnet_b7_ns,97.910,2.090,99.720,0.280,66.35,600,0.949,bicubic swin_base_patch4_window12_384,97.890,2.110,99.710,0.290,87.90,384,1.000,bicubic dm_nfnet_f6,97.730,2.270,99.580,0.420,438.36,576,0.956,bicubic +tf_efficientnetv2_l_in21ft1k,97.700,2.300,99.670,0.330,118.52,480,1.000,bicubic swin_large_patch4_window7_224,97.650,2.350,99.580,0.420,196.53,224,0.900,bicubic tf_efficientnet_b6_ns,97.630,2.370,99.580,0.420,43.04,528,0.942,bicubic ig_resnext101_32x48d,97.620,2.380,99.700,0.300,828.41,224,0.875,bilinear @@ -13,16 +14,19 @@ dm_nfnet_f4,97.570,2.430,99.520,0.480,316.07,512,0.951,bicubic tf_efficientnet_b5_ns,97.500,2.500,99.630,0.370,30.39,456,0.934,bicubic resnetv2_152x4_bitm,97.490,2.510,99.600,0.400,936.53,480,1.000,bilinear cait_m48_448,97.480,2.520,99.550,0.450,356.46,448,1.000,bicubic +tf_efficientnetv2_m_in21ft1k,97.480,2.520,99.530,0.470,54.14,480,1.000,bicubic cait_m36_384,97.400,2.600,99.510,0.490,271.22,384,1.000,bicubic dm_nfnet_f3,97.360,2.640,99.580,0.420,254.92,416,0.940,bicubic ig_resnext101_32x32d,97.360,2.640,99.680,0.320,468.53,224,0.875,bilinear cait_s36_384,97.330,2.670,99.530,0.470,68.37,384,1.000,bicubic +tf_efficientnetv2_l,97.280,2.720,99.550,0.450,118.52,480,1.000,bicubic swin_base_patch4_window7_224,97.250,2.750,99.530,0.470,87.77,224,0.900,bicubic -swsl_resnext101_32x8d,97.200,2.800,99.570,0.430,88.79,224,0.875,bilinear -tf_efficientnet_b7_ap,97.200,2.800,99.540,0.460,66.35,600,0.949,bicubic tf_efficientnet_b8,97.200,2.800,99.500,0.500,87.41,672,0.954,bicubic +tf_efficientnet_b7_ap,97.200,2.800,99.540,0.460,66.35,600,0.949,bicubic +swsl_resnext101_32x8d,97.200,2.800,99.570,0.430,88.79,224,0.875,bilinear vit_base_r50_s16_384,97.180,2.820,99.560,0.440,98.95,384,1.000,bicubic resnetv2_152x2_bitm,97.150,2.850,99.590,0.410,236.34,480,1.000,bilinear +tf_efficientnetv2_m,97.140,2.860,99.410,0.590,54.14,480,1.000,bicubic tf_efficientnet_b8_ap,97.110,2.890,99.660,0.340,87.41,672,0.954,bicubic vit_large_patch16_384,97.110,2.890,99.640,0.360,304.72,384,1.000,bicubic ecaresnet269d,97.080,2.920,99.470,0.530,102.09,352,1.000,bicubic @@ -30,6 +34,7 @@ tf_efficientnet_b6_ap,97.080,2.920,99.620,0.380,43.04,528,0.942,bicubic cait_s24_384,97.070,2.930,99.430,0.570,47.06,384,1.000,bicubic 
resnetv2_101x3_bitm,97.050,2.950,99.520,0.480,387.93,480,1.000,bilinear tf_efficientnet_b7,97.010,2.990,99.520,0.480,66.35,600,0.949,bicubic +efficientnetv2_rw_m,96.980,3.020,99.540,0.460,53.24,416,1.000,bicubic dm_nfnet_f2,96.960,3.040,99.450,0.550,193.78,352,0.920,bicubic vit_deit_base_distilled_patch16_384,96.960,3.040,99.480,0.520,87.63,384,1.000,bicubic tf_efficientnet_b4_ns,96.950,3.050,99.580,0.420,19.34,380,0.922,bicubic @@ -39,18 +44,19 @@ ig_resnext101_32x16d,96.820,3.180,99.590,0.410,194.03,224,0.875,bilinear resnetv2_50x3_bitm,96.770,3.230,99.430,0.570,217.32,480,1.000,bilinear seresnet152d,96.770,3.230,99.450,0.550,66.84,320,1.000,bicubic resnetrs350,96.760,3.240,99.370,0.630,163.96,384,1.000,bicubic +tf_efficientnetv2_s_in21ft1k,96.730,3.270,99.420,0.580,21.46,384,1.000,bicubic resnet200d,96.720,3.280,99.330,0.670,64.69,320,1.000,bicubic -eca_nfnet_l1,96.700,3.300,99.270,0.730,41.41,320,1.000,bicubic vit_base_patch16_384,96.700,3.300,99.510,0.490,86.86,384,1.000,bicubic +eca_nfnet_l1,96.700,3.300,99.270,0.730,41.41,320,1.000,bicubic resnetrs270,96.690,3.310,99.350,0.650,129.86,352,1.000,bicubic -tf_efficientnet_b5_ap,96.680,3.320,99.460,0.540,30.39,456,0.934,bicubic pit_b_distilled_224,96.680,3.320,99.350,0.650,74.79,224,0.900,bicubic +tf_efficientnet_b5_ap,96.680,3.320,99.460,0.540,30.39,456,0.934,bicubic tf_efficientnet_b6,96.670,3.330,99.370,0.630,43.04,528,0.942,bicubic resnest200e,96.610,3.390,99.350,0.650,70.20,320,0.909,bicubic swsl_resnext101_32x16d,96.600,3.400,99.520,0.480,194.03,224,0.875,bilinear resnetrs152,96.580,3.420,99.240,0.760,86.62,320,1.000,bicubic cait_xs24_384,96.550,3.450,99.420,0.580,26.67,384,1.000,bicubic -efficientnet_v2s,96.540,3.460,99.360,0.640,23.94,384,1.000,bicubic +efficientnetv2_rw_s,96.540,3.460,99.360,0.640,23.94,384,1.000,bicubic resnetrs200,96.530,3.470,99.350,0.650,93.21,320,1.000,bicubic resnest269e,96.520,3.480,99.350,0.650,110.93,416,0.928,bicubic vit_base_patch16_224_miil,96.460,3.540,99.300,0.700,86.54,224,0.875,bilinear @@ -60,15 +66,19 @@ cait_s24_224,96.380,3.620,99.150,0.850,46.92,224,1.000,bicubic resnet152d,96.360,3.640,99.390,0.610,60.21,320,1.000,bicubic regnety_160,96.350,3.650,99.330,0.670,83.59,288,1.000,bicubic tf_efficientnet_b5,96.350,3.650,99.310,0.690,30.39,456,0.934,bicubic +tf_efficientnetv2_s,96.340,3.660,99.200,0.800,21.46,384,1.000,bicubic ig_resnext101_32x8d,96.320,3.680,99.430,0.570,88.79,224,0.875,bilinear resnet101d,96.290,3.710,99.230,0.770,44.57,320,1.000,bicubic +twins_svt_large,96.270,3.730,99.170,0.830,99.27,224,0.900,bicubic +twins_svt_base,96.160,3.840,99.060,0.940,56.07,224,0.900,bicubic tf_efficientnet_b4_ap,96.160,3.840,99.280,0.720,19.34,380,0.922,bicubic efficientnet_b4,96.150,3.850,99.200,0.800,19.34,384,1.000,bicubic +twins_pcpvt_large,96.150,3.850,99.180,0.820,60.99,224,0.900,bicubic vit_deit_base_patch16_384,96.150,3.850,99.140,0.860,86.86,384,1.000,bicubic dm_nfnet_f0,96.140,3.860,99.240,0.760,71.49,256,0.900,bicubic -vit_deit_base_distilled_patch16_224,96.090,3.910,99.190,0.810,87.34,224,0.900,bicubic nfnet_l0,96.090,3.910,99.260,0.740,35.07,288,1.000,bicubic resnetv2_101x1_bitm,96.090,3.910,99.300,0.700,44.54,480,1.000,bilinear +vit_deit_base_distilled_patch16_224,96.090,3.910,99.190,0.810,87.34,224,0.900,bicubic regnety_032,95.970,4.030,99.190,0.810,19.44,288,1.000,bicubic tresnet_xl_448,95.970,4.030,99.130,0.870,78.44,448,0.875,bilinear vit_large_patch16_224,95.950,4.050,99.240,0.760,304.33,224,0.900,bicubic @@ -77,31 +87,42 @@ 
swin_small_patch4_window7_224,95.910,4.090,99.020,0.980,49.61,224,0.900,bicubic tf_efficientnet_b4,95.900,4.100,99.170,0.830,19.34,380,0.922,bicubic swsl_resnext50_32x4d,95.870,4.130,99.250,0.750,25.03,224,0.875,bilinear resnest101e,95.860,4.140,99.210,0.790,48.28,256,0.875,bilinear +resnet51q,95.860,4.140,99.120,0.880,35.70,288,1.000,bilinear tresnet_l_448,95.860,4.140,99.120,0.880,55.99,448,0.875,bilinear cait_xxs36_384,95.850,4.150,99.090,0.910,17.37,384,1.000,bicubic vit_large_patch32_384,95.830,4.170,99.150,0.850,306.63,384,1.000,bicubic vit_base_patch32_384,95.810,4.190,99.150,0.850,88.30,384,1.000,bicubic ssl_resnext101_32x16d,95.800,4.200,99.180,0.820,194.03,224,0.875,bilinear +twins_pcpvt_base,95.790,4.210,99.130,0.870,43.83,224,0.900,bicubic tf_efficientnet_b2_ns,95.770,4.230,99.120,0.880,9.11,260,0.890,bicubic tresnet_m,95.720,4.280,99.030,0.970,31.39,224,0.875,bilinear -efficientnet_b3,95.710,4.290,99.040,0.960,12.23,320,1.000,bicubic pnasnet5large,95.710,4.290,98.920,1.080,86.06,331,0.911,bicubic +efficientnet_b3,95.710,4.290,99.040,0.960,12.23,320,1.000,bicubic nasnetalarge,95.680,4.320,98.930,1.070,88.75,331,0.911,bicubic pit_b_224,95.640,4.360,98.660,1.340,73.76,224,0.900,bicubic +convit_base,95.550,4.450,98.870,1.130,86.54,224,0.875,bicubic +coat_lite_small,95.540,4.460,98.860,1.140,19.84,224,0.900,bicubic ecaresnet101d,95.530,4.470,99.130,0.870,44.57,224,0.875,bicubic +levit_384,95.530,4.470,99.050,0.950,39.13,224,0.900,bicubic ecaresnet50t,95.510,4.490,99.120,0.880,25.57,320,0.950,bicubic +visformer_small,95.490,4.510,98.900,1.100,40.22,224,0.900,bicubic ssl_resnext101_32x8d,95.470,4.530,99.110,0.890,88.79,224,0.875,bilinear -ssl_resnext101_32x4d,95.440,4.560,99.130,0.870,44.18,224,0.875,bilinear tresnet_xl,95.440,4.560,99.050,0.950,78.44,224,0.875,bilinear vit_deit_base_patch16_224,95.440,4.560,98.840,1.160,86.57,224,0.900,bicubic +ssl_resnext101_32x4d,95.440,4.560,99.130,0.870,44.18,224,0.875,bilinear resnetrs101,95.430,4.570,99.030,0.970,63.62,288,0.940,bicubic swsl_resnet50,95.410,4.590,99.290,0.710,25.56,224,0.875,bilinear vit_base_patch16_224,95.330,4.670,99.000,1.000,86.57,224,0.900,bicubic tf_efficientnet_b3_ap,95.320,4.680,98.900,1.100,12.23,300,0.904,bicubic +mixer_b16_224_miil,95.300,4.700,98.880,1.120,59.88,224,0.875,bilinear tresnet_l,95.290,4.710,99.010,0.990,55.99,224,0.875,bilinear cait_xxs24_384,95.260,4.740,98.960,1.040,12.03,384,1.000,bicubic pit_s_distilled_224,95.240,4.760,99.050,0.950,24.04,224,0.900,bicubic +twins_pcpvt_small,95.210,4.790,98.880,1.120,24.11,224,0.900,bicubic +convit_small,95.200,4.800,98.900,1.100,27.78,224,0.875,bicubic +twins_svt_small,95.200,4.800,98.880,1.120,24.06,224,0.900,bicubic tf_efficientnet_b1_ns,95.170,4.830,99.110,0.890,7.79,240,0.882,bicubic +tf_efficientnetv2_b3,95.160,4.840,98.820,1.180,14.36,300,0.904,bicubic swin_tiny_patch4_window7_224,95.140,4.860,98.850,1.150,28.29,224,0.900,bicubic efficientnet_el,95.120,4.880,98.990,1.010,10.59,300,0.904,bicubic gernet_l,95.090,4.910,98.900,1.100,31.08,256,0.875,bilinear @@ -113,7 +134,9 @@ gluon_resnet152_v1s,95.040,4.960,98.930,1.070,60.32,224,0.875,bicubic seresnext50_32x4d,95.040,4.960,98.880,1.120,27.56,224,0.875,bicubic tnt_s_patch16_224,95.040,4.960,98.830,1.170,23.76,224,0.900,bicubic tf_efficientnet_b3,95.010,4.990,98.910,1.090,12.23,300,0.904,bicubic +levit_256,95.010,4.990,98.890,1.110,18.89,224,0.900,bicubic tresnet_m_448,94.990,5.010,98.980,1.020,31.39,448,0.875,bilinear +coat_mini,94.970,5.030,98.780,1.220,10.34,224,0.900,bicubic 
resnest50d_4s2x40d,94.960,5.040,99.070,0.930,30.42,224,0.875,bicubic rexnet_200,94.940,5.060,99.010,0.990,16.37,224,0.875,bicubic gluon_seresnext101_64x4d,94.930,5.070,98.830,1.170,88.23,224,0.875,bicubic @@ -137,8 +160,8 @@ nf_resnet50,94.590,5.410,98.810,1.190,25.56,288,0.940,bicubic pit_s_224,94.590,5.410,98.710,1.290,23.46,224,0.900,bicubic repvgg_b3,94.570,5.430,98.780,1.220,123.09,224,0.875,bilinear seresnet50,94.550,5.450,98.750,1.250,28.09,224,0.875,bicubic -regnety_320,94.540,5.460,98.850,1.150,145.05,224,0.875,bicubic inception_resnet_v2,94.540,5.460,98.790,1.210,55.84,299,0.897,bicubic +regnety_320,94.540,5.460,98.850,1.150,145.05,224,0.875,bicubic gluon_resnext101_32x4d,94.530,5.470,98.630,1.370,44.18,224,0.875,bicubic repvgg_b3g4,94.520,5.480,98.970,1.030,83.83,224,0.875,bilinear tf_efficientnet_b2_ap,94.490,5.510,98.620,1.380,9.11,260,0.890,bicubic @@ -147,9 +170,10 @@ regnety_120,94.480,5.520,98.810,1.190,51.82,224,0.875,bicubic rexnet_150,94.480,5.520,98.790,1.210,9.73,224,0.875,bicubic regnetx_320,94.460,5.540,98.740,1.260,107.81,224,0.875,bicubic ssl_resnet50,94.450,5.550,98.920,1.080,25.56,224,0.875,bilinear +tf_efficientnetv2_b2,94.420,5.580,98.570,1.430,10.10,260,0.890,bicubic tf_efficientnet_el,94.410,5.590,98.710,1.290,10.59,300,0.904,bicubic -vit_deit_small_patch16_224,94.400,5.600,98.690,1.310,22.05,224,0.900,bicubic efficientnet_el_pruned,94.400,5.600,98.740,1.260,10.59,300,0.904,bicubic +vit_deit_small_patch16_224,94.400,5.600,98.690,1.310,22.05,224,0.900,bicubic inception_v4,94.380,5.620,98.580,1.420,42.68,299,0.875,bicubic legacy_seresnext101_32x4d,94.370,5.630,98.650,1.350,48.96,224,0.875,bilinear tf_efficientnet_b2,94.360,5.640,98.610,1.390,9.11,260,0.890,bicubic @@ -160,8 +184,8 @@ resnetrs50,94.310,5.690,98.640,1.360,35.69,224,0.910,bicubic xception71,94.280,5.720,98.640,1.360,42.34,299,0.903,bicubic resnet50d,94.260,5.740,98.720,1.280,25.58,224,0.875,bicubic skresnext50_32x4d,94.260,5.740,98.460,1.540,27.48,224,0.875,bicubic -cait_xxs36_224,94.260,5.740,98.720,1.280,17.30,224,1.000,bicubic gluon_xception65,94.260,5.740,98.570,1.430,39.92,299,0.903,bicubic +cait_xxs36_224,94.260,5.740,98.720,1.280,17.30,224,1.000,bicubic regnetx_120,94.240,5.760,98.650,1.350,46.11,224,0.875,bicubic dpn92,94.230,5.770,98.730,1.270,37.67,224,0.875,bicubic ecaresnet50d_pruned,94.220,5.780,98.730,1.270,19.94,224,0.875,bicubic @@ -169,6 +193,7 @@ gluon_resnet101_v1d,94.220,5.780,98.550,1.450,44.57,224,0.875,bicubic tf_efficientnet_lite3,94.200,5.800,98.640,1.360,8.20,300,0.904,bilinear mixnet_xl,94.190,5.810,98.340,1.660,11.90,224,0.875,bicubic resnext50d_32x4d,94.180,5.820,98.570,1.430,25.05,224,0.875,bicubic +levit_192,94.170,5.830,98.540,1.460,10.95,224,0.900,bicubic regnety_080,94.170,5.830,98.680,1.320,39.18,224,0.875,bicubic gluon_resnet152_v1c,94.160,5.840,98.640,1.360,60.21,224,0.875,bicubic ens_adv_inception_resnet_v2,94.160,5.840,98.600,1.400,55.84,299,0.897,bicubic @@ -185,6 +210,7 @@ dpn131,94.010,5.990,98.720,1.280,79.25,224,0.875,bicubic hrnet_w64,94.010,5.990,98.610,1.390,128.06,224,0.875,bilinear resnetblur50,93.960,6.040,98.590,1.410,25.56,224,0.875,bicubic dla102x2,93.950,6.050,98.490,1.510,41.28,224,0.875,bilinear +tf_efficientnetv2_b1,93.940,6.060,98.620,1.380,8.14,240,0.882,bicubic hrnet_w48,93.920,6.080,98.610,1.390,77.47,224,0.875,bilinear rexnet_130,93.900,6.100,98.400,1.600,7.56,224,0.875,bicubic tf_efficientnet_cc_b1_8e,93.900,6.100,98.260,1.740,39.72,240,0.882,bicubic @@ -204,33 +230,35 @@ 
res2net101_26w_4s,93.750,6.250,98.310,1.690,45.21,224,0.875,bilinear cspresnet50,93.740,6.260,98.640,1.360,21.62,256,0.887,bilinear legacy_seresnext50_32x4d,93.730,6.270,98.580,1.420,27.56,224,0.875,bilinear wide_resnet101_2,93.720,6.280,98.540,1.460,126.89,224,0.875,bilinear -tf_efficientnet_b1_ap,93.690,6.310,98.360,1.640,7.79,240,0.882,bicubic dpn68b,93.690,6.310,98.510,1.490,12.61,224,0.875,bicubic +tf_efficientnet_b1_ap,93.690,6.310,98.360,1.640,7.79,240,0.882,bicubic gluon_resnet101_v1c,93.670,6.330,98.420,1.580,44.57,224,0.875,bicubic tf_efficientnet_b0_ns,93.630,6.370,98.640,1.360,5.29,224,0.875,bicubic gluon_resnet50_v1s,93.620,6.380,98.460,1.540,25.68,224,0.875,bicubic cait_xxs24_224,93.600,6.400,98.440,1.560,11.96,224,1.000,bicubic +coat_tiny,93.590,6.410,98.430,1.570,5.50,224,0.900,bicubic regnetx_040,93.560,6.440,98.540,1.460,22.12,224,0.875,bicubic hrnet_w44,93.550,6.450,98.700,1.300,67.06,224,0.875,bilinear res2net50_26w_8s,93.540,6.460,98.260,1.740,48.40,224,0.875,bilinear hrnet_w32,93.530,6.470,98.450,1.550,41.23,224,0.875,bilinear dla102x,93.520,6.480,98.510,1.490,26.31,224,0.875,bilinear -tf_efficientnet_b1,93.500,6.500,98.360,1.640,7.79,240,0.882,bicubic repvgg_b2,93.500,6.500,98.730,1.270,89.02,224,0.875,bilinear +tf_efficientnet_b1,93.500,6.500,98.360,1.640,7.79,240,0.882,bicubic hrnet_w40,93.490,6.510,98.580,1.420,57.56,224,0.875,bilinear -gluon_inception_v3,93.460,6.540,98.570,1.430,23.83,299,0.875,bicubic xception,93.460,6.540,98.530,1.470,22.86,299,0.897,bicubic +gluon_inception_v3,93.460,6.540,98.570,1.430,23.83,299,0.875,bicubic mixnet_l,93.450,6.550,98.220,1.780,7.33,224,0.875,bicubic xception41,93.430,6.570,98.430,1.570,26.97,299,0.903,bicubic res2net50_26w_6s,93.410,6.590,98.280,1.720,37.05,224,0.875,bilinear legacy_seresnet152,93.400,6.600,98.350,1.650,66.82,224,0.875,bilinear dla169,93.340,6.660,98.600,1.400,53.39,224,0.875,bilinear -resnest26d,93.330,6.670,98.630,1.370,17.07,224,0.875,bilinear +levit_128,93.340,6.660,98.380,1.620,9.21,224,0.900,bicubic repvgg_b1,93.330,6.670,98.510,1.490,57.42,224,0.875,bilinear +resnest26d,93.330,6.670,98.630,1.370,17.07,224,0.875,bilinear tf_inception_v3,93.320,6.680,98.030,1.970,23.83,299,0.875,bicubic tf_mixnet_l,93.310,6.690,98.030,1.970,7.33,224,0.875,bicubic -selecsls60b,93.300,6.700,98.280,1.720,32.77,224,0.875,bicubic tv_resnet152,93.300,6.700,98.390,1.610,60.19,224,0.875,bilinear +selecsls60b,93.300,6.700,98.280,1.720,32.77,224,0.875,bicubic legacy_seresnet101,93.280,6.720,98.510,1.490,49.33,224,0.875,bilinear efficientnet_b1,93.250,6.750,98.290,1.710,7.79,256,1.000,bicubic coat_lite_tiny,93.240,6.760,98.260,1.740,5.72,224,0.900,bicubic @@ -241,6 +269,7 @@ efficientnet_es,93.140,6.860,98.420,1.580,5.44,224,0.875,bicubic dla60x,93.120,6.880,98.510,1.490,17.35,224,0.875,bilinear regnetx_032,93.120,6.880,98.390,1.610,15.30,224,0.875,bicubic pit_xs_224,93.110,6.890,98.310,1.690,10.62,224,0.900,bicubic +tf_efficientnetv2_b0,93.110,6.890,98.390,1.610,7.14,224,0.875,bicubic dla102,93.060,6.940,98.540,1.460,33.27,224,0.875,bilinear gluon_resnet50_v1c,93.030,6.970,98.390,1.610,25.58,224,0.875,bicubic regnety_016,93.030,6.970,98.360,1.640,11.20,224,0.875,bicubic @@ -271,8 +300,8 @@ tf_efficientnet_cc_b0_4e,92.590,7.410,98.080,1.920,13.31,224,0.875,bicubic hardcorenas_e,92.570,7.430,98.110,1.890,8.07,224,0.875,bilinear res2net50_48w_2s,92.550,7.450,98.080,1.920,25.29,224,0.875,bilinear gluon_resnet50_v1b,92.540,7.460,98.170,1.830,25.56,224,0.875,bicubic -res2net50_26w_4s,92.500,7.500,98.060,1.940,25.70,224,0.875,bilinear 
densenet161,92.500,7.500,98.290,1.710,28.68,224,0.875,bicubic +res2net50_26w_4s,92.500,7.500,98.060,1.940,25.70,224,0.875,bilinear mixnet_m,92.430,7.570,97.870,2.130,5.01,224,0.875,bicubic hardcorenas_d,92.400,7.600,98.070,1.930,7.50,224,0.875,bilinear mobilenetv2_120d,92.400,7.600,98.050,1.950,5.83,224,0.875,bicubic @@ -291,6 +320,7 @@ resnet26d,92.070,7.930,97.960,2.040,16.01,224,0.875,bicubic hardcorenas_c,92.020,7.980,97.840,2.160,5.52,224,0.875,bilinear dpn68,92.010,7.990,98.050,1.950,12.61,224,0.875,bicubic tf_efficientnet_es,91.980,8.020,97.860,2.140,5.44,224,0.875,bicubic +levit_128s,91.970,8.030,98.060,1.940,7.78,224,0.900,bicubic repvgg_a2,91.940,8.060,98.150,1.850,28.21,224,0.875,bilinear densenet169,91.930,8.070,98.100,1.900,14.15,224,0.875,bicubic densenetblur121d,91.910,8.090,98.070,1.930,8.00,224,0.875,bicubic @@ -326,10 +356,11 @@ dla34,90.760,9.240,97.660,2.340,15.74,224,0.875,bilinear fbnetc_100,90.700,9.300,97.210,2.790,5.57,224,0.875,bilinear vit_deit_tiny_distilled_patch16_224,90.700,9.300,97.570,2.430,5.91,224,0.900,bicubic swsl_resnet18,90.690,9.310,97.700,2.300,11.69,224,0.875,bilinear +convit_tiny,90.630,9.370,97.740,2.260,5.71,224,0.875,bicubic mnasnet_100,90.510,9.490,97.470,2.530,4.38,224,0.875,bicubic regnety_004,90.500,9.500,97.540,2.460,4.34,224,0.875,bicubic -spnasnet_100,90.350,9.650,97.190,2.810,4.42,224,0.875,bilinear regnetx_006,90.350,9.650,97.430,2.570,6.20,224,0.875,bicubic +spnasnet_100,90.350,9.650,97.190,2.810,4.42,224,0.875,bilinear ssl_resnet18,90.220,9.780,97.550,2.450,11.69,224,0.875,bilinear vgg16_bn,90.090,9.910,97.370,2.630,138.37,224,0.875,bilinear vgg19_bn,90.080,9.920,97.580,2.420,143.68,224,0.875,bilinear diff --git a/results/results-imagenet-a.csv b/results/results-imagenet-a.csv index 267802e7..51cd3a15 100644 --- a/results/results-imagenet-a.csv +++ b/results/results-imagenet-a.csv @@ -3,344 +3,375 @@ tf_efficientnet_l2_ns,84.760,15.240,96.147,3.853,480.31,800,0.960,bicubic,-13.79 tf_efficientnet_l2_ns_475,83.373,16.627,95.453,4.547,480.31,475,0.936,bicubic,-15.127,-4.377,0 swin_large_patch4_window12_384,69.627,30.373,89.560,10.440,196.74,384,1.000,bicubic,-28.413,-10.130,0 tf_efficientnet_b7_ns,67.040,32.960,88.667,11.333,66.35,600,0.949,bicubic,-30.870,-11.053,0 -swin_base_patch4_window12_384,64.480,35.520,87.493,12.507,87.90,384,1.000,bicubic,-33.410,-12.217,0 +tf_efficientnetv2_l_in21ft1k,66.333,33.667,87.853,12.147,118.52,480,1.000,bicubic,-31.367,-11.817,+2 +swin_base_patch4_window12_384,64.480,35.520,87.493,12.507,87.90,384,1.000,bicubic,-33.410,-12.217,-1 cait_m48_448,62.373,37.627,86.453,13.547,356.46,448,1.000,bicubic,-35.107,-13.097,+8 tf_efficientnet_b6_ns,62.267,37.733,85.173,14.827,43.04,528,0.942,bicubic,-35.363,-14.407,+1 -dm_nfnet_f6,62.253,37.747,84.667,15.333,438.36,576,0.956,bicubic,-35.477,-14.913,-2 +dm_nfnet_f6,62.253,37.747,84.667,15.333,438.36,576,0.956,bicubic,-35.477,-14.913,-3 dm_nfnet_f5,61.587,38.413,84.027,15.973,377.21,544,0.954,bicubic,-36.013,-15.523,+1 -ig_resnext101_32x48d,61.013,38.987,83.347,16.653,828.41,224,0.875,bilinear,-36.607,-16.353,-1 -swin_large_patch4_window7_224,60.893,39.107,85.840,14.160,196.53,224,0.900,bicubic,-36.757,-13.740,-4 -resnetv2_152x4_bitm,60.733,39.267,83.600,16.400,936.53,480,1.000,bilinear,-36.757,-16.000,+1 -dm_nfnet_f4,60.720,39.280,83.427,16.573,316.07,512,0.951,bicubic,-36.850,-16.093,-2 -tf_efficientnet_b5_ns,60.320,39.680,84.493,15.507,30.39,456,0.934,bicubic,-37.180,-15.137,-2 
+tf_efficientnetv2_m_in21ft1k,61.387,38.613,85.413,14.587,54.14,480,1.000,bicubic,-36.093,-14.117,+5 +ig_resnext101_32x48d,61.013,38.987,83.347,16.653,828.41,224,0.875,bilinear,-36.607,-16.353,-2 +swin_large_patch4_window7_224,60.893,39.107,85.840,14.160,196.53,224,0.900,bicubic,-36.757,-13.740,-5 +resnetv2_152x4_bitm,60.733,39.267,83.600,16.400,936.53,480,1.000,bilinear,-36.757,-16.000,0 +dm_nfnet_f4,60.720,39.280,83.427,16.573,316.07,512,0.951,bicubic,-36.850,-16.093,-3 +tf_efficientnet_b5_ns,60.320,39.680,84.493,15.507,30.39,456,0.934,bicubic,-37.180,-15.137,-3 dm_nfnet_f3,58.373,41.627,82.360,17.640,254.92,416,0.940,bicubic,-38.987,-17.220,+1 ig_resnext101_32x32d,58.093,41.907,80.653,19.347,468.53,224,0.875,bilinear,-39.267,-19.027,+1 cait_m36_384,57.840,42.160,84.813,15.187,271.22,384,1.000,bicubic,-39.560,-14.697,-2 -resnetv2_152x2_bitm,54.973,45.027,82.813,17.187,236.34,480,1.000,bilinear,-42.177,-16.777,+6 -vit_base_r50_s16_384,54.627,45.373,81.213,18.787,98.95,384,1.000,bicubic,-42.553,-18.347,+4 +resnetv2_152x2_bitm,54.973,45.027,82.813,17.187,236.34,480,1.000,bilinear,-42.177,-16.777,+7 +vit_base_r50_s16_384,54.627,45.373,81.213,18.787,98.95,384,1.000,bicubic,-42.553,-18.347,+5 cait_s36_384,54.413,45.587,81.360,18.640,68.37,384,1.000,bicubic,-42.917,-18.170,-2 -vit_large_patch16_384,53.867,46.133,80.320,19.680,304.72,384,1.000,bicubic,-43.243,-19.320,+5 -resnetv2_101x3_bitm,53.813,46.187,81.093,18.907,387.93,480,1.000,bilinear,-43.237,-18.427,+8 -ig_resnext101_32x16d,53.067,46.933,76.907,23.093,194.03,224,0.875,bilinear,-43.753,-22.683,+14 +vit_large_patch16_384,53.867,46.133,80.320,19.680,304.72,384,1.000,bicubic,-43.243,-19.320,+7 +resnetv2_101x3_bitm,53.813,46.187,81.093,18.907,387.93,480,1.000,bilinear,-43.237,-18.427,+10 +tf_efficientnetv2_l,53.187,46.813,79.133,20.867,118.52,480,1.000,bicubic,-44.093,-20.417,-4 +ig_resnext101_32x16d,53.067,46.933,76.907,23.093,194.03,224,0.875,bilinear,-43.753,-22.683,+16 swin_base_patch4_window7_224,51.453,48.547,79.973,20.027,87.77,224,0.900,bicubic,-45.797,-19.557,-5 -tf_efficientnet_b4_ns,51.213,48.787,79.187,20.813,19.34,380,0.922,bicubic,-45.737,-20.393,+9 -swsl_resnext101_32x8d,51.187,48.813,78.240,21.760,88.79,224,0.875,bilinear,-46.013,-21.330,-6 -dm_nfnet_f2,50.773,49.227,78.013,21.987,193.78,352,0.920,bicubic,-46.187,-21.437,+5 -vit_base_patch16_384,50.613,49.387,78.200,21.800,86.86,384,1.000,bicubic,-46.087,-21.310,+15 -cait_s24_384,49.733,50.267,78.733,21.267,47.06,384,1.000,bicubic,-47.337,-20.697,0 -vit_deit_base_distilled_patch16_384,49.333,50.667,79.253,20.747,87.63,384,1.000,bicubic,-47.627,-20.227,+3 -tf_efficientnet_b8,48.947,51.053,77.240,22.760,87.41,672,0.954,bicubic,-48.253,-22.260,-9 -resnest269e,48.187,51.813,74.333,25.667,110.93,416,0.928,bicubic,-48.333,-25.017,+22 -resnetv2_50x3_bitm,47.787,52.213,77.627,22.373,217.32,480,1.000,bilinear,-48.983,-21.803,+5 +tf_efficientnet_b4_ns,51.213,48.787,79.187,20.813,19.34,380,0.922,bicubic,-45.737,-20.393,+11 +swsl_resnext101_32x8d,51.187,48.813,78.240,21.760,88.79,224,0.875,bilinear,-46.013,-21.260,-4 +dm_nfnet_f2,50.773,49.227,78.013,21.987,193.78,352,0.920,bicubic,-46.187,-21.437,+7 +vit_base_patch16_384,50.613,49.387,78.200,21.800,86.86,384,1.000,bicubic,-46.087,-21.070,+17 +cait_s24_384,49.733,50.267,78.733,21.267,47.06,384,1.000,bicubic,-47.337,-20.697,+1 +vit_deit_base_distilled_patch16_384,49.333,50.667,79.253,20.747,87.63,384,1.000,bicubic,-47.627,-20.227,+5 +tf_efficientnet_b8,48.947,51.053,77.240,22.760,87.41,672,0.954,bicubic,-48.253,-22.330,-11 
+tf_efficientnetv2_s_in21ft1k,48.507,51.493,77.880,22.120,21.46,384,1.000,bicubic,-48.223,-21.540,+11 +resnest269e,48.187,51.813,74.333,25.667,110.93,416,0.928,bicubic,-48.333,-25.017,+24 +resnetv2_50x3_bitm,47.787,52.213,77.627,22.373,217.32,480,1.000,bilinear,-48.983,-21.803,+6 tf_efficientnet_b8_ap,46.893,53.107,76.507,23.493,87.41,672,0.954,bicubic,-50.217,-23.153,-9 -dm_nfnet_f1,46.600,53.400,74.773,25.227,132.63,320,0.910,bicubic,-50.320,-24.617,0 -swsl_resnext101_32x16d,46.200,53.800,72.200,27.800,194.03,224,0.875,bilinear,-50.400,-27.320,+13 -ecaresnet269d,45.893,54.107,75.133,24.867,102.09,352,1.000,bicubic,-51.187,-24.337,-10 -tf_efficientnet_b7_ap,45.373,54.627,74.213,25.787,66.35,600,0.949,bicubic,-51.827,-25.327,-17 -ig_resnext101_32x8d,45.320,54.680,70.867,29.133,88.79,224,0.875,bilinear,-51.000,-28.563,+23 +dm_nfnet_f1,46.600,53.400,74.773,25.227,132.63,320,0.910,bicubic,-50.320,-24.617,+1 +efficientnetv2_rw_m,46.280,53.720,75.707,24.293,53.24,416,1.000,bicubic,-50.700,-23.833,-4 +swsl_resnext101_32x16d,46.200,53.800,72.200,27.800,194.03,224,0.875,bilinear,-50.400,-27.320,+14 +ecaresnet269d,45.893,54.107,75.133,24.867,102.09,352,1.000,bicubic,-51.187,-24.337,-11 +tf_efficientnetv2_m,45.533,54.467,74.533,25.467,54.14,480,1.000,bicubic,-51.607,-24.877,-15 +tf_efficientnet_b7_ap,45.373,54.627,74.213,25.787,66.35,600,0.949,bicubic,-51.827,-25.327,-20 +ig_resnext101_32x8d,45.320,54.680,70.867,29.133,88.79,224,0.875,bilinear,-51.000,-28.563,+24 resnest200e,44.147,55.853,73.467,26.533,70.20,320,0.909,bicubic,-52.463,-25.883,+8 cait_xs24_384,43.947,56.053,75.187,24.813,26.67,384,1.000,bicubic,-52.603,-24.233,+10 -tresnet_xl_448,43.480,56.520,72.453,27.547,78.44,448,0.875,bilinear,-52.490,-26.677,+30 -resnetrs420,43.147,56.853,70.453,29.547,191.89,416,1.000,bicubic,-53.763,-29.007,-7 -tf_efficientnet_b7,42.960,57.040,73.133,26.867,66.35,600,0.949,bicubic,-54.050,-26.387,-13 +tresnet_xl_448,43.480,56.520,72.453,27.547,78.44,448,0.875,bilinear,-52.490,-26.677,+34 +resnetrs420,43.147,56.853,70.453,29.547,191.89,416,1.000,bicubic,-53.763,-29.007,-8 +tf_efficientnet_b7,42.960,57.040,73.133,26.867,66.35,600,0.949,bicubic,-54.050,-26.387,-15 swsl_resnext101_32x4d,41.560,58.440,71.760,28.240,44.18,224,0.875,bilinear,-54.860,-27.710,+11 -tf_efficientnet_b6_ap,40.800,59.200,71.627,28.373,43.04,528,0.942,bicubic,-56.280,-27.993,-18 -tresnet_l_448,40.200,59.800,69.893,30.107,55.99,448,0.875,bilinear,-55.660,-29.227,+32 -vit_deit_base_patch16_384,40.173,59.827,70.760,29.240,86.86,384,1.000,bicubic,-55.977,-28.380,+18 -resnetrs350,39.960,60.040,68.907,31.093,163.96,384,1.000,bicubic,-56.800,-30.463,-9 -resnetv2_101x1_bitm,39.307,60.693,71.493,28.507,44.54,480,1.000,bilinear,-56.783,-27.697,+20 -vit_large_patch32_384,38.933,61.067,68.920,31.080,306.63,384,1.000,bicubic,-56.897,-30.230,+30 +tf_efficientnet_b6_ap,40.800,59.200,71.627,28.373,43.04,528,0.942,bicubic,-56.280,-27.993,-20 +tresnet_l_448,40.200,59.800,69.893,30.107,55.99,448,0.875,bilinear,-55.660,-29.227,+37 +vit_deit_base_patch16_384,40.173,59.827,70.760,29.240,86.86,384,1.000,bicubic,-55.977,-28.380,+22 +resnetrs350,39.960,60.040,68.907,31.093,163.96,384,1.000,bicubic,-56.800,-30.463,-10 +resnetv2_101x1_bitm,39.307,60.693,71.493,28.507,44.54,480,1.000,bilinear,-56.783,-27.807,+23 +vit_large_patch32_384,38.933,61.067,68.920,31.080,306.63,384,1.000,bicubic,-56.897,-30.230,+35 resnet200d,38.147,61.853,68.613,31.387,64.69,320,1.000,bicubic,-58.573,-30.717,-11 
-eca_nfnet_l1,38.107,61.893,71.293,28.707,41.41,320,1.000,bicubic,-58.593,-27.977,-11 -seresnet152d,37.640,62.360,69.480,30.520,66.84,320,1.000,bicubic,-59.130,-29.970,-15 -efficientnet_v2s,36.787,63.213,68.320,31.680,23.94,384,1.000,bicubic,-59.753,-31.040,-3 -regnety_160,36.747,63.253,69.107,30.893,83.59,288,1.000,bicubic,-59.603,-30.223,+4 -cait_xxs36_384,36.227,63.773,67.800,32.200,17.37,384,1.000,bicubic,-59.623,-31.290,+23 -pit_b_distilled_224,35.627,64.373,69.120,30.880,74.79,224,0.900,bicubic,-61.053,-30.340,-12 -tf_efficientnet_b3_ns,35.520,64.480,67.773,32.227,12.23,300,0.904,bicubic,-60.870,-31.577,-2 -vit_large_patch16_224,35.493,64.507,64.427,35.573,304.33,224,0.900,bicubic,-60.457,-34.813,+13 -tf_efficientnet_b6,35.213,64.787,67.720,32.280,43.04,528,0.942,bicubic,-61.457,-31.650,-14 -resnetrs270,35.013,64.987,65.480,34.520,129.86,352,1.000,bicubic,-61.677,-33.870,-18 -tf_efficientnet_b5_ap,34.787,65.213,67.493,32.507,30.39,456,0.934,bicubic,-61.893,-31.857,-18 -vit_base_patch16_224_miil,34.507,65.493,65.000,35.000,86.54,224,0.875,bilinear,-61.953,-34.300,-9 -resnet152d,34.320,65.680,65.907,34.093,60.21,320,1.000,bicubic,-62.040,-33.483,-6 -tresnet_m_448,34.107,65.893,64.493,35.507,31.39,448,0.875,bilinear,-60.883,-34.487,+49 -vit_base_patch32_384,33.613,66.387,65.240,34.760,88.30,384,1.000,bicubic,-62.197,-33.910,+15 -pit_b_224,33.173,66.827,62.320,37.680,73.76,224,0.900,bicubic,-62.467,-36.340,+21 -swsl_resnext50_32x4d,33.013,66.987,65.067,34.933,25.03,224,0.875,bilinear,-62.857,-34.183,+8 -ssl_resnext101_32x16d,32.600,67.400,64.000,36.000,194.03,224,0.875,bilinear,-63.200,-35.180,+13 -swin_small_patch4_window7_224,32.600,67.400,65.440,34.560,49.61,224,0.900,bicubic,-63.310,-33.580,+4 -vit_base_patch16_224,32.053,67.947,61.573,38.427,86.57,224,0.900,bicubic,-63.277,-37.427,+26 -tf_efficientnet_b5,31.840,68.160,65.293,34.707,30.39,456,0.934,bicubic,-64.510,-34.017,-12 -resnest101e,31.413,68.587,64.360,35.640,48.28,256,0.875,bilinear,-64.447,-34.850,+4 -dm_nfnet_f0,31.280,68.720,63.347,36.653,71.49,256,0.900,bicubic,-64.860,-35.893,-8 -cait_s24_224,31.200,68.800,64.560,35.440,46.92,224,1.000,bicubic,-65.180,-34.590,-18 +eca_nfnet_l1,38.107,61.893,71.293,28.707,41.41,320,1.000,bicubic,-58.593,-28.217,-10 +seresnet152d,37.640,62.360,69.480,30.520,66.84,320,1.000,bicubic,-59.130,-29.970,-16 +twins_svt_large,37.200,62.800,69.227,30.773,99.27,224,0.900,bicubic,-59.070,-29.943,+10 +efficientnetv2_rw_s,36.787,63.213,68.320,31.680,23.94,384,1.000,bicubic,-59.753,-31.040,-4 +regnety_160,36.747,63.253,69.107,30.893,83.59,288,1.000,bicubic,-59.603,-30.223,+3 +cait_xxs36_384,36.227,63.773,67.800,32.200,17.37,384,1.000,bicubic,-59.623,-31.290,+27 +pit_b_distilled_224,35.627,64.373,69.120,30.880,74.79,224,0.900,bicubic,-61.053,-30.230,-14 +tf_efficientnet_b3_ns,35.520,64.480,67.773,32.227,12.23,300,0.904,bicubic,-60.870,-31.577,-3 +vit_large_patch16_224,35.493,64.507,64.427,35.573,304.33,224,0.900,bicubic,-60.457,-34.813,+16 +tf_efficientnet_b6,35.213,64.787,67.720,32.280,43.04,528,0.942,bicubic,-61.457,-31.650,-15 +resnetrs270,35.013,64.987,65.480,34.520,129.86,352,1.000,bicubic,-61.677,-33.870,-19 +tf_efficientnet_b5_ap,34.787,65.213,67.493,32.507,30.39,456,0.934,bicubic,-61.893,-31.967,-18 +vit_base_patch16_224_miil,34.507,65.493,65.000,35.000,86.54,224,0.875,bilinear,-61.953,-34.300,-10 +resnet152d,34.320,65.680,65.907,34.093,60.21,320,1.000,bicubic,-62.040,-33.483,-7 +tresnet_m_448,34.107,65.893,64.493,35.507,31.39,448,0.875,bilinear,-60.883,-34.487,+64 
+vit_base_patch32_384,33.613,66.387,65.240,34.760,88.30,384,1.000,bicubic,-62.197,-33.910,+19 +twins_pcpvt_large,33.387,66.613,67.933,32.067,60.99,224,0.900,bicubic,-62.763,-31.247,0 +pit_b_224,33.173,66.827,62.320,37.680,73.76,224,0.900,bicubic,-62.467,-36.340,+25 +twins_svt_base,33.173,66.827,65.773,34.227,56.07,224,0.900,bicubic,-62.987,-33.507,-5 +swsl_resnext50_32x4d,33.013,66.987,65.067,34.933,25.03,224,0.875,bilinear,-62.857,-34.183,+9 +ssl_resnext101_32x16d,32.600,67.400,64.000,36.000,194.03,224,0.875,bilinear,-63.200,-35.180,+15 +swin_small_patch4_window7_224,32.600,67.400,65.440,34.560,49.61,224,0.900,bicubic,-63.310,-33.580,+5 +vit_base_patch16_224,32.053,67.947,61.573,38.427,86.57,224,0.900,bicubic,-63.277,-37.427,+33 +tf_efficientnet_b5,31.840,68.160,65.293,34.707,30.39,456,0.934,bicubic,-64.510,-34.017,-15 +resnest101e,31.413,68.587,64.360,35.640,48.28,256,0.875,bilinear,-64.447,-34.850,+5 +dm_nfnet_f0,31.280,68.720,63.347,36.653,71.49,256,0.900,bicubic,-64.860,-35.893,-7 +cait_s24_224,31.200,68.800,64.560,35.440,46.92,224,1.000,bicubic,-65.180,-34.590,-21 efficientnet_b4,30.867,69.133,64.600,35.400,19.34,384,1.000,bicubic,-65.283,-34.600,-12 -resnetrs200,30.773,69.227,63.320,36.680,93.21,320,1.000,bicubic,-65.757,-36.030,-25 -cait_xxs24_384,30.027,69.973,63.933,36.067,12.03,384,1.000,bicubic,-65.233,-35.027,+22 -swsl_resnet50,29.867,70.133,63.853,36.147,25.56,224,0.875,bilinear,-65.543,-35.437,+17 -vit_deit_base_distilled_patch16_224,29.600,70.400,64.453,35.547,87.34,224,0.900,bicubic,-66.490,-34.807,-13 -ssl_resnext101_32x8d,29.040,70.960,60.973,39.027,88.79,224,0.875,bilinear,-66.430,-38.137,+10 -resnet101d,28.987,71.013,62.053,37.947,44.57,320,1.000,bicubic,-67.303,-37.177,-20 -resnetrs152,28.920,71.080,60.520,39.480,86.62,320,1.000,bicubic,-67.660,-38.720,-34 -vit_deit_base_patch16_224,27.440,72.560,58.893,41.107,86.57,224,0.900,bicubic,-68.000,-39.947,+10 -resnetv2_50x1_bitm,27.347,72.653,63.547,36.453,25.55,480,1.000,bilinear,-67.703,-35.613,+24 -nfnet_l0,26.493,73.507,61.987,38.013,35.07,288,1.000,bicubic,-69.597,-37.313,-18 -tf_efficientnet_b4,26.293,73.707,60.107,39.893,19.34,380,0.922,bicubic,-69.607,-39.063,-12 -tf_efficientnet_b4_ap,26.240,73.760,60.227,39.773,19.34,380,0.922,bicubic,-69.920,-39.053,-25 -regnety_032,26.213,73.787,60.987,39.013,19.44,288,1.000,bicubic,-69.757,-38.203,-19 -ecaresnet50t,26.133,73.867,60.027,39.973,25.57,320,0.950,bicubic,-69.377,-39.093,0 -ecaresnet101d,26.027,73.973,58.987,41.013,44.57,224,0.875,bicubic,-69.503,-40.143,-2 -eca_nfnet_l0,25.013,74.987,60.360,39.640,24.14,288,1.000,bicubic,-70.917,-38.850,-19 -tnt_s_patch16_224,24.733,75.267,58.187,41.813,23.76,224,0.900,bicubic,-70.307,-40.643,+19 -ssl_resnext101_32x4d,24.173,75.827,57.413,42.587,44.18,224,0.875,bilinear,-71.267,-41.717,-2 -tf_efficientnet_b2_ns,24.013,75.987,57.293,42.707,9.11,260,0.890,bicubic,-71.757,-41.827,-12 -nasnetalarge,23.493,76.507,55.027,44.973,88.75,331,0.911,bicubic,-72.187,-43.903,-9 -pnasnet5large,23.333,76.667,53.640,46.360,86.06,331,0.911,bicubic,-72.377,-45.280,-11 -efficientnet_b3,23.213,76.787,55.960,44.040,12.23,320,1.000,bicubic,-72.497,-43.080,-13 -pit_s_distilled_224,22.360,77.640,57.120,42.880,24.04,224,0.900,bicubic,-72.880,-41.930,+2 -tresnet_m,21.680,78.320,53.840,46.160,31.39,224,0.875,bilinear,-74.040,-45.190,-16 -swin_tiny_patch4_window7_224,21.173,78.827,55.973,44.027,28.29,224,0.900,bicubic,-73.967,-42.877,+2 -pit_s_224,21.080,78.920,53.573,46.427,23.46,224,0.900,bicubic,-73.510,-45.137,+33 
-resnetrs101,20.893,79.107,52.813,47.187,63.62,288,0.940,bicubic,-74.537,-46.217,-8 -vit_deit_small_distilled_patch16_224,20.707,79.293,55.133,44.867,22.44,224,0.900,bicubic,-74.003,-43.897,+23 -resnest50d_4s2x40d,20.387,79.613,52.800,47.200,30.42,224,0.875,bicubic,-74.573,-46.270,+10 -ssl_resnext50_32x4d,20.000,80.000,53.613,46.387,25.03,224,0.875,bilinear,-74.870,-45.267,+15 -tresnet_xl,19.640,80.360,53.133,46.867,78.44,224,0.875,bilinear,-75.800,-45.917,-14 -gluon_senet154,19.307,80.693,47.533,52.467,115.09,224,0.875,bicubic,-75.613,-51.227,+10 -rexnet_200,19.227,80.773,52.720,47.280,16.37,224,0.875,bicubic,-75.713,-46.290,+7 -repvgg_b3,19.107,80.893,50.253,49.747,123.09,224,0.875,bilinear,-75.463,-48.527,+26 +resnetrs200,30.773,69.227,63.320,36.680,93.21,320,1.000,bicubic,-65.757,-36.030,-28 +cait_xxs24_384,30.027,69.973,63.933,36.067,12.03,384,1.000,bicubic,-65.233,-35.027,+30 +twins_pcpvt_base,29.960,70.040,64.587,35.413,43.83,224,0.900,bicubic,-65.830,-34.543,+6 +swsl_resnet50,29.867,70.133,63.853,36.147,25.56,224,0.875,bilinear,-65.543,-35.437,+23 +vit_deit_base_distilled_patch16_224,29.600,70.400,64.453,35.547,87.34,224,0.900,bicubic,-66.490,-34.737,-11 +convit_base,29.520,70.480,61.787,38.213,86.54,224,0.875,bicubic,-66.030,-37.083,+10 +ssl_resnext101_32x8d,29.040,70.960,60.973,39.027,88.79,224,0.875,bilinear,-66.430,-38.137,+15 +tf_efficientnetv2_s,29.040,70.960,61.213,38.787,21.46,384,1.000,bicubic,-67.300,-37.987,-26 +resnet101d,28.987,71.013,62.053,37.947,44.57,320,1.000,bicubic,-67.303,-37.177,-25 +resnetrs152,28.920,71.080,60.520,39.480,86.62,320,1.000,bicubic,-67.660,-38.720,-40 +coat_lite_small,27.547,72.453,58.547,41.453,19.84,224,0.900,bicubic,-67.993,-40.313,+6 +vit_deit_base_patch16_224,27.440,72.560,58.893,41.107,86.57,224,0.900,bicubic,-68.000,-40.157,+12 +resnetv2_50x1_bitm,27.347,72.653,63.547,36.453,25.55,480,1.000,bilinear,-67.703,-35.613,+32 +nfnet_l0,26.493,73.507,61.987,38.013,35.07,288,1.000,bicubic,-69.597,-37.273,-22 +tf_efficientnet_b4,26.293,73.707,60.107,39.893,19.34,380,0.922,bicubic,-69.607,-39.063,-15 +tf_efficientnet_b4_ap,26.240,73.760,60.227,39.773,19.34,380,0.922,bicubic,-69.920,-38.833,-29 +regnety_032,26.213,73.787,60.987,39.013,19.44,288,1.000,bicubic,-69.757,-38.203,-22 +ecaresnet50t,26.133,73.867,60.027,39.973,25.57,320,0.950,bicubic,-69.377,-39.093,+2 +ecaresnet101d,26.027,73.973,58.987,41.013,44.57,224,0.875,bicubic,-69.503,-40.143,-1 +visformer_small,25.840,74.160,58.907,41.093,40.22,224,0.900,bicubic,-69.650,-39.993,+1 +coat_mini,25.520,74.480,57.693,42.307,10.34,224,0.900,bicubic,-69.450,-41.087,+31 +convit_small,25.093,74.907,57.280,42.720,27.78,224,0.875,bicubic,-70.107,-41.620,+13 +eca_nfnet_l0,25.013,74.987,60.360,39.640,24.14,288,1.000,bicubic,-70.917,-38.850,-25 +tnt_s_patch16_224,24.733,75.267,58.187,41.813,23.76,224,0.900,bicubic,-70.307,-40.643,+24 +ssl_resnext101_32x4d,24.173,75.827,57.413,42.587,44.18,224,0.875,bilinear,-71.267,-41.427,0 +twins_svt_small,24.133,75.867,57.147,42.853,24.06,224,0.900,bicubic,-71.067,-41.733,+10 +tf_efficientnet_b2_ns,24.013,75.987,57.293,42.707,9.11,260,0.890,bicubic,-71.757,-41.827,-17 +nasnetalarge,23.493,76.507,55.027,44.973,88.75,331,0.911,bicubic,-72.187,-43.903,-14 +levit_384,23.440,76.560,56.387,43.613,39.13,224,0.900,bicubic,-72.090,-42.663,-10 +pnasnet5large,23.333,76.667,53.640,46.360,86.06,331,0.911,bicubic,-72.377,-45.400,-18 +efficientnet_b3,23.213,76.787,55.960,44.040,12.23,320,1.000,bicubic,-72.497,-42.960,-18 
+twins_pcpvt_small,22.720,77.280,56.853,43.147,24.11,224,0.900,bicubic,-72.490,-42.027,+2 +pit_s_distilled_224,22.360,77.640,57.120,42.880,24.04,224,0.900,bicubic,-72.880,-41.930,0 +tresnet_m,21.680,78.320,53.840,46.160,31.39,224,0.875,bilinear,-74.040,-45.190,-23 +swin_tiny_patch4_window7_224,21.173,78.827,55.973,44.027,28.29,224,0.900,bicubic,-73.967,-42.877,+4 +pit_s_224,21.080,78.920,53.573,46.427,23.46,224,0.900,bicubic,-73.510,-45.137,+37 +resnet51q,20.960,79.040,55.720,44.280,35.70,288,1.000,bilinear,-74.900,-43.400,-34 +resnetrs101,20.893,79.107,52.813,47.187,63.62,288,0.940,bicubic,-74.537,-46.217,-12 +vit_deit_small_distilled_patch16_224,20.707,79.293,55.133,44.867,22.44,224,0.900,bicubic,-74.003,-43.897,+26 +resnest50d_4s2x40d,20.387,79.613,52.800,47.200,30.42,224,0.875,bicubic,-74.573,-46.270,+13 +ssl_resnext50_32x4d,20.000,80.000,53.613,46.387,25.03,224,0.875,bilinear,-74.870,-45.267,+18 +tresnet_xl,19.640,80.360,53.133,46.867,78.44,224,0.875,bilinear,-75.800,-45.997,-19 +gluon_senet154,19.307,80.693,47.533,52.467,115.09,224,0.875,bicubic,-75.613,-51.227,+13 +rexnet_200,19.227,80.773,52.720,47.280,16.37,224,0.875,bicubic,-75.713,-46.290,+10 +levit_256,19.200,80.800,50.067,49.933,18.89,224,0.900,bicubic,-75.810,-48.843,+5 +repvgg_b3,19.107,80.893,50.253,49.747,123.09,224,0.875,bilinear,-75.463,-48.527,+28 legacy_senet154,19.053,80.947,47.947,52.053,115.09,224,0.875,bilinear,-76.017,-50.883,-3 -vit_deit_small_patch16_224,18.907,81.093,51.413,48.587,22.05,224,0.900,bicubic,-75.493,-47.327,+36 -gluon_seresnext101_64x4d,18.907,81.093,49.187,50.813,88.23,224,0.875,bicubic,-76.023,-49.643,+5 -tf_efficientnet_b1_ns,18.693,81.307,51.667,48.333,7.79,240,0.882,bicubic,-76.477,-47.443,-12 -seresnext50_32x4d,18.360,81.640,50.973,49.027,27.56,224,0.875,bicubic,-76.680,-47.907,-4 -cait_xxs36_224,18.253,81.747,49.427,50.573,17.30,224,1.000,bicubic,-76.007,-49.293,+45 -ecaresnet50d,18.227,81.773,51.880,48.120,25.58,224,0.875,bicubic,-76.403,-47.010,+13 -tf_efficientnet_lite4,18.133,81.867,50.707,49.293,13.01,380,0.920,bilinear,-76.757,-48.313,+2 -resnest50d_1s4x24d,17.693,82.307,49.800,50.200,25.68,224,0.875,bicubic,-77.057,-49.180,+5 -gluon_seresnext101_32x4d,17.373,82.627,46.373,53.627,48.96,224,0.875,bicubic,-77.547,-52.437,-1 -resnest50d,17.373,82.627,50.707,49.293,27.48,224,0.875,bilinear,-77.457,-48.173,+1 -efficientnet_el,17.347,82.653,49.987,50.013,10.59,300,0.904,bicubic,-77.773,-49.003,-18 -inception_v4,17.267,82.733,45.920,54.080,42.68,299,0.875,bicubic,-77.113,-52.660,+28 -tf_efficientnet_b3_ap,17.187,82.813,49.680,50.320,12.23,300,0.904,bicubic,-78.133,-49.220,-26 -tf_efficientnet_b3,17.000,83.000,49.267,50.733,12.23,300,0.904,bicubic,-78.010,-49.643,-12 -xception71,17.000,83.000,45.520,54.480,42.34,299,0.903,bicubic,-77.280,-53.120,+32 -gluon_resnext101_64x4d,16.853,83.147,44.213,55.787,83.46,224,0.875,bicubic,-77.817,-54.437,+1 -tresnet_l,16.600,83.400,49.920,50.080,55.99,224,0.875,bilinear,-78.690,-49.090,-29 +mixer_b16_224_miil,19.053,80.947,51.227,48.773,59.88,224,0.875,bilinear,-76.247,-47.653,-18 +vit_deit_small_patch16_224,18.907,81.093,51.413,48.587,22.05,224,0.900,bicubic,-75.493,-47.277,+39 +gluon_seresnext101_64x4d,18.907,81.093,49.187,50.813,88.23,224,0.875,bicubic,-76.023,-49.643,+6 +tf_efficientnet_b1_ns,18.693,81.307,51.667,48.333,7.79,240,0.882,bicubic,-76.477,-47.443,-14 +seresnext50_32x4d,18.360,81.640,50.973,49.027,27.56,224,0.875,bicubic,-76.680,-47.907,-5 +cait_xxs36_224,18.253,81.747,49.427,50.573,17.30,224,1.000,bicubic,-76.007,-49.033,+48 
+ecaresnet50d,18.227,81.773,51.893,48.107,25.58,224,0.875,bicubic,-76.403,-46.997,+14 +tf_efficientnet_lite4,18.133,81.867,50.707,49.293,13.01,380,0.920,bilinear,-76.757,-48.313,+3 +resnest50d_1s4x24d,17.693,82.307,49.800,50.200,25.68,224,0.875,bicubic,-77.057,-49.180,+6 +gluon_seresnext101_32x4d,17.373,82.627,46.373,53.627,48.96,224,0.875,bicubic,-77.547,-52.437,0 +resnest50d,17.373,82.627,50.707,49.293,27.48,224,0.875,bilinear,-77.457,-48.173,+2 +efficientnet_el,17.347,82.653,49.987,50.013,10.59,300,0.904,bicubic,-77.773,-49.003,-19 +inception_v4,17.267,82.733,45.920,54.080,42.68,299,0.875,bicubic,-77.113,-52.660,+30 +tf_efficientnet_b3_ap,17.187,82.813,49.680,50.320,12.23,300,0.904,bicubic,-78.133,-49.220,-32 +xception71,17.000,83.000,45.520,54.480,42.34,299,0.903,bicubic,-77.280,-53.120,+34 +tf_efficientnet_b3,17.000,83.000,49.267,50.733,12.23,300,0.904,bicubic,-78.010,-49.623,-13 +gluon_resnext101_64x4d,16.853,83.147,44.213,55.787,83.46,224,0.875,bicubic,-77.817,-54.437,+2 +tf_efficientnetv2_b3,16.667,83.333,48.680,51.320,14.36,300,0.904,bicubic,-78.493,-50.140,-27 +tresnet_l,16.600,83.400,49.920,50.080,55.99,224,0.875,bilinear,-78.690,-49.090,-35 gluon_resnet152_v1d,16.573,83.427,44.280,55.720,60.21,224,0.875,bicubic,-78.167,-54.460,-4 -gluon_resnet152_v1s,16.573,83.427,44.533,55.467,60.32,224,0.875,bicubic,-78.467,-54.397,-20 -inception_resnet_v2,16.573,83.427,44.960,55.040,55.84,299,0.897,bicubic,-77.967,-53.890,+8 -gluon_xception65,16.440,83.560,46.027,53.973,39.92,299,0.903,bicubic,-77.820,-52.433,+30 -gernet_l,16.373,83.627,47.213,52.787,31.08,256,0.875,bilinear,-78.717,-51.687,-28 -wide_resnet50_2,16.280,83.720,48.347,51.653,68.88,224,0.875,bicubic,-78.800,-50.623,-27 -ens_adv_inception_resnet_v2,16.240,83.760,43.640,56.360,55.84,299,0.897,bicubic,-77.920,-55.000,+37 +gluon_resnet152_v1s,16.573,83.427,44.533,55.467,60.32,224,0.875,bicubic,-78.467,-54.397,-22 +inception_resnet_v2,16.573,83.427,44.960,55.040,55.84,299,0.897,bicubic,-77.967,-53.830,+7 +gluon_xception65,16.440,83.560,46.027,53.973,39.92,299,0.903,bicubic,-77.820,-52.693,+30 +gernet_l,16.373,83.627,47.213,52.787,31.08,256,0.875,bilinear,-78.717,-51.687,-30 +wide_resnet50_2,16.280,83.720,48.347,51.653,68.88,224,0.875,bicubic,-78.800,-50.623,-29 +ens_adv_inception_resnet_v2,16.240,83.760,43.640,56.360,55.84,299,0.897,bicubic,-77.920,-55.000,+39 repvgg_b3g4,16.213,83.787,47.653,52.347,83.83,224,0.875,bilinear,-78.307,-51.317,+5 -xception65,16.027,83.973,43.773,56.227,39.92,299,0.903,bicubic,-77.733,-54.597,+62 +xception65,16.027,83.973,43.773,56.227,39.92,299,0.903,bicubic,-77.733,-54.597,+65 ssl_resnet50,15.960,84.040,49.467,50.533,25.56,224,0.875,bilinear,-78.490,-49.453,+9 -regnety_320,15.627,84.373,44.827,55.173,145.05,224,0.875,bicubic,-78.913,-53.963,-1 -ecaresnet101d_pruned,15.600,84.400,48.027,51.973,24.88,224,0.875,bicubic,-79.480,-50.953,-34 -ecaresnet26t,15.467,84.533,47.920,52.080,16.01,320,0.950,bicubic,-78.843,-50.800,+15 +regnety_320,15.627,84.373,44.827,55.173,145.05,224,0.875,bicubic,-78.913,-54.023,0 +ecaresnet101d_pruned,15.600,84.400,48.027,51.973,24.88,224,0.875,bicubic,-79.480,-50.953,-36 +ecaresnet26t,15.467,84.533,47.920,52.080,16.01,320,0.950,bicubic,-78.843,-50.800,+16 +coat_tiny,15.413,84.587,45.600,54.400,5.50,224,0.900,bicubic,-78.177,-52.830,+72 skresnext50_32x4d,15.373,84.627,44.493,55.507,27.48,224,0.875,bicubic,-78.887,-54.077,+18 -cait_xxs24_224,15.160,84.840,44.960,55.040,11.96,224,1.000,bicubic,-78.440,-53.480,+67 
-ecaresnetlight,15.160,84.840,45.827,54.173,30.16,224,0.875,bicubic,-79.610,-52.973,-21 -rexnet_150,14.720,85.280,46.907,53.093,9.73,224,0.875,bicubic,-79.760,-51.883,0 +cait_xxs24_224,15.160,84.840,44.960,55.040,11.96,224,1.000,bicubic,-78.440,-53.480,+69 +ecaresnetlight,15.160,84.840,45.827,54.173,30.16,224,0.875,bicubic,-79.610,-52.973,-22 +levit_192,14.893,85.107,44.920,55.080,10.95,224,0.900,bicubic,-79.277,-53.620,+25 +rexnet_150,14.720,85.280,46.907,53.093,9.73,224,0.875,bicubic,-79.760,-51.883,-2 coat_lite_mini,14.507,85.493,44.507,55.493,11.01,224,0.900,bicubic,-79.553,-54.053,+35 -efficientnet_el_pruned,14.480,85.520,46.120,53.880,10.59,300,0.904,bicubic,-79.920,-52.570,+3 -efficientnet_b2,14.440,85.560,46.080,53.920,9.11,288,1.000,bicubic,-80.170,-52.630,-15 -legacy_seresnext101_32x4d,14.147,85.853,42.973,57.027,48.96,224,0.875,bilinear,-80.223,-55.677,+3 -seresnet50,14.147,85.853,45.467,54.533,28.09,224,0.875,bicubic,-80.403,-53.283,-13 -gernet_m,14.013,85.987,46.067,53.933,21.14,224,0.875,bilinear,-80.607,-52.793,-19 -gluon_resnext101_32x4d,13.867,86.133,41.653,58.347,44.18,224,0.875,bicubic,-80.663,-56.977,-12 -gluon_seresnext50_32x4d,13.600,86.400,43.760,56.240,27.56,224,0.875,bicubic,-80.740,-54.850,+1 -repvgg_b2g4,13.440,86.560,43.787,56.213,61.76,224,0.875,bilinear,-80.420,-54.803,+38 +efficientnet_el_pruned,14.480,85.520,46.120,53.880,10.59,300,0.904,bicubic,-79.920,-52.620,+1 +efficientnet_b2,14.440,85.560,46.080,53.920,9.11,288,1.000,bicubic,-80.170,-52.630,-17 +legacy_seresnext101_32x4d,14.147,85.853,42.973,57.027,48.96,224,0.875,bilinear,-80.223,-55.677,+2 +seresnet50,14.147,85.853,45.467,54.533,28.09,224,0.875,bicubic,-80.403,-53.283,-15 +gernet_m,14.013,85.987,46.067,53.933,21.14,224,0.875,bilinear,-80.607,-52.793,-21 +gluon_resnext101_32x4d,13.867,86.133,41.653,58.347,44.18,224,0.875,bicubic,-80.663,-56.977,-14 +gluon_seresnext50_32x4d,13.600,86.400,43.760,56.240,27.56,224,0.875,bicubic,-80.740,-54.850,0 +repvgg_b2g4,13.440,86.560,43.787,56.213,61.76,224,0.875,bilinear,-80.420,-54.803,+39 ese_vovnet39b,13.320,86.680,43.813,56.187,24.57,224,0.875,bicubic,-80.770,-54.847,+24 -regnetx_320,13.307,86.693,40.720,59.280,107.81,224,0.875,bicubic,-81.153,-58.020,-10 -pit_xs_distilled_224,13.240,86.760,44.573,55.427,11.00,224,0.900,bicubic,-80.570,-54.097,+39 -efficientnet_b3_pruned,13.173,86.827,45.213,54.787,9.86,300,0.904,bicubic,-81.457,-53.547,-27 -gluon_resnet101_v1d,13.160,86.840,41.493,58.507,44.57,224,0.875,bicubic,-81.060,-57.057,+7 -mixnet_xl,13.120,86.880,43.253,56.747,11.90,224,0.875,bicubic,-81.070,-55.087,+8 +regnetx_320,13.307,86.693,40.720,59.280,107.81,224,0.875,bicubic,-81.153,-58.020,-12 +pit_xs_distilled_224,13.240,86.760,44.573,55.427,11.00,224,0.900,bicubic,-80.570,-54.097,+40 +efficientnet_b3_pruned,13.173,86.827,45.213,54.787,9.86,300,0.904,bicubic,-81.457,-53.547,-29 +gluon_resnet101_v1d,13.160,86.840,41.493,58.507,44.57,224,0.875,bicubic,-81.060,-57.057,+6 +mixnet_xl,13.120,86.880,43.253,56.747,11.90,224,0.875,bicubic,-81.070,-55.087,+7 nf_regnet_b1,13.027,86.973,44.413,55.587,10.22,288,0.900,bicubic,-81.103,-54.217,+15 -pit_xs_224,12.813,87.187,42.840,57.160,10.62,224,0.900,bicubic,-80.297,-55.470,+79 -gluon_inception_v3,12.640,87.360,40.493,59.507,23.83,299,0.875,bicubic,-80.820,-58.077,+56 -coat_lite_tiny,12.520,87.480,41.160,58.840,5.72,224,0.900,bicubic,-80.720,-57.100,+70 -regnety_120,12.427,87.573,42.200,57.800,51.82,224,0.875,bicubic,-82.053,-56.610,-21 -efficientnet_em,12.360,87.640,43.880,56.120,6.90,240,0.882,bicubic,-81.480,-54.930,+27 
-nf_resnet50,12.320,87.680,46.760,53.240,25.56,288,0.940,bicubic,-82.270,-52.050,-33 -vit_small_patch16_224,12.147,87.853,40.320,59.680,48.75,224,0.900,bicubic,-80.613,-57.610,+89 +pit_xs_224,12.813,87.187,42.840,57.160,10.62,224,0.900,bicubic,-80.297,-55.470,+82 +gluon_inception_v3,12.640,87.360,40.493,59.507,23.83,299,0.875,bicubic,-80.820,-58.037,+59 +coat_lite_tiny,12.520,87.480,41.160,58.840,5.72,224,0.900,bicubic,-80.720,-57.100,+73 +regnety_120,12.427,87.573,42.200,57.800,51.82,224,0.875,bicubic,-82.053,-56.610,-23 +efficientnet_em,12.360,87.640,43.880,56.120,6.90,240,0.882,bicubic,-81.480,-54.930,+28 +nf_resnet50,12.320,87.680,46.760,53.240,25.56,288,0.940,bicubic,-82.270,-52.050,-35 +vit_small_patch16_224,12.147,87.853,40.320,59.680,48.75,224,0.900,bicubic,-80.613,-57.610,+93 hrnet_w64,12.027,87.973,40.787,59.213,128.06,224,0.875,bilinear,-81.983,-57.823,+14 -cspdarknet53,12.013,87.987,43.253,56.747,27.64,256,0.887,bilinear,-82.647,-55.547,-41 -gluon_resnet101_v1s,11.880,88.120,40.973,59.027,44.67,224,0.875,bicubic,-82.840,-57.847,-45 -resnet50d,11.693,88.307,42.453,57.547,25.58,224,0.875,bicubic,-82.567,-56.267,-13 -dpn92,11.627,88.373,40.267,59.733,37.67,224,0.875,bicubic,-82.603,-58.463,-9 -xception41,11.600,88.400,39.133,60.867,26.97,299,0.903,bicubic,-81.830,-59.297,+48 +cspdarknet53,12.013,87.987,43.253,56.747,27.64,256,0.887,bilinear,-82.647,-55.547,-43 +gluon_resnet101_v1s,11.880,88.120,40.973,59.027,44.67,224,0.875,bicubic,-82.840,-57.847,-47 +resnet50d,11.693,88.307,42.453,57.547,25.58,224,0.875,bicubic,-82.567,-56.267,-14 +dpn92,11.627,88.373,40.267,59.733,37.67,224,0.875,bicubic,-82.603,-58.463,-10 +xception41,11.600,88.400,39.133,60.867,26.97,299,0.903,bicubic,-81.830,-59.297,+50 dla102x2,11.573,88.427,41.293,58.707,41.28,224,0.875,bilinear,-82.377,-57.197,+10 -regnety_080,11.413,88.587,40.613,59.387,39.18,224,0.875,bicubic,-82.757,-58.067,-6 -efficientnet_b2_pruned,11.360,88.640,42.027,57.973,8.31,260,0.890,bicubic,-82.780,-56.503,-3 -tf_efficientnet_el,11.333,88.667,42.040,57.960,10.59,300,0.904,bicubic,-83.077,-56.670,-30 -gluon_resnet152_v1c,11.093,88.907,37.120,62.880,60.21,224,0.875,bicubic,-83.067,-61.480,-8 +levit_128,11.427,88.573,40.267,59.733,9.21,224,0.900,bicubic,-81.913,-58.113,+52 +regnety_080,11.413,88.587,40.613,59.387,39.18,224,0.875,bicubic,-82.757,-58.067,-7 +efficientnet_b2_pruned,11.360,88.640,42.027,57.973,8.31,260,0.890,bicubic,-82.780,-56.503,-4 +tf_efficientnet_el,11.333,88.667,42.040,57.960,10.59,300,0.904,bicubic,-83.077,-56.670,-32 +gluon_resnet152_v1c,11.093,88.907,37.120,62.880,60.21,224,0.875,bicubic,-83.067,-61.480,-9 +dpn107,11.080,88.920,38.693,61.307,86.92,224,0.875,bicubic,-83.230,-59.787,-27 hrnet_w48,11.080,88.920,40.320,59.680,77.47,224,0.875,bilinear,-82.840,-58.290,+5 -dpn107,11.080,88.920,38.693,61.307,86.92,224,0.875,bicubic,-83.230,-59.787,-25 -ecaresnet50d_pruned,11.027,88.973,41.947,58.053,19.94,224,0.875,bicubic,-83.193,-56.783,-17 -adv_inception_v3,11.013,88.987,36.720,63.280,23.83,299,0.875,bicubic,-81.867,-61.420,+68 -tf_efficientnet_b0_ns,10.933,89.067,40.067,59.933,5.29,224,0.875,bicubic,-82.697,-58.573,+24 -tf_inception_v3,10.840,89.160,36.853,63.147,23.83,299,0.875,bicubic,-82.480,-61.177,+43 -resnext50_32x4d,10.800,89.200,40.307,59.693,25.03,224,0.875,bicubic,-83.300,-58.043,-8 -dpn131,10.787,89.213,37.200,62.800,79.25,224,0.875,bicubic,-83.223,-61.520,-5 -tf_efficientnet_b2_ap,10.533,89.467,40.107,59.893,9.11,260,0.890,bicubic,-83.957,-58.513,-46 
-resnext50d_32x4d,10.413,89.587,39.733,60.267,25.05,224,0.875,bicubic,-83.767,-58.837,-20 -rexnet_130,10.400,89.600,41.547,58.453,7.56,224,0.875,bicubic,-83.500,-56.853,-3 +ecaresnet50d_pruned,11.027,88.973,41.947,58.053,19.94,224,0.875,bicubic,-83.193,-56.783,-19 +tf_efficientnetv2_b2,11.027,88.973,39.760,60.240,10.10,260,0.890,bicubic,-83.393,-58.810,-38 +adv_inception_v3,11.013,88.987,36.720,63.280,23.83,299,0.875,bicubic,-81.867,-61.420,+70 +tf_efficientnet_b0_ns,10.933,89.067,40.067,59.933,5.29,224,0.875,bicubic,-82.697,-58.573,+23 +tf_inception_v3,10.840,89.160,36.853,63.147,23.83,299,0.875,bicubic,-82.480,-61.177,+44 +resnext50_32x4d,10.800,89.200,40.307,59.693,25.03,224,0.875,bicubic,-83.300,-58.043,-10 +dpn131,10.787,89.213,37.200,62.800,79.25,224,0.875,bicubic,-83.223,-61.520,-7 +tf_efficientnet_b2_ap,10.533,89.467,40.107,59.893,9.11,260,0.890,bicubic,-83.957,-58.513,-50 +resnext50d_32x4d,10.413,89.587,39.733,60.267,25.05,224,0.875,bicubic,-83.767,-58.837,-23 +rexnet_130,10.400,89.600,41.547,58.453,7.56,224,0.875,bicubic,-83.500,-56.853,-4 hrnet_w44,10.320,89.680,39.507,60.493,67.06,224,0.875,bilinear,-83.230,-59.193,+21 -resnext101_32x8d,10.187,89.813,37.827,62.173,88.79,224,0.875,bilinear,-83.643,-60.753,+2 -regnetx_160,10.147,89.853,38.000,62.000,54.28,224,0.875,bicubic,-83.973,-60.750,-16 -dpn98,10.133,89.867,36.587,63.413,61.57,224,0.875,bicubic,-83.997,-61.983,-19 -cspresnext50,10.120,89.880,40.373,59.627,20.57,224,0.875,bilinear,-84.360,-58.307,-52 -legacy_seresnext50_32x4d,10.107,89.893,39.200,60.800,27.56,224,0.875,bilinear,-83.623,-59.380,+7 -resnetrs50,10.093,89.907,37.507,62.493,35.69,224,0.910,bicubic,-84.217,-61.133,-40 -inception_v3,10.027,89.973,35.227,64.773,23.83,299,0.875,bicubic,-82.693,-62.743,+63 -efficientnet_b1,10.013,89.987,37.547,62.453,7.79,256,1.000,bicubic,-83.237,-60.743,+34 -xception,9.987,90.013,38.027,61.973,22.86,299,0.897,bicubic,-83.473,-60.503,+20 -regnety_064,9.947,90.053,39.067,60.933,30.58,224,0.875,bicubic,-84.203,-59.663,-28 -dpn68b,9.787,90.213,38.053,61.947,12.61,224,0.875,bicubic,-83.903,-60.307,+4 -gluon_resnet152_v1b,9.747,90.253,36.067,63.933,60.19,224,0.875,bicubic,-84.333,-62.383,-23 -tf_efficientnet_lite3,9.667,90.333,39.000,61.000,8.20,300,0.904,bilinear,-84.533,-59.640,-37 -tf_efficientnet_b2,9.653,90.347,38.880,61.120,9.11,260,0.890,bicubic,-84.707,-59.730,-52 -tf_efficientnet_cc_b1_8e,9.573,90.427,36.773,63.227,39.72,240,0.882,bicubic,-84.327,-61.487,-18 -res2net101_26w_4s,9.520,90.480,35.027,64.973,45.21,224,0.875,bilinear,-84.230,-63.283,-6 +resnext101_32x8d,10.187,89.813,37.827,62.173,88.79,224,0.875,bilinear,-83.643,-60.753,+1 +regnetx_160,10.147,89.853,38.000,62.000,54.28,224,0.875,bicubic,-83.973,-60.750,-18 +dpn98,10.133,89.867,36.587,63.413,61.57,224,0.875,bicubic,-83.997,-61.983,-21 +cspresnext50,10.120,89.880,40.373,59.627,20.57,224,0.875,bilinear,-84.360,-58.307,-56 +legacy_seresnext50_32x4d,10.107,89.893,39.200,60.800,27.56,224,0.875,bilinear,-83.623,-59.380,+6 +resnetrs50,10.093,89.907,37.507,62.493,35.69,224,0.910,bicubic,-84.217,-61.133,-43 +inception_v3,10.027,89.973,35.227,64.773,23.83,299,0.875,bicubic,-82.693,-62.743,+65 +efficientnet_b1,10.013,89.987,37.547,62.453,7.79,256,1.000,bicubic,-83.237,-60.743,+35 +xception,9.987,90.013,38.027,61.973,22.86,299,0.897,bicubic,-83.473,-60.543,+19 +regnety_064,9.947,90.053,39.067,60.933,30.58,224,0.875,bicubic,-84.203,-59.663,-30 +dpn68b,9.787,90.213,38.053,61.947,12.61,224,0.875,bicubic,-83.903,-60.457,+2 
+gluon_resnet152_v1b,9.747,90.253,36.067,63.933,60.19,224,0.875,bicubic,-84.333,-62.383,-25 +tf_efficientnet_lite3,9.667,90.333,39.000,61.000,8.20,300,0.904,bilinear,-84.533,-59.640,-40 +tf_efficientnet_b2,9.653,90.347,38.880,61.120,9.11,260,0.890,bicubic,-84.707,-59.730,-55 +tf_efficientnet_cc_b1_8e,9.573,90.427,36.773,63.227,39.72,240,0.882,bicubic,-84.327,-61.487,-19 +res2net101_26w_4s,9.520,90.480,35.027,64.973,45.21,224,0.875,bilinear,-84.230,-63.283,-7 legacy_seresnet152,9.347,90.653,37.413,62.587,66.82,224,0.875,bilinear,-84.053,-60.937,+16 -cspresnet50,9.253,90.747,39.640,60.360,21.62,256,0.887,bilinear,-84.487,-59.000,-7 +cspresnet50,9.253,90.747,39.640,60.360,21.62,256,0.887,bilinear,-84.487,-59.000,-8 hrnet_w40,9.227,90.773,36.893,63.107,57.56,224,0.875,bilinear,-84.263,-61.687,+8 -regnetx_120,9.187,90.813,37.200,62.800,46.11,224,0.875,bicubic,-85.053,-61.450,-48 -seresnext26d_32x4d,9.147,90.853,36.840,63.160,16.81,224,0.875,bicubic,-83.553,-61.310,+50 -resnest26d,9.080,90.920,37.853,62.147,17.07,224,0.875,bilinear,-84.250,-60.657,+13 -regnety_040,9.000,91.000,37.053,62.947,20.65,224,0.875,bicubic,-84.860,-61.597,-23 -gluon_resnext50_32x4d,8.947,91.053,36.333,63.667,25.03,224,0.875,bicubic,-84.863,-62.057,-18 -rexnet_100,8.893,91.107,36.373,63.627,4.80,224,0.875,bicubic,-84.137,-61.817,+29 -seresnext26t_32x4d,8.893,91.107,36.907,63.093,16.81,224,0.875,bicubic,-83.927,-61.463,+37 +regnetx_120,9.187,90.813,37.200,62.800,46.11,224,0.875,bicubic,-85.053,-61.450,-51 +seresnext26d_32x4d,9.147,90.853,36.840,63.160,16.81,224,0.875,bicubic,-83.553,-61.310,+52 +resnest26d,9.080,90.920,37.853,62.147,17.07,224,0.875,bilinear,-84.250,-60.777,+15 +regnety_040,9.000,91.000,37.053,62.947,20.65,224,0.875,bicubic,-84.860,-61.597,-24 +gluon_resnext50_32x4d,8.947,91.053,36.333,63.667,25.03,224,0.875,bicubic,-84.863,-62.057,-19 +rexnet_100,8.893,91.107,36.373,63.627,4.80,224,0.875,bicubic,-84.137,-61.817,+31 +seresnext26t_32x4d,8.893,91.107,36.907,63.093,16.81,224,0.875,bicubic,-83.927,-61.463,+39 mixnet_l,8.853,91.147,36.187,63.813,7.33,224,0.875,bicubic,-84.597,-62.033,+3 -mobilenetv3_large_100_miil,8.840,91.160,32.973,67.027,5.48,224,0.875,bilinear,-83.420,-64.667,+63 -dla169,8.640,91.360,36.040,63.960,53.39,224,0.875,bilinear,-84.700,-62.560,+5 -hrnet_w30,8.613,91.387,37.040,62.960,37.71,224,0.875,bilinear,-84.587,-61.370,+14 -mixer_b16_224,8.600,91.400,29.413,70.587,59.88,224,0.875,bicubic,-83.270,-67.837,+74 -legacy_seresnet101,8.533,91.467,36.013,63.987,49.33,224,0.875,bilinear,-84.747,-62.497,+9 -tf_efficientnet_b1_ap,8.453,91.547,35.253,64.747,7.79,240,0.882,bicubic,-85.237,-63.257,-19 -repvgg_b2,8.427,91.573,36.467,63.533,89.02,224,0.875,bilinear,-85.073,-61.893,-8 -resnetblur50,8.240,91.760,37.400,62.600,25.56,224,0.875,bicubic,-85.720,-61.190,-42 -dla102x,8.200,91.800,37.013,62.987,26.31,224,0.875,bilinear,-85.320,-61.497,-12 -hrnet_w32,8.040,91.960,37.507,62.493,41.23,224,0.875,bilinear,-85.490,-60.943,-14 -res2net50_26w_8s,8.000,92.000,33.853,66.147,48.40,224,0.875,bilinear,-85.540,-64.407,-16 -gluon_resnet101_v1c,7.987,92.013,33.360,66.640,44.57,224,0.875,bicubic,-85.683,-65.060,-23 -gluon_resnet50_v1d,7.920,92.080,35.000,65.000,25.58,224,0.875,bicubic,-85.850,-63.390,-33 -dla60_res2next,7.787,92.213,34.987,65.013,17.03,224,0.875,bilinear,-85.393,-63.423,+5 -densenetblur121d,7.720,92.280,34.733,65.267,8.00,224,0.875,bicubic,-84.190,-63.337,+61 -vit_deit_tiny_distilled_patch16_224,7.707,92.293,33.560,66.440,5.91,224,0.900,bicubic,-82.993,-64.010,+91 
-dla60_res2net,7.560,92.440,34.627,65.373,20.85,224,0.875,bilinear,-85.620,-63.793,+1 -efficientnet_b1_pruned,7.440,92.560,34.533,65.467,6.33,240,0.882,bicubic,-85.330,-63.507,+20 -wide_resnet101_2,7.360,92.640,34.147,65.853,126.89,224,0.875,bilinear,-86.360,-64.393,-33 -regnetx_064,7.333,92.667,34.373,65.627,26.21,224,0.875,bicubic,-86.557,-64.257,-49 -vit_deit_tiny_patch16_224,7.307,92.693,30.707,69.293,5.72,224,0.900,bicubic,-82.363,-66.743,+99 -hardcorenas_e,7.240,92.760,33.293,66.707,8.07,224,0.875,bilinear,-85.330,-64.817,+29 -gluon_resnet101_v1b,7.227,92.773,32.773,67.227,44.55,224,0.875,bicubic,-86.523,-65.607,-41 -efficientnet_b0,7.213,92.787,34.013,65.987,5.29,224,0.875,bicubic,-85.477,-64.057,+21 -gluon_resnet50_v1s,7.213,92.787,33.507,66.493,25.68,224,0.875,bicubic,-86.407,-64.953,-34 -tf_mixnet_l,7.147,92.853,31.613,68.387,7.33,224,0.875,bicubic,-86.163,-66.417,-15 -tf_efficientnet_b1,7.133,92.867,33.040,66.960,7.79,240,0.882,bicubic,-86.367,-65.690,-29 -tf_efficientnet_cc_b0_8e,7.120,92.880,31.787,68.213,24.01,224,0.875,bicubic,-85.710,-66.393,+7 -hardcorenas_f,6.827,93.173,34.093,65.907,8.20,224,0.875,bilinear,-86.123,-64.067,+2 -selecsls60b,6.733,93.267,33.267,66.733,32.77,224,0.875,bicubic,-86.567,-65.013,-19 -ese_vovnet19b_dw,6.733,93.267,33.413,66.587,6.54,224,0.875,bicubic,-85.557,-64.677,+32 -efficientnet_es,6.707,93.293,33.840,66.160,5.44,224,0.875,bicubic,-86.433,-64.580,-12 -res2net50_26w_6s,6.693,93.307,31.653,68.347,37.05,224,0.875,bilinear,-86.717,-66.627,-28 -legacy_seresnext26_32x4d,6.627,93.373,33.253,66.747,16.79,224,0.875,bicubic,-86.013,-64.877,+14 -mixnet_m,6.627,93.373,32.053,67.947,5.01,224,0.875,bicubic,-85.803,-65.817,+21 +mobilenetv3_large_100_miil,8.840,91.160,32.973,67.027,5.48,224,0.875,bilinear,-83.420,-64.667,+64 +convit_tiny,8.840,91.160,34.360,65.640,5.71,224,0.875,bicubic,-81.790,-63.380,+111 +levit_128s,8.653,91.347,33.107,66.893,7.78,224,0.900,bicubic,-83.317,-64.953,+73 +dla169,8.640,91.360,36.040,63.960,53.39,224,0.875,bilinear,-84.700,-62.560,+3 +hrnet_w30,8.613,91.387,37.040,62.960,37.71,224,0.875,bilinear,-84.587,-61.370,+13 +mixer_b16_224,8.600,91.400,29.413,70.587,59.88,224,0.875,bicubic,-83.270,-67.837,+75 +legacy_seresnet101,8.533,91.467,36.013,63.987,49.33,224,0.875,bilinear,-84.747,-62.497,+8 +tf_efficientnet_b1_ap,8.453,91.547,35.253,64.747,7.79,240,0.882,bicubic,-85.237,-63.107,-21 +repvgg_b2,8.427,91.573,36.467,63.533,89.02,224,0.875,bilinear,-85.073,-62.263,-11 +resnetblur50,8.240,91.760,37.400,62.600,25.56,224,0.875,bicubic,-85.720,-61.190,-46 +dla102x,8.200,91.800,37.013,62.987,26.31,224,0.875,bilinear,-85.320,-61.497,-14 +hrnet_w32,8.040,91.960,37.507,62.493,41.23,224,0.875,bilinear,-85.490,-60.943,-16 +res2net50_26w_8s,8.000,92.000,33.853,66.147,48.40,224,0.875,bilinear,-85.540,-64.407,-18 +gluon_resnet101_v1c,7.987,92.013,33.360,66.640,44.57,224,0.875,bicubic,-85.683,-65.060,-26 +gluon_resnet50_v1d,7.920,92.080,35.000,65.000,25.58,224,0.875,bicubic,-85.850,-63.390,-36 +dla60_res2next,7.787,92.213,34.987,65.013,17.03,224,0.875,bilinear,-85.393,-63.423,+4 +densenetblur121d,7.720,92.280,34.733,65.267,8.00,224,0.875,bicubic,-84.190,-63.337,+62 +vit_deit_tiny_distilled_patch16_224,7.707,92.293,33.560,66.440,5.91,224,0.900,bicubic,-82.993,-64.010,+92 +tf_efficientnetv2_b1,7.693,92.307,34.653,65.347,8.14,240,0.882,bicubic,-86.247,-63.967,-53 +dla60_res2net,7.560,92.440,34.627,65.373,20.85,224,0.875,bilinear,-85.620,-63.793,-1 +efficientnet_b1_pruned,7.440,92.560,34.533,65.467,6.33,240,0.882,bicubic,-85.330,-63.507,+19 
+wide_resnet101_2,7.360,92.640,34.147,65.853,126.89,224,0.875,bilinear,-86.360,-64.393,-37 +regnetx_064,7.333,92.667,34.373,65.627,26.21,224,0.875,bicubic,-86.557,-64.257,-53 +vit_deit_tiny_patch16_224,7.307,92.693,30.707,69.293,5.72,224,0.900,bicubic,-82.363,-66.743,+100 +hardcorenas_e,7.240,92.760,33.293,66.707,8.07,224,0.875,bilinear,-85.330,-64.817,+28 +gluon_resnet101_v1b,7.227,92.773,32.773,67.227,44.55,224,0.875,bicubic,-86.523,-65.607,-45 +efficientnet_b0,7.213,92.787,34.013,65.987,5.29,224,0.875,bicubic,-85.477,-64.057,+20 +gluon_resnet50_v1s,7.213,92.787,33.507,66.493,25.68,224,0.875,bicubic,-86.407,-64.953,-38 +tf_mixnet_l,7.147,92.853,31.613,68.387,7.33,224,0.875,bicubic,-86.163,-66.417,-17 +tf_efficientnet_b1,7.133,92.867,33.040,66.960,7.79,240,0.882,bicubic,-86.367,-65.320,-31 +tf_efficientnet_cc_b0_8e,7.120,92.880,31.787,68.213,24.01,224,0.875,bicubic,-85.710,-66.393,+6 +hardcorenas_f,6.827,93.173,34.093,65.907,8.20,224,0.875,bilinear,-86.123,-64.067,+1 +ese_vovnet19b_dw,6.733,93.267,33.413,66.587,6.54,224,0.875,bicubic,-85.557,-64.677,+31 +selecsls60b,6.733,93.267,33.267,66.733,32.77,224,0.875,bicubic,-86.567,-65.123,-20 +efficientnet_es,6.707,93.293,33.840,66.160,5.44,224,0.875,bicubic,-86.433,-64.580,-14 +res2net50_26w_6s,6.693,93.307,31.653,68.347,37.05,224,0.875,bilinear,-86.717,-66.627,-31 pit_ti_distilled_224,6.627,93.373,30.760,69.240,5.10,224,0.900,bicubic,-84.273,-66.940,+66 -skresnet34,6.480,93.520,31.547,68.453,22.28,224,0.875,bicubic,-85.910,-66.603,+22 -repvgg_b1,6.467,93.533,33.827,66.173,57.42,224,0.875,bilinear,-86.863,-64.803,-29 -hardcorenas_d,6.440,93.560,32.213,67.787,7.50,224,0.875,bilinear,-85.960,-65.857,+18 -dla60x,6.427,93.573,34.080,65.920,17.35,224,0.875,bilinear,-86.693,-64.430,-19 -resnet34d,6.400,93.600,31.493,68.507,21.82,224,0.875,bicubic,-86.280,-66.817,+5 -regnetx_080,6.307,93.693,32.320,67.680,39.57,224,0.875,bicubic,-87.563,-66.200,-70 +mixnet_m,6.627,93.373,32.053,67.947,5.01,224,0.875,bicubic,-85.803,-65.817,+20 +legacy_seresnext26_32x4d,6.627,93.373,33.253,66.747,16.79,224,0.875,bicubic,-86.013,-64.877,+13 +skresnet34,6.480,93.520,31.547,68.453,22.28,224,0.875,bicubic,-85.910,-66.603,+21 +repvgg_b1,6.467,93.533,33.827,66.173,57.42,224,0.875,bilinear,-86.863,-64.683,-32 +hardcorenas_d,6.440,93.560,32.213,67.787,7.50,224,0.875,bilinear,-85.960,-65.857,+17 +dla60x,6.427,93.573,34.080,65.920,17.35,224,0.875,bilinear,-86.693,-64.430,-21 +resnet34d,6.400,93.600,31.493,68.507,21.82,224,0.875,bicubic,-86.280,-66.817,+4 +regnetx_080,6.307,93.693,32.320,67.680,39.57,224,0.875,bicubic,-87.563,-66.200,-74 swsl_resnet18,6.240,93.760,31.600,68.400,11.69,224,0.875,bilinear,-84.450,-66.100,+65 -legacy_seresnet50,6.187,93.813,32.653,67.347,28.09,224,0.875,bilinear,-86.773,-65.537,-14 -pit_ti_224,6.120,93.880,30.227,69.773,4.85,224,0.900,bicubic,-83.820,-67.223,+72 -tv_resnet152,6.040,93.960,32.053,67.947,60.19,224,0.875,bilinear,-87.260,-66.337,-33 -regnetx_040,5.973,94.027,31.547,68.453,22.12,224,0.875,bicubic,-87.587,-66.993,-54 -tf_efficientnet_cc_b0_4e,5.973,94.027,29.600,70.400,13.31,224,0.875,bicubic,-86.617,-68.480,+2 -resnet50,5.933,94.067,29.093,70.907,25.56,224,0.875,bicubic,-87.877,-69.317,-72 -dla102,5.880,94.120,32.707,67.293,33.27,224,0.875,bilinear,-87.180,-65.833,-26 +legacy_seresnet50,6.187,93.813,32.653,67.347,28.09,224,0.875,bilinear,-86.773,-65.537,-15 +pit_ti_224,6.120,93.880,30.227,69.773,4.85,224,0.900,bicubic,-83.820,-67.223,+73 +tv_resnet152,6.040,93.960,32.053,67.947,60.19,224,0.875,bilinear,-87.260,-66.227,-36 
+tf_efficientnet_cc_b0_4e,5.973,94.027,29.600,70.400,13.31,224,0.875,bicubic,-86.617,-68.480,+1 +regnetx_040,5.973,94.027,31.547,68.453,22.12,224,0.875,bicubic,-87.587,-66.993,-57 +resnet50,5.933,94.067,29.093,70.907,25.56,224,0.875,bicubic,-87.877,-69.317,-76 +tf_efficientnetv2_b0,5.893,94.107,30.773,69.227,7.14,224,0.875,bicubic,-87.217,-67.617,-28 +dla102,5.880,94.120,32.707,67.293,33.27,224,0.875,bilinear,-87.180,-65.833,-28 mixer_l16_224,5.867,94.133,18.533,81.467,208.20,224,0.875,bicubic,-81.283,-74.987,+84 -regnety_016,5.680,94.320,30.413,69.587,11.20,224,0.875,bicubic,-87.350,-67.947,-26 -selecsls60,5.653,94.347,32.507,67.493,30.67,224,0.875,bicubic,-87.377,-65.793,-25 -hardcorenas_c,5.640,94.360,30.400,69.600,5.52,224,0.875,bilinear,-86.380,-67.440,+17 -res2next50,5.627,94.373,30.867,69.133,24.67,224,0.875,bilinear,-87.213,-67.313,-21 -hrnet_w18,5.493,94.507,30.960,69.040,21.30,224,0.875,bilinear,-86.827,-67.280,+5 -resnest14d,5.480,94.520,28.547,71.453,10.61,224,0.875,bilinear,-86.240,-69.323,+26 -tf_efficientnet_lite2,5.360,94.640,30.907,69.093,6.09,260,0.890,bicubic,-87.290,-67.323,-11 -tf_efficientnet_em,5.347,94.653,31.107,68.893,6.90,240,0.882,bicubic,-87.583,-67.083,-27 -gernet_s,5.307,94.693,30.133,69.867,8.17,224,0.875,bilinear,-86.833,-68.057,+9 -tf_efficientnet_b0_ap,5.307,94.693,28.813,71.187,5.29,224,0.875,bicubic,-86.893,-69.207,+6 -densenet121,5.293,94.707,29.907,70.093,7.98,224,0.875,bicubic,-86.277,-68.123,+22 -repvgg_b1g4,5.293,94.707,30.813,69.187,39.97,224,0.875,bilinear,-87.687,-67.617,-34 -res2net50_26w_4s,5.160,94.840,29.360,70.640,25.70,224,0.875,bilinear,-87.340,-68.930,-10 -tf_mixnet_m,5.080,94.920,28.147,71.853,5.01,224,0.875,bicubic,-87.250,-69.743,-5 -tf_efficientnet_b0,5.067,94.933,28.800,71.200,5.29,224,0.875,bicubic,-87.183,-69.200,-2 -mobilenetv3_large_100,5.067,94.933,28.187,71.813,5.48,224,0.875,bicubic,-86.253,-69.523,+23 -res2net50_14w_8s,5.040,94.960,28.773,71.227,25.06,224,0.875,bilinear,-87.700,-69.407,-27 -hardcorenas_b,4.947,95.053,28.120,71.880,5.18,224,0.875,bilinear,-86.823,-69.660,+12 -mixnet_s,4.907,95.093,28.573,71.427,4.13,224,0.875,bicubic,-86.923,-69.117,+9 -mobilenetv3_rw,4.907,95.093,29.853,70.147,5.48,224,0.875,bicubic,-86.303,-67.807,+21 -gluon_resnet50_v1c,4.893,95.107,28.147,71.853,25.58,224,0.875,bicubic,-88.137,-70.243,-47 -hardcorenas_a,4.867,95.133,28.093,71.907,5.26,224,0.875,bilinear,-86.483,-69.767,+15 -regnetx_032,4.853,95.147,30.280,69.720,15.30,224,0.875,bicubic,-88.267,-68.110,-52 -tv_resnext50_32x4d,4.840,95.160,30.307,69.693,25.03,224,0.875,bilinear,-87.900,-67.963,-33 -tv_resnet101,4.707,95.293,29.333,70.667,44.55,224,0.875,bilinear,-88.103,-68.917,-39 -densenet161,4.693,95.307,29.547,70.453,28.68,224,0.875,bicubic,-87.807,-68.513,-22 -selecsls42b,4.667,95.333,28.587,71.413,32.46,224,0.875,bicubic,-87.613,-69.563,-15 -tf_efficientnet_lite1,4.613,95.387,28.387,71.613,5.42,240,0.882,bicubic,-88.007,-69.693,-30 -mobilenetv2_120d,4.533,95.467,29.280,70.720,5.83,224,0.875,bicubic,-87.867,-68.770,-22 -efficientnet_es_pruned,4.187,95.813,26.520,73.480,5.44,224,0.875,bicubic,-86.993,-71.230,+13 -fbnetc_100,4.133,95.867,25.933,74.067,5.57,224,0.875,bilinear,-86.567,-71.277,+24 -densenet201,4.120,95.880,27.547,72.453,20.01,224,0.875,bicubic,-88.630,-70.683,-43 -gluon_resnet50_v1b,4.120,95.880,26.933,73.067,25.56,224,0.875,bicubic,-88.420,-71.237,-31 -resnet26d,4.040,95.960,28.520,71.480,16.01,224,0.875,bicubic,-88.030,-69.440,-15 -semnasnet_100,3.960,96.040,26.947,73.053,3.89,224,0.875,bicubic,-87.320,-70.613,+4 
-repvgg_a2,3.947,96.053,27.267,72.733,28.21,224,0.875,bilinear,-87.993,-70.883,-13 -tf_mixnet_s,3.880,96.120,25.253,74.747,4.13,224,0.875,bicubic,-87.630,-72.367,-3 -dpn68,3.867,96.133,26.080,73.920,12.61,224,0.875,bicubic,-88.143,-71.970,-17 -tf_efficientnet_es,3.827,96.173,26.107,73.893,5.44,224,0.875,bicubic,-88.153,-71.753,-17 -regnety_008,3.813,96.187,27.133,72.867,6.26,224,0.875,bicubic,-87.937,-71.047,-9 -dla60,3.773,96.227,27.933,72.067,22.04,224,0.875,bilinear,-88.457,-70.177,-26 +regnety_016,5.680,94.320,30.413,69.587,11.20,224,0.875,bicubic,-87.350,-67.947,-28 +selecsls60,5.653,94.347,32.507,67.493,30.67,224,0.875,bicubic,-87.377,-65.793,-27 +hardcorenas_c,5.640,94.360,30.400,69.600,5.52,224,0.875,bilinear,-86.380,-67.440,+15 +res2next50,5.627,94.373,30.867,69.133,24.67,224,0.875,bilinear,-87.213,-67.313,-23 +hrnet_w18,5.493,94.507,30.960,69.040,21.30,224,0.875,bilinear,-86.827,-67.280,+3 +resnest14d,5.480,94.520,28.547,71.453,10.61,224,0.875,bilinear,-86.240,-69.323,+25 +tf_efficientnet_lite2,5.360,94.640,30.907,69.093,6.09,260,0.890,bicubic,-87.290,-67.323,-13 +tf_efficientnet_em,5.347,94.653,31.107,68.893,6.90,240,0.882,bicubic,-87.583,-67.083,-29 +gernet_s,5.307,94.693,30.133,69.867,8.17,224,0.875,bilinear,-86.833,-68.057,+7 +tf_efficientnet_b0_ap,5.307,94.693,28.813,71.187,5.29,224,0.875,bicubic,-86.893,-69.207,+4 +densenet121,5.293,94.707,29.907,70.093,7.98,224,0.875,bicubic,-86.277,-68.123,+21 +repvgg_b1g4,5.293,94.707,30.813,69.187,39.97,224,0.875,bilinear,-87.687,-67.617,-36 +res2net50_26w_4s,5.160,94.840,29.360,70.640,25.70,224,0.875,bilinear,-87.340,-68.700,-11 +tf_mixnet_m,5.080,94.920,28.147,71.853,5.01,224,0.875,bicubic,-87.250,-69.743,-7 +mobilenetv3_large_100,5.067,94.933,28.187,71.813,5.48,224,0.875,bicubic,-86.253,-69.523,+22 +tf_efficientnet_b0,5.067,94.933,28.800,71.200,5.29,224,0.875,bicubic,-87.183,-69.200,-4 +res2net50_14w_8s,5.040,94.960,28.773,71.227,25.06,224,0.875,bilinear,-87.700,-69.407,-29 +hardcorenas_b,4.947,95.053,28.120,71.880,5.18,224,0.875,bilinear,-86.823,-69.660,+11 +mobilenetv3_rw,4.907,95.093,29.853,70.147,5.48,224,0.875,bicubic,-86.303,-67.807,+20 +mixnet_s,4.907,95.093,28.573,71.427,4.13,224,0.875,bicubic,-86.923,-69.117,+8 +gluon_resnet50_v1c,4.893,95.107,28.147,71.853,25.58,224,0.875,bicubic,-88.137,-70.243,-49 +hardcorenas_a,4.867,95.133,28.093,71.907,5.26,224,0.875,bilinear,-86.483,-69.767,+14 +regnetx_032,4.853,95.147,30.280,69.720,15.30,224,0.875,bicubic,-88.267,-68.110,-55 +tv_resnext50_32x4d,4.840,95.160,30.307,69.693,25.03,224,0.875,bilinear,-87.900,-67.963,-35 +tv_resnet101,4.707,95.293,29.333,70.667,44.55,224,0.875,bilinear,-88.103,-68.917,-41 +densenet161,4.693,95.307,29.547,70.453,28.68,224,0.875,bicubic,-87.807,-68.743,-25 +selecsls42b,4.667,95.333,28.587,71.413,32.46,224,0.875,bicubic,-87.613,-69.563,-17 +tf_efficientnet_lite1,4.613,95.387,28.387,71.613,5.42,240,0.882,bicubic,-88.007,-69.693,-32 +mobilenetv2_120d,4.533,95.467,29.280,70.720,5.83,224,0.875,bicubic,-87.867,-68.770,-24 +efficientnet_es_pruned,4.187,95.813,26.520,73.480,5.44,224,0.875,bicubic,-86.993,-71.230,+12 +fbnetc_100,4.133,95.867,25.933,74.067,5.57,224,0.875,bilinear,-86.567,-71.277,+23 +gluon_resnet50_v1b,4.120,95.880,26.933,73.067,25.56,224,0.875,bicubic,-88.420,-71.237,-33 +densenet201,4.120,95.880,27.547,72.453,20.01,224,0.875,bicubic,-88.630,-70.683,-45 +resnet26d,4.040,95.960,28.520,71.480,16.01,224,0.875,bicubic,-88.030,-69.440,-17 +semnasnet_100,3.960,96.040,26.947,73.053,3.89,224,0.875,bicubic,-87.320,-70.613,+3 
+repvgg_a2,3.947,96.053,27.267,72.733,28.21,224,0.875,bilinear,-87.993,-70.883,-14 +tf_mixnet_s,3.880,96.120,25.253,74.747,4.13,224,0.875,bicubic,-87.630,-72.367,-4 +dpn68,3.867,96.133,26.080,73.920,12.61,224,0.875,bicubic,-88.143,-71.970,-19 +tf_efficientnet_es,3.827,96.173,26.107,73.893,5.44,224,0.875,bicubic,-88.153,-71.753,-19 +regnety_008,3.813,96.187,27.133,72.867,6.26,224,0.875,bicubic,-87.937,-71.047,-10 +dla60,3.773,96.227,27.933,72.067,22.04,224,0.875,bilinear,-88.457,-70.177,-28 ssl_resnet18,3.747,96.253,25.427,74.573,11.69,224,0.875,bilinear,-86.473,-72.123,+20 -mobilenetv2_140,3.720,96.280,26.747,73.253,6.11,224,0.875,bicubic,-88.110,-71.113,-14 -densenet169,3.707,96.293,25.613,74.387,14.15,224,0.875,bicubic,-88.223,-72.487,-20 -regnetx_016,3.627,96.373,26.293,73.707,9.19,224,0.875,bicubic,-88.543,-71.917,-28 -res2net50_48w_2s,3.587,96.413,26.613,73.387,25.29,224,0.875,bilinear,-88.963,-71.467,-45 -spnasnet_100,3.547,96.453,24.293,75.707,4.42,224,0.875,bilinear,-86.803,-73.137,+13 -tf_mobilenetv3_large_100,3.547,96.453,25.053,74.947,5.48,224,0.875,bilinear,-87.693,-72.607,-8 -regnety_006,3.467,96.533,24.893,75.107,6.06,224,0.875,bicubic,-87.903,-72.817,-13 -legacy_seresnet34,3.333,96.667,23.800,76.200,21.96,224,0.875,bilinear,-87.557,-73.780,+2 -efficientnet_lite0,3.253,96.747,25.867,74.133,4.65,224,0.875,bicubic,-87.887,-71.763,-7 -dla34,3.227,96.773,23.573,76.427,15.74,224,0.875,bilinear,-87.533,-74.087,+2 +mobilenetv2_140,3.720,96.280,26.747,73.253,6.11,224,0.875,bicubic,-88.110,-71.113,-15 +densenet169,3.707,96.293,25.613,74.387,14.15,224,0.875,bicubic,-88.223,-72.487,-21 +regnetx_016,3.627,96.373,26.293,73.707,9.19,224,0.875,bicubic,-88.543,-71.917,-30 +res2net50_48w_2s,3.587,96.413,26.613,73.387,25.29,224,0.875,bilinear,-88.963,-71.467,-47 +spnasnet_100,3.547,96.453,24.293,75.707,4.42,224,0.875,bilinear,-86.803,-72.897,+14 +tf_mobilenetv3_large_100,3.547,96.453,25.053,74.947,5.48,224,0.875,bilinear,-87.693,-72.607,-9 +regnety_006,3.467,96.533,24.893,75.107,6.06,224,0.875,bicubic,-87.903,-72.817,-14 +legacy_seresnet34,3.333,96.667,23.800,76.200,21.96,224,0.875,bilinear,-87.557,-73.780,+1 +efficientnet_lite0,3.253,96.747,25.867,74.133,4.65,224,0.875,bicubic,-87.887,-71.763,-8 +dla34,3.227,96.773,23.573,76.427,15.74,224,0.875,bilinear,-87.533,-74.087,+1 ghostnet_100,3.227,96.773,24.853,75.147,5.18,224,0.875,bilinear,-86.793,-72.517,+12 regnety_004,3.200,96.800,22.653,77.347,4.34,224,0.875,bicubic,-87.300,-74.887,+5 -mobilenetv2_110d,3.173,96.827,24.587,75.413,4.52,224,0.875,bicubic,-87.777,-72.963,-5 +mobilenetv2_110d,3.173,96.827,24.587,75.413,4.52,224,0.875,bicubic,-87.777,-72.963,-6 mnasnet_100,3.120,96.880,24.227,75.773,4.38,224,0.875,bicubic,-87.390,-73.243,+2 -tf_efficientnet_lite0,3.080,96.920,22.907,77.093,4.65,224,0.875,bicubic,-87.960,-74.683,-9 +tf_efficientnet_lite0,3.080,96.920,22.907,77.093,4.65,224,0.875,bicubic,-87.960,-74.683,-10 skresnet18,3.013,96.987,22.800,77.200,11.96,224,0.875,bicubic,-86.647,-74.430,+12 vgg19_bn,2.947,97.053,23.480,76.520,143.68,224,0.875,bilinear,-87.133,-74.100,+5 -resnet34,2.920,97.080,23.680,76.320,21.80,224,0.875,bilinear,-88.210,-73.940,-15 +resnet34,2.920,97.080,23.680,76.320,21.80,224,0.875,bilinear,-88.210,-73.940,-16 tf_mobilenetv3_large_075,2.867,97.133,21.573,78.427,3.99,224,0.875,bilinear,-86.813,-75.637,+7 -hrnet_w18_small_v2,2.720,97.280,23.693,76.307,15.60,224,0.875,bilinear,-88.470,-74.207,-20 -gluon_resnet34_v1b,2.667,97.333,21.680,78.320,21.80,224,0.875,bicubic,-88.293,-75.950,-14 
-regnetx_008,2.653,97.347,22.453,77.547,7.26,224,0.875,bicubic,-88.397,-75.257,-17 +hrnet_w18_small_v2,2.720,97.280,23.693,76.307,15.60,224,0.875,bilinear,-88.470,-74.207,-21 +gluon_resnet34_v1b,2.667,97.333,21.680,78.320,21.80,224,0.875,bicubic,-88.293,-75.950,-15 +regnetx_008,2.653,97.347,22.453,77.547,7.26,224,0.875,bicubic,-88.397,-75.257,-18 vgg16_bn,2.653,97.347,23.773,76.227,138.37,224,0.875,bilinear,-87.437,-73.597,-2 vgg16,2.640,97.360,20.427,79.573,138.36,224,0.875,bilinear,-85.910,-76.363,+13 resnet18d,2.600,97.400,21.613,78.387,11.71,224,0.875,bicubic,-86.680,-75.537,+5 -tv_densenet121,2.560,97.440,22.667,77.333,7.98,224,0.875,bicubic,-88.330,-75.043,-15 -repvgg_b0,2.547,97.453,24.013,75.987,15.82,224,0.875,bilinear,-88.883,-73.977,-34 -regnetx_006,2.507,97.493,20.653,79.347,6.20,224,0.875,bicubic,-87.843,-76.537,-9 +tv_densenet121,2.560,97.440,22.667,77.333,7.98,224,0.875,bicubic,-88.330,-75.043,-16 +repvgg_b0,2.547,97.453,24.013,75.987,15.82,224,0.875,bilinear,-88.883,-73.977,-35 +regnetx_006,2.507,97.493,20.653,79.347,6.20,224,0.875,bicubic,-87.843,-76.777,-10 legacy_seresnet18,2.493,97.507,20.080,79.920,11.78,224,0.875,bicubic,-86.387,-76.900,+6 -resnet26,2.480,97.520,22.987,77.013,16.00,224,0.875,bicubic,-88.630,-74.753,-26 +resnet26,2.480,97.520,22.987,77.013,16.00,224,0.875,bicubic,-88.630,-74.753,-27 mobilenetv2_100,2.147,97.853,19.907,80.093,3.50,224,0.875,bicubic,-87.453,-77.233,-2 regnety_002,2.147,97.853,18.880,81.120,3.16,224,0.875,bicubic,-85.233,-77.710,+9 vgg19,2.107,97.893,20.733,79.267,143.67,224,0.875,bilinear,-86.933,-76.137,-1 @@ -361,4 +392,4 @@ dla46_c,1.520,98.480,15.267,84.733,1.30,224,0.875,bilinear,-82.130,-79.653,+2 regnetx_002,1.373,98.627,15.027,84.973,2.68,224,0.875,bicubic,-84.817,-80.953,-2 resnet18,1.160,98.840,16.213,83.787,11.69,224,0.875,bilinear,-86.230,-80.077,-9 tf_mobilenetv3_small_minimal_100,1.013,98.987,11.493,88.507,2.04,224,0.875,bilinear,-80.367,-82.177,+1 -tv_resnet50,0.000,100.000,14.453,85.547,25.56,224,0.875,bilinear,-91.880,-83.587,-67 +tv_resnet50,0.000,100.000,14.453,85.547,25.56,224,0.875,bilinear,-91.880,-83.587,-68 diff --git a/results/results-imagenet-r-clean.csv b/results/results-imagenet-r-clean.csv index 3cc426ed..db508eaa 100644 --- a/results/results-imagenet-r-clean.csv +++ b/results/results-imagenet-r-clean.csv @@ -4,12 +4,14 @@ tf_efficientnet_l2_ns_475,97.750,2.250,99.820,0.180,480.31,475,0.936,bicubic tf_efficientnet_b7_ns,97.200,2.800,99.700,0.300,66.35,600,0.949,bicubic swin_large_patch4_window12_384,97.170,2.830,99.680,0.320,196.74,384,1.000,bicubic swin_base_patch4_window12_384,97.120,2.880,99.780,0.220,87.90,384,1.000,bicubic +tf_efficientnetv2_l_in21ft1k,97.110,2.890,99.710,0.290,118.52,480,1.000,bicubic tf_efficientnet_b6_ns,97.020,2.980,99.710,0.290,43.04,528,0.942,bicubic dm_nfnet_f6,96.990,3.010,99.740,0.260,438.36,576,0.956,bicubic ig_resnext101_32x48d,96.970,3.030,99.670,0.330,828.41,224,0.875,bilinear +tf_efficientnetv2_m_in21ft1k,96.970,3.030,99.610,0.390,54.14,480,1.000,bicubic swin_large_patch4_window7_224,96.950,3.050,99.660,0.340,196.53,224,0.900,bicubic -cait_m48_448,96.880,3.120,99.620,0.380,356.46,448,1.000,bicubic resnetv2_152x4_bitm,96.880,3.120,99.660,0.340,936.53,480,1.000,bilinear +cait_m48_448,96.880,3.120,99.620,0.380,356.46,448,1.000,bicubic tf_efficientnet_b5_ns,96.870,3.130,99.640,0.360,30.39,456,0.934,bicubic cait_m36_384,96.830,3.170,99.660,0.340,271.22,384,1.000,bicubic dm_nfnet_f4,96.820,3.180,99.600,0.400,316.07,512,0.951,bicubic @@ -18,25 +20,29 @@ 
dm_nfnet_f5,96.710,3.290,99.680,0.320,377.21,544,0.954,bicubic tf_efficientnet_b4_ns,96.710,3.290,99.640,0.360,19.34,380,0.922,bicubic tf_efficientnet_b8,96.700,3.300,99.530,0.470,87.41,672,0.954,bicubic swin_base_patch4_window7_224,96.680,3.320,99.660,0.340,87.77,224,0.900,bicubic +tf_efficientnetv2_l,96.650,3.350,99.560,0.440,118.52,480,1.000,bicubic cait_s36_384,96.630,3.370,99.600,0.400,68.37,384,1.000,bicubic dm_nfnet_f3,96.630,3.370,99.640,0.360,254.92,416,0.940,bicubic tf_efficientnet_b7,96.580,3.420,99.510,0.490,66.35,600,0.949,bicubic cait_s24_384,96.570,3.430,99.550,0.450,47.06,384,1.000,bicubic tf_efficientnet_b8_ap,96.550,3.450,99.540,0.460,87.41,672,0.954,bicubic +tf_efficientnetv2_m,96.550,3.450,99.570,0.430,54.14,480,1.000,bicubic vit_deit_base_distilled_patch16_384,96.510,3.490,99.590,0.410,87.63,384,1.000,bicubic dm_nfnet_f2,96.500,3.500,99.570,0.430,193.78,352,0.920,bicubic resnetv2_152x2_bitm,96.500,3.500,99.620,0.380,236.34,480,1.000,bilinear +tf_efficientnetv2_s_in21ft1k,96.470,3.530,99.570,0.430,21.46,384,1.000,bicubic ecaresnet269d,96.460,3.540,99.610,0.390,102.09,352,1.000,bicubic vit_base_r50_s16_384,96.450,3.550,99.660,0.340,98.95,384,1.000,bicubic ig_resnext101_32x16d,96.440,3.560,99.540,0.460,194.03,224,0.875,bilinear resnetrs420,96.400,3.600,99.540,0.460,191.89,416,1.000,bicubic -dm_nfnet_f1,96.370,3.630,99.470,0.530,132.63,320,0.910,bicubic tf_efficientnet_b6_ap,96.370,3.630,99.550,0.450,43.04,528,0.942,bicubic +dm_nfnet_f1,96.370,3.630,99.470,0.530,132.63,320,0.910,bicubic resnetv2_101x3_bitm,96.360,3.640,99.600,0.400,387.93,480,1.000,bilinear vit_large_patch16_384,96.360,3.640,99.630,0.370,304.72,384,1.000,bicubic tf_efficientnet_b7_ap,96.350,3.650,99.590,0.410,66.35,600,0.949,bicubic seresnet152d,96.310,3.690,99.510,0.490,66.84,320,1.000,bicubic tf_efficientnet_b6,96.290,3.710,99.520,0.480,43.04,528,0.942,bicubic +efficientnetv2_rw_m,96.270,3.730,99.560,0.440,53.24,416,1.000,bicubic swsl_resnext101_32x16d,96.270,3.730,99.500,0.500,194.03,224,0.875,bilinear resnetrs350,96.240,3.760,99.470,0.530,163.96,384,1.000,bicubic swsl_resnext101_32x8d,96.240,3.760,99.590,0.410,88.79,224,0.875,bilinear @@ -55,27 +61,32 @@ cait_xs24_384,96.010,3.990,99.430,0.570,26.67,384,1.000,bicubic resnetrs200,95.990,4.010,99.440,0.560,93.21,320,1.000,bicubic tf_efficientnet_b5,95.980,4.020,99.450,0.550,30.39,456,0.934,bicubic resnetrs152,95.960,4.040,99.380,0.620,86.62,320,1.000,bicubic -ig_resnext101_32x8d,95.930,4.070,99.380,0.620,88.79,224,0.875,bilinear eca_nfnet_l1,95.930,4.070,99.500,0.500,41.41,320,1.000,bicubic +ig_resnext101_32x8d,95.930,4.070,99.380,0.620,88.79,224,0.875,bilinear regnety_160,95.880,4.120,99.560,0.440,83.59,288,1.000,bicubic resnet152d,95.870,4.130,99.430,0.570,60.21,320,1.000,bicubic resnet101d,95.750,4.250,99.440,0.560,44.57,320,1.000,bicubic vit_deit_base_distilled_patch16_224,95.750,4.250,99.280,0.720,87.34,224,0.900,bicubic swin_small_patch4_window7_224,95.720,4.280,99.290,0.710,49.61,224,0.900,bicubic -efficientnet_v2s,95.710,4.290,99.380,0.620,23.94,384,1.000,bicubic -cait_s24_224,95.650,4.350,99.390,0.610,46.92,224,1.000,bicubic +twins_pcpvt_large,95.720,4.280,99.490,0.510,60.99,224,0.900,bicubic +twins_svt_large,95.720,4.280,99.370,0.630,99.27,224,0.900,bicubic +efficientnetv2_rw_s,95.710,4.290,99.380,0.620,23.94,384,1.000,bicubic +tf_efficientnetv2_s,95.710,4.290,99.400,0.600,21.46,384,1.000,bicubic vit_deit_base_patch16_384,95.650,4.350,99.240,0.760,86.86,384,1.000,bicubic +cait_s24_224,95.650,4.350,99.390,0.610,46.92,224,1.000,bicubic 
dm_nfnet_f0,95.630,4.370,99.300,0.700,71.49,256,0.900,bicubic swsl_resnext50_32x4d,95.620,4.380,99.440,0.560,25.03,224,0.875,bilinear tf_efficientnet_b4,95.590,4.410,99.330,0.670,19.34,380,0.922,bicubic resnest101e,95.570,4.430,99.270,0.730,48.28,256,0.875,bilinear +twins_svt_base,95.570,4.430,99.230,0.770,56.07,224,0.900,bicubic efficientnet_b4,95.520,4.480,99.390,0.610,19.34,384,1.000,bicubic tf_efficientnet_b2_ns,95.520,4.480,99.340,0.660,9.11,260,0.890,bicubic resnetv2_101x1_bitm,95.510,4.490,99.510,0.490,44.54,480,1.000,bilinear tresnet_xl_448,95.510,4.490,99.340,0.660,78.44,448,0.875,bilinear tf_efficientnet_b4_ap,95.490,4.510,99.390,0.610,19.34,380,0.922,bicubic -eca_nfnet_l0,95.470,4.530,99.380,0.620,24.14,288,1.000,bicubic regnety_032,95.470,4.530,99.320,0.680,19.44,288,1.000,bicubic +eca_nfnet_l0,95.470,4.530,99.380,0.620,24.14,288,1.000,bicubic +twins_pcpvt_base,95.460,4.540,99.390,0.610,43.83,224,0.900,bicubic ssl_resnext101_32x16d,95.410,4.590,99.410,0.590,194.03,224,0.875,bilinear tresnet_l_448,95.410,4.590,99.300,0.700,55.99,448,0.875,bilinear nfnet_l0,95.390,4.610,99.420,0.580,35.07,288,1.000,bicubic @@ -87,50 +98,62 @@ vit_base_patch32_384,95.260,4.740,99.180,0.820,88.30,384,1.000,bicubic resnetrs101,95.250,4.750,99.210,0.790,63.62,288,0.940,bicubic vit_large_patch32_384,95.240,4.760,99.320,0.680,306.63,384,1.000,bicubic cait_xxs36_384,95.220,4.780,99.320,0.680,17.37,384,1.000,bicubic +levit_384,95.210,4.790,99.160,0.840,39.13,224,0.900,bicubic vit_base_patch16_224,95.210,4.790,99.230,0.770,86.57,224,0.900,bicubic +resnet51q,95.200,4.800,99.280,0.720,35.70,288,1.000,bilinear swsl_resnet50,95.200,4.800,99.390,0.610,25.56,224,0.875,bilinear -ssl_resnext101_32x4d,95.160,4.840,99.300,0.700,44.18,224,0.875,bilinear ecaresnet101d,95.160,4.840,99.230,0.770,44.57,224,0.875,bicubic +ssl_resnext101_32x4d,95.160,4.840,99.300,0.700,44.18,224,0.875,bilinear nasnetalarge,95.150,4.850,99.130,0.870,88.75,331,0.911,bicubic efficientnet_b3,95.140,4.860,99.210,0.790,12.23,320,1.000,bicubic +tf_efficientnetv2_b3,95.120,4.880,99.200,0.800,14.36,300,0.904,bicubic +convit_base,95.100,4.900,99.140,0.860,86.54,224,0.875,bicubic +coat_lite_small,95.080,4.920,99.020,0.980,19.84,224,0.900,bicubic ecaresnet50t,95.070,4.930,99.290,0.710,25.57,320,0.950,bicubic tresnet_xl,95.060,4.940,99.260,0.740,78.44,224,0.875,bilinear vit_deit_base_patch16_224,95.010,4.990,98.980,1.020,86.57,224,0.900,bicubic tf_efficientnet_b3_ap,94.970,5.030,99.110,0.890,12.23,300,0.904,bicubic +visformer_small,94.960,5.040,99.210,0.790,40.22,224,0.900,bicubic gernet_l,94.930,5.070,99.200,0.800,31.08,256,0.875,bilinear cait_xxs24_384,94.920,5.080,99.140,0.860,12.03,384,1.000,bicubic +convit_small,94.920,5.080,99.110,0.890,27.78,224,0.875,bicubic tf_efficientnet_b3,94.910,5.090,99.110,0.890,12.23,300,0.904,bicubic tresnet_l,94.900,5.100,99.030,0.970,55.99,224,0.875,bilinear +mixer_b16_224_miil,94.880,5.120,99.080,0.920,59.88,224,0.875,bilinear tf_efficientnet_lite4,94.870,5.130,99.090,0.910,13.01,380,0.920,bilinear tf_efficientnet_b1_ns,94.860,5.140,99.250,0.750,7.79,240,0.882,bicubic seresnext50_32x4d,94.820,5.180,99.130,0.870,27.56,224,0.875,bicubic pit_b_224,94.790,5.210,98.820,1.180,73.76,224,0.900,bicubic +coat_mini,94.770,5.230,98.950,1.050,10.34,224,0.900,bicubic +twins_svt_small,94.770,5.230,99.080,0.920,24.06,224,0.900,bicubic legacy_senet154,94.730,5.270,99.100,0.900,115.09,224,0.875,bilinear pit_s_distilled_224,94.730,5.270,99.190,0.810,24.04,224,0.900,bicubic 
gluon_resnet152_v1s,94.720,5.280,99.060,0.940,60.32,224,0.875,bicubic gluon_senet154,94.710,5.290,98.970,1.030,115.09,224,0.875,bicubic resnest50d_4s2x40d,94.710,5.290,99.130,0.870,30.42,224,0.875,bicubic ssl_resnext50_32x4d,94.700,5.300,99.240,0.760,25.03,224,0.875,bilinear -wide_resnet50_2,94.670,5.330,99.050,0.950,68.88,224,0.875,bicubic efficientnet_el,94.670,5.330,99.130,0.870,10.59,300,0.904,bicubic -rexnet_200,94.660,5.340,99.090,0.910,16.37,224,0.875,bicubic +wide_resnet50_2,94.670,5.330,99.050,0.950,68.88,224,0.875,bicubic tresnet_m_448,94.660,5.340,99.150,0.850,31.39,448,0.875,bilinear +rexnet_200,94.660,5.340,99.090,0.910,16.37,224,0.875,bicubic gluon_seresnext101_64x4d,94.650,5.350,98.980,1.020,88.23,224,0.875,bicubic resnest50d,94.620,5.380,99.030,0.970,27.48,224,0.875,bilinear swin_tiny_patch4_window7_224,94.620,5.380,99.120,0.880,28.29,224,0.900,bicubic +twins_pcpvt_small,94.600,5.400,99.150,0.850,24.11,224,0.900,bicubic pit_s_224,94.590,5.410,98.930,1.070,23.46,224,0.900,bicubic vit_deit_small_distilled_patch16_224,94.590,5.410,99.100,0.900,22.44,224,0.900,bicubic efficientnet_b3_pruned,94.580,5.420,99.070,0.930,9.86,300,0.904,bicubic tnt_s_patch16_224,94.580,5.420,99.180,0.820,23.76,224,0.900,bicubic -resnetv2_50x1_bitm,94.550,5.450,99.250,0.750,25.55,480,1.000,bilinear -repvgg_b3,94.550,5.450,98.910,1.090,123.09,224,0.875,bilinear gernet_m,94.550,5.450,98.930,1.070,21.14,224,0.875,bilinear +repvgg_b3,94.550,5.450,98.910,1.090,123.09,224,0.875,bilinear +resnetv2_50x1_bitm,94.550,5.450,99.250,0.750,25.55,480,1.000,bilinear regnety_320,94.520,5.480,99.170,0.830,145.05,224,0.875,bicubic repvgg_b3g4,94.490,5.510,99.020,0.980,83.83,224,0.875,bilinear -ecaresnet101d_pruned,94.450,5.550,99.100,0.900,24.88,224,0.875,bicubic gluon_seresnext101_32x4d,94.450,5.550,99.090,0.910,48.96,224,0.875,bicubic +ecaresnet101d_pruned,94.450,5.550,99.100,0.900,24.88,224,0.875,bicubic gluon_resnet152_v1d,94.440,5.560,99.010,0.990,60.21,224,0.875,bicubic nf_resnet50,94.410,5.590,99.100,0.900,25.56,288,0.940,bicubic +levit_256,94.400,5.600,99.060,0.940,18.89,224,0.900,bicubic resnest50d_1s4x24d,94.390,5.610,99.070,0.930,25.68,224,0.875,bicubic inception_v4,94.380,5.620,98.820,1.180,42.68,299,0.875,bicubic efficientnet_b2,94.370,5.630,99.050,0.950,9.11,288,1.000,bicubic @@ -139,18 +162,18 @@ gluon_resnext101_64x4d,94.350,5.650,98.880,1.120,83.46,224,0.875,bicubic inception_resnet_v2,94.340,5.660,98.800,1.200,55.84,299,0.897,bicubic ssl_resnet50,94.310,5.690,99.150,0.850,25.56,224,0.875,bilinear regnetx_120,94.270,5.730,99.190,0.810,46.11,224,0.875,bicubic -rexnet_150,94.270,5.730,99.080,0.920,9.73,224,0.875,bicubic tf_efficientnet_b2_ap,94.270,5.730,98.950,1.050,9.11,260,0.890,bicubic +rexnet_150,94.270,5.730,99.080,0.920,9.73,224,0.875,bicubic mixnet_xl,94.230,5.770,98.820,1.180,11.90,224,0.875,bicubic regnetx_320,94.210,5.790,99.050,0.950,107.81,224,0.875,bicubic tf_efficientnet_b2,94.210,5.790,99.030,0.970,9.11,260,0.890,bicubic dpn92,94.190,5.810,98.930,1.070,37.67,224,0.875,bicubic ecaresnet50d,94.190,5.810,99.020,0.980,25.58,224,0.875,bicubic -gluon_seresnext50_32x4d,94.170,5.830,98.910,1.090,27.56,224,0.875,bicubic -gluon_resnet101_v1s,94.170,5.830,99.010,0.990,44.67,224,0.875,bicubic gluon_resnet101_v1d,94.170,5.830,98.940,1.060,44.57,224,0.875,bicubic -ecaresnetlight,94.140,5.860,98.950,1.050,30.16,224,0.875,bicubic +gluon_resnet101_v1s,94.170,5.830,99.010,0.990,44.67,224,0.875,bicubic +gluon_seresnext50_32x4d,94.170,5.830,98.910,1.090,27.56,224,0.875,bicubic 
regnety_064,94.140,5.860,99.030,0.970,30.58,224,0.875,bicubic +ecaresnetlight,94.140,5.860,98.950,1.050,30.16,224,0.875,bicubic ens_adv_inception_resnet_v2,94.130,5.870,98.790,1.210,55.84,299,0.897,bicubic legacy_seresnext101_32x4d,94.130,5.870,98.970,1.030,48.96,224,0.875,bilinear tf_efficientnet_lite3,94.130,5.870,98.960,1.040,8.20,300,0.904,bilinear @@ -159,6 +182,7 @@ cspdarknet53,94.090,5.910,98.980,1.020,27.64,256,0.887,bilinear efficientnet_el_pruned,94.090,5.910,99.010,0.990,10.59,300,0.904,bicubic seresnet50,94.080,5.920,98.970,1.030,28.09,224,0.875,bicubic resnet50d,94.070,5.930,98.920,1.080,25.58,224,0.875,bicubic +tf_efficientnetv2_b2,94.070,5.930,98.930,1.070,10.10,260,0.890,bicubic gluon_resnet152_v1b,94.030,5.970,98.740,1.260,60.19,224,0.875,bicubic hrnet_w48,94.030,5.970,99.040,0.960,77.47,224,0.875,bilinear resnetrs50,94.020,5.980,98.850,1.150,35.69,224,0.910,bicubic @@ -168,35 +192,37 @@ dla102x2,94.000,6.000,99.030,0.970,41.28,224,0.875,bilinear vit_deit_small_patch16_224,94.000,6.000,98.960,1.040,22.05,224,0.900,bicubic dpn107,93.960,6.040,98.840,1.160,86.92,224,0.875,bicubic skresnext50_32x4d,93.950,6.050,98.820,1.180,27.48,224,0.875,bicubic +cait_xxs36_224,93.940,6.060,98.890,1.110,17.30,224,1.000,bicubic dpn98,93.940,6.060,98.920,1.080,61.57,224,0.875,bicubic ecaresnet26t,93.940,6.060,98.920,1.080,16.01,320,0.950,bicubic -cait_xxs36_224,93.940,6.060,98.890,1.110,17.30,224,1.000,bicubic -nf_regnet_b1,93.890,6.110,98.750,1.250,10.22,288,0.900,bicubic -regnety_080,93.890,6.110,99.000,1.000,39.18,224,0.875,bicubic xception71,93.890,6.110,98.950,1.050,42.34,299,0.903,bicubic -gluon_resnet152_v1c,93.880,6.120,98.800,1.200,60.21,224,0.875,bicubic +regnety_080,93.890,6.110,99.000,1.000,39.18,224,0.875,bicubic +nf_regnet_b1,93.890,6.110,98.750,1.250,10.22,288,0.900,bicubic regnetx_160,93.880,6.120,99.090,0.910,54.28,224,0.875,bicubic +gluon_resnet152_v1c,93.880,6.120,98.800,1.200,60.21,224,0.875,bicubic cspresnet50,93.860,6.140,98.870,1.130,21.62,256,0.887,bilinear ese_vovnet39b,93.850,6.150,98.900,1.100,24.57,224,0.875,bicubic resnext50_32x4d,93.840,6.160,98.830,1.170,25.03,224,0.875,bicubic hrnet_w64,93.830,6.170,98.930,1.070,128.06,224,0.875,bilinear -repvgg_b2g4,93.820,6.180,98.930,1.070,61.76,224,0.875,bilinear ecaresnet50d_pruned,93.820,6.180,99.000,1.000,19.94,224,0.875,bicubic +repvgg_b2g4,93.820,6.180,98.930,1.070,61.76,224,0.875,bilinear resnext50d_32x4d,93.810,6.190,98.740,1.260,25.05,224,0.875,bicubic -efficientnet_b2_pruned,93.800,6.200,98.910,1.090,8.31,260,0.890,bicubic dla169,93.800,6.200,98.840,1.160,53.39,224,0.875,bilinear +efficientnet_b2_pruned,93.800,6.200,98.910,1.090,8.31,260,0.890,bicubic regnetx_080,93.790,6.210,98.910,1.090,39.57,224,0.875,bicubic resnext101_32x8d,93.770,6.230,98.950,1.050,88.79,224,0.875,bilinear -cspresnext50,93.760,6.240,98.840,1.160,20.57,224,0.875,bilinear -dpn131,93.760,6.240,98.800,1.200,79.25,224,0.875,bicubic gluon_resnet101_v1b,93.760,6.240,98.700,1.300,44.55,224,0.875,bicubic xception65,93.760,6.240,98.860,1.140,39.92,299,0.903,bicubic +cspresnext50,93.760,6.240,98.840,1.160,20.57,224,0.875,bilinear +dpn131,93.760,6.240,98.800,1.200,79.25,224,0.875,bicubic efficientnet_em,93.740,6.260,98.930,1.070,6.90,240,0.882,bicubic tf_efficientnet_b0_ns,93.740,6.260,98.980,1.020,5.29,224,0.875,bicubic wide_resnet101_2,93.730,6.270,98.810,1.190,126.89,224,0.875,bilinear hrnet_w40,93.710,6.290,98.800,1.200,57.56,224,0.875,bilinear -tf_efficientnet_b1,93.710,6.290,98.800,1.200,7.79,240,0.882,bicubic 
+levit_192,93.710,6.290,98.790,1.210,10.95,224,0.900,bicubic resnetblur50,93.710,6.290,98.810,1.190,25.56,224,0.875,bicubic +tf_efficientnet_b1,93.710,6.290,98.800,1.200,7.79,240,0.882,bicubic +tf_efficientnetv2_b1,93.710,6.290,98.820,1.180,8.14,240,0.882,bicubic gluon_resnet101_v1c,93.690,6.310,98.760,1.240,44.57,224,0.875,bicubic regnetx_040,93.680,6.320,98.940,1.060,22.12,224,0.875,bicubic rexnet_130,93.670,6.330,98.710,1.290,7.56,224,0.875,bicubic @@ -204,18 +230,19 @@ gluon_resnext50_32x4d,93.650,6.350,98.690,1.310,25.03,224,0.875,bicubic xception,93.640,6.360,98.770,1.230,22.86,299,0.897,bicubic regnetx_064,93.630,6.370,99.050,0.950,26.21,224,0.875,bicubic tf_efficientnet_b1_ap,93.630,6.370,98.800,1.200,7.79,240,0.882,bicubic -regnety_040,93.620,6.380,98.950,1.050,20.65,224,0.875,bicubic dpn68b,93.620,6.380,98.700,1.300,12.61,224,0.875,bicubic hrnet_w44,93.620,6.380,98.960,1.040,67.06,224,0.875,bilinear -gluon_resnet50_v1s,93.590,6.410,98.840,1.160,25.68,224,0.875,bicubic +regnety_040,93.620,6.380,98.950,1.050,20.65,224,0.875,bicubic repvgg_b2,93.590,6.410,99.070,0.930,89.02,224,0.875,bilinear res2net50_26w_6s,93.590,6.410,98.750,1.250,37.05,224,0.875,bilinear +gluon_resnet50_v1s,93.590,6.410,98.840,1.160,25.68,224,0.875,bicubic dla60_res2next,93.570,6.430,98.800,1.200,17.03,224,0.875,bilinear tf_efficientnet_cc_b1_8e,93.570,6.430,98.690,1.310,39.72,240,0.882,bicubic gluon_inception_v3,93.540,6.460,98.830,1.170,23.83,299,0.875,bicubic dla102x,93.530,6.470,98.850,1.150,26.31,224,0.875,bilinear gluon_resnet50_v1d,93.530,6.470,98.710,1.290,25.58,224,0.875,bicubic res2net101_26w_4s,93.520,6.480,98.600,1.400,45.21,224,0.875,bilinear +coat_tiny,93.510,6.490,98.690,1.310,5.50,224,0.900,bicubic selecsls60b,93.500,6.500,98.840,1.160,32.77,224,0.875,bicubic cait_xxs24_224,93.490,6.510,98.770,1.230,11.96,224,1.000,bicubic xception41,93.480,6.520,98.750,1.250,26.97,299,0.903,bicubic @@ -231,31 +258,33 @@ dla102,93.260,6.740,98.780,1.220,33.27,224,0.875,bilinear legacy_seresnet101,93.260,6.740,98.740,1.260,49.33,224,0.875,bilinear mixnet_l,93.260,6.740,98.700,1.300,7.33,224,0.875,bicubic regnetx_032,93.250,6.750,98.730,1.270,15.30,224,0.875,bicubic +tv_resnet152,93.240,6.760,98.750,1.250,60.19,224,0.875,bilinear pit_xs_distilled_224,93.240,6.760,98.820,1.180,11.00,224,0.900,bicubic resnest26d,93.240,6.760,98.850,1.150,17.07,224,0.875,bilinear -tv_resnet152,93.240,6.760,98.750,1.250,60.19,224,0.875,bilinear tf_inception_v3,93.200,6.800,98.480,1.520,23.83,299,0.875,bicubic dla60x,93.190,6.810,98.710,1.290,17.35,224,0.875,bilinear res2net50_26w_4s,93.180,6.820,98.670,1.330,25.70,224,0.875,bilinear tf_efficientnet_em,93.170,6.830,98.670,1.330,6.90,240,0.882,bicubic res2next50,93.150,6.850,98.660,1.340,24.67,224,0.875,bilinear +tf_efficientnetv2_b0,93.060,6.940,98.700,1.300,7.14,224,0.875,bicubic +levit_128,93.050,6.950,98.690,1.310,9.21,224,0.900,bicubic tf_mixnet_l,93.040,6.960,98.540,1.460,7.33,224,0.875,bicubic -efficientnet_b1,93.030,6.970,98.710,1.290,7.79,256,1.000,bicubic repvgg_b1g4,93.030,6.970,98.820,1.180,39.97,224,0.875,bilinear res2net50_14w_8s,93.030,6.970,98.700,1.300,25.06,224,0.875,bilinear +efficientnet_b1,93.030,6.970,98.710,1.290,7.79,256,1.000,bicubic adv_inception_v3,93.010,6.990,98.490,1.510,23.83,299,0.875,bicubic selecsls60,93.010,6.990,98.830,1.170,30.67,224,0.875,bicubic regnety_016,93.000,7.000,98.680,1.320,11.20,224,0.875,bicubic efficientnet_b1_pruned,92.980,7.020,98.530,1.470,6.33,240,0.882,bicubic hardcorenas_f,92.980,7.020,98.620,1.380,8.20,224,0.875,bilinear 
-hrnet_w32,92.950,7.050,98.840,1.160,41.23,224,0.875,bilinear hardcorenas_e,92.950,7.050,98.570,1.430,8.07,224,0.875,bilinear +hrnet_w32,92.950,7.050,98.840,1.160,41.23,224,0.875,bilinear efficientnet_es,92.910,7.090,98.690,1.310,5.44,224,0.875,bicubic gluon_resnet50_v1c,92.910,7.090,98.710,1.290,25.58,224,0.875,bicubic pit_xs_224,92.910,7.090,98.780,1.220,10.62,224,0.900,bicubic -densenet161,92.900,7.100,98.810,1.190,28.68,224,0.875,bicubic -inception_v3,92.900,7.100,98.330,1.670,23.83,299,0.875,bicubic tv_resnext50_32x4d,92.900,7.100,98.720,1.280,25.03,224,0.875,bilinear +inception_v3,92.900,7.100,98.330,1.670,23.83,299,0.875,bicubic +densenet161,92.900,7.100,98.810,1.190,28.68,224,0.875,bicubic tv_resnet101,92.880,7.120,98.660,1.340,44.55,224,0.875,bilinear tf_efficientnet_cc_b0_8e,92.870,7.130,98.460,1.540,24.01,224,0.875,bicubic coat_lite_tiny,92.850,7.150,98.640,1.360,5.72,224,0.900,bicubic @@ -266,13 +295,13 @@ res2net50_48w_2s,92.790,7.210,98.470,1.530,25.29,224,0.875,bilinear hrnet_w18,92.760,7.240,98.660,1.340,21.30,224,0.875,bilinear densenet201,92.690,7.310,98.650,1.350,20.01,224,0.875,bicubic repvgg_a2,92.680,7.320,98.520,1.480,28.21,224,0.875,bilinear -dla60,92.670,7.330,98.630,1.370,22.04,224,0.875,bilinear legacy_seresnet50,92.670,7.330,98.650,1.350,28.09,224,0.875,bilinear +dla60,92.670,7.330,98.630,1.370,22.04,224,0.875,bilinear resnet34d,92.640,7.360,98.420,1.580,21.82,224,0.875,bicubic -mobilenetv2_120d,92.610,7.390,98.510,1.490,5.83,224,0.875,bicubic tf_efficientnet_b0_ap,92.610,7.390,98.370,1.630,5.29,224,0.875,bicubic -vit_small_patch16_224,92.600,7.400,98.390,1.610,48.75,224,0.900,bicubic +mobilenetv2_120d,92.610,7.390,98.510,1.490,5.83,224,0.875,bicubic hardcorenas_d,92.600,7.400,98.430,1.570,7.50,224,0.875,bilinear +vit_small_patch16_224,92.600,7.400,98.390,1.610,48.75,224,0.900,bicubic tf_efficientnet_lite2,92.590,7.410,98.550,1.450,6.09,260,0.890,bicubic legacy_seresnext26_32x4d,92.570,7.430,98.420,1.580,16.79,224,0.875,bicubic skresnet34,92.570,7.430,98.520,1.480,22.28,224,0.875,bicubic @@ -280,8 +309,8 @@ gluon_resnet50_v1b,92.560,7.440,98.550,1.450,25.56,224,0.875,bicubic regnetx_016,92.540,7.460,98.550,1.450,9.19,224,0.875,bicubic efficientnet_b0,92.480,7.520,98.680,1.320,5.29,224,0.875,bicubic selecsls42b,92.480,7.520,98.440,1.560,32.46,224,0.875,bicubic -gernet_s,92.440,7.560,98.500,1.500,8.17,224,0.875,bilinear seresnext26d_32x4d,92.440,7.560,98.540,1.460,16.81,224,0.875,bicubic +gernet_s,92.440,7.560,98.500,1.500,8.17,224,0.875,bilinear densenetblur121d,92.400,7.600,98.410,1.590,8.00,224,0.875,bicubic tf_efficientnet_b0,92.400,7.600,98.470,1.530,5.29,224,0.875,bicubic hardcorenas_c,92.330,7.670,98.340,1.660,5.52,224,0.875,bilinear @@ -307,8 +336,9 @@ semnasnet_100,91.660,8.340,98.270,1.730,3.89,224,0.875,bicubic hardcorenas_a,91.620,8.380,98.170,1.830,5.26,224,0.875,bilinear regnety_006,91.570,8.430,98.430,1.570,6.06,224,0.875,bicubic mobilenetv3_rw,91.550,8.450,98.270,1.730,5.48,224,0.875,bicubic -mobilenetv3_large_100,91.480,8.520,98.320,1.680,5.48,224,0.875,bicubic +levit_128s,91.500,8.500,98.400,1.600,7.78,224,0.900,bicubic legacy_seresnet34,91.480,8.520,98.200,1.800,21.96,224,0.875,bilinear +mobilenetv3_large_100,91.480,8.520,98.320,1.680,5.48,224,0.875,bicubic resnet26,91.440,8.560,98.280,1.720,16.00,224,0.875,bicubic tf_mobilenetv3_large_100,91.420,8.580,98.260,1.740,5.48,224,0.875,bilinear tv_densenet121,91.400,8.600,98.250,1.750,7.98,224,0.875,bicubic @@ -317,8 +347,8 @@ tf_efficientnet_lite0,91.300,8.700,98.090,1.910,4.65,224,0.875,bicubic 
fbnetc_100,91.270,8.730,97.830,2.170,5.57,224,0.875,bilinear efficientnet_lite0,91.260,8.740,98.250,1.750,4.65,224,0.875,bicubic dla34,91.240,8.760,98.180,1.820,15.74,224,0.875,bilinear -resnet34,91.200,8.800,98.240,1.760,21.80,224,0.875,bilinear mnasnet_100,91.200,8.800,98.050,1.950,4.38,224,0.875,bicubic +resnet34,91.200,8.800,98.240,1.760,21.80,224,0.875,bilinear regnetx_008,91.180,8.820,98.380,1.620,7.26,224,0.875,bicubic hrnet_w18_small_v2,91.170,8.830,98.340,1.660,15.60,224,0.875,bilinear mixer_b16_224,91.140,8.860,97.400,2.600,59.88,224,0.875,bicubic @@ -333,6 +363,7 @@ regnetx_006,90.760,9.240,98.100,1.900,6.20,224,0.875,bicubic ssl_resnet18,90.700,9.300,98.020,1.980,11.69,224,0.875,bilinear spnasnet_100,90.610,9.390,97.950,2.050,4.42,224,0.875,bilinear vgg16_bn,90.540,9.460,97.990,2.010,138.37,224,0.875,bilinear +convit_tiny,90.530,9.470,98.210,1.790,5.71,224,0.875,bicubic ghostnet_100,90.440,9.560,97.830,2.170,5.18,224,0.875,bilinear pit_ti_224,90.420,9.580,98.010,1.990,4.85,224,0.900,bicubic tf_mobilenetv3_large_075,90.320,9.680,97.870,2.130,3.99,224,0.875,bilinear diff --git a/results/results-imagenet-r.csv b/results/results-imagenet-r.csv index c0a01c3a..3c6ee501 100644 --- a/results/results-imagenet-r.csv +++ b/results/results-imagenet-r.csv @@ -1,352 +1,383 @@ model,top1,top1_err,top5,top5_err,param_count,img_size,cropt_pct,interpolation,top1_diff,top5_diff,rank_diff -ig_resnext101_32x48d,79.650,20.350,89.393,10.607,828.41,224,0.875,bilinear,-17.320,-10.277,+7 -ig_resnext101_32x32d,79.457,20.543,89.183,10.817,468.53,224,0.875,bilinear,-17.323,-10.347,+13 -ig_resnext101_32x16d,78.837,21.163,88.480,11.520,194.03,224,0.875,bilinear,-17.603,-11.060,+27 +ig_resnext101_32x48d,79.650,20.350,89.393,10.607,828.41,224,0.875,bilinear,-17.320,-10.277,+8 +ig_resnext101_32x32d,79.457,20.543,89.183,10.817,468.53,224,0.875,bilinear,-17.323,-10.347,+15 +ig_resnext101_32x16d,78.837,21.163,88.480,11.520,194.03,224,0.875,bilinear,-17.603,-11.060,+32 tf_efficientnet_l2_ns_475,76.480,23.520,88.653,11.347,480.31,475,0.936,bicubic,-21.270,-11.167,-2 -swsl_resnext101_32x16d,76.303,23.697,87.733,12.267,194.03,224,0.875,bilinear,-19.967,-11.767,+34 -ig_resnext101_32x8d,75.813,24.187,86.200,13.800,88.79,224,0.875,bilinear,-20.117,-13.300,+51 -swsl_resnext101_32x8d,75.590,24.410,86.937,13.063,88.79,224,0.875,bilinear,-20.650,-12.653,+34 +swsl_resnext101_32x16d,76.303,23.697,87.733,12.267,194.03,224,0.875,bilinear,-19.967,-11.767,+40 +ig_resnext101_32x8d,75.813,24.187,86.200,13.800,88.79,224,0.875,bilinear,-20.117,-13.180,+58 +swsl_resnext101_32x8d,75.590,24.410,86.937,13.063,88.79,224,0.875,bilinear,-20.650,-12.653,+40 tf_efficientnet_l2_ns,74.650,25.350,87.543,12.457,480.31,800,0.960,bicubic,-23.130,-12.347,-7 -swsl_resnext101_32x4d,72.660,27.340,85.157,14.843,44.18,224,0.875,bilinear,-23.390,-14.373,+42 -swsl_resnext50_32x4d,68.977,31.023,82.810,17.190,25.03,224,0.875,bilinear,-26.643,-16.630,+58 -swsl_resnet50,68.297,31.703,83.313,16.687,25.56,224,0.875,bilinear,-26.903,-16.077,+79 +swsl_resnext101_32x4d,72.660,27.340,85.157,14.843,44.18,224,0.875,bilinear,-23.390,-14.373,+48 +swsl_resnext50_32x4d,68.977,31.023,82.810,17.190,25.03,224,0.875,bilinear,-26.643,-16.630,+67 +swsl_resnet50,68.297,31.703,83.313,16.687,25.56,224,0.875,bilinear,-26.903,-16.077,+92 tf_efficientnet_b7_ns,67.510,32.490,81.383,18.617,66.35,600,0.949,bicubic,-29.690,-18.317,-9 swin_large_patch4_window12_384,66.283,33.717,79.783,20.217,196.74,384,1.000,bicubic,-30.887,-19.897,-9 
-tf_efficientnet_b6_ns,65.587,34.413,79.553,20.447,43.04,528,0.942,bicubic,-31.433,-20.157,-8 -swin_large_patch4_window7_224,63.870,36.130,78.180,21.820,196.53,224,0.900,bicubic,-33.080,-21.480,-6 +tf_efficientnet_b6_ns,65.587,34.413,79.553,20.447,43.04,528,0.942,bicubic,-31.433,-20.157,-7 +swin_large_patch4_window7_224,63.870,36.130,78.180,21.820,196.53,224,0.900,bicubic,-33.080,-21.480,-4 swin_base_patch4_window12_384,63.470,36.530,78.063,21.937,87.90,384,1.000,bicubic,-33.650,-21.717,-11 -tf_efficientnet_b5_ns,63.047,36.953,77.777,22.223,30.39,456,0.934,bicubic,-33.823,-21.863,-5 -tf_efficientnet_b4_ns,61.230,38.770,76.173,23.827,19.34,380,0.922,bicubic,-35.480,-23.467,-1 -swin_base_patch4_window7_224,59.537,40.463,74.247,25.753,87.77,224,0.900,bicubic,-37.143,-25.413,0 -tf_efficientnet_b8_ap,57.830,42.170,72.957,27.043,87.41,672,0.954,bicubic,-38.720,-26.583,+4 -cait_m48_448,57.470,42.530,71.860,28.140,356.46,448,1.000,bicubic,-39.410,-27.760,-11 +tf_efficientnet_b5_ns,63.047,36.953,77.777,22.223,30.39,456,0.934,bicubic,-33.823,-21.863,-3 +tf_efficientnet_b4_ns,61.230,38.770,76.173,23.827,19.34,380,0.922,bicubic,-35.480,-23.467,+1 +tf_efficientnetv2_l_in21ft1k,60.953,39.047,75.847,24.153,118.52,480,1.000,bicubic,-36.157,-23.863,-13 +swin_base_patch4_window7_224,59.537,40.463,74.247,25.753,87.77,224,0.900,bicubic,-37.143,-25.413,+1 +tf_efficientnetv2_m_in21ft1k,58.647,41.353,73.983,26.017,54.14,480,1.000,bicubic,-38.323,-25.627,-11 +tf_efficientnet_b8_ap,57.830,42.170,72.957,27.043,87.41,672,0.954,bicubic,-38.720,-26.583,+5 +cait_m48_448,57.470,42.530,71.860,28.140,356.46,448,1.000,bicubic,-39.410,-27.800,-10 cait_m36_384,57.467,42.533,72.313,27.687,271.22,384,1.000,bicubic,-39.363,-27.347,-9 -tf_efficientnet_b3_ns,57.417,42.583,72.387,27.613,12.23,300,0.904,bicubic,-38.683,-27.093,+23 -vit_large_patch16_384,54.750,45.250,70.007,29.993,304.72,384,1.000,bicubic,-41.610,-29.623,+11 -vit_base_r50_s16_384,54.400,45.600,69.560,30.440,98.95,384,1.000,bicubic,-42.050,-30.100,+4 -resnetv2_152x4_bitm,54.263,45.737,70.137,29.863,936.53,480,1.000,bilinear,-42.617,-29.523,-15 -dm_nfnet_f6,54.073,45.927,69.110,30.890,438.36,576,0.956,bicubic,-42.917,-30.630,-20 -tf_efficientnet_b5_ap,53.870,46.130,69.160,30.840,30.39,456,0.934,bicubic,-42.210,-30.380,+19 +tf_efficientnet_b3_ns,57.417,42.583,72.387,27.613,12.23,300,0.904,bicubic,-38.683,-27.093,+27 +vit_large_patch16_384,54.750,45.250,70.007,29.993,304.72,384,1.000,bicubic,-41.610,-29.623,+14 +vit_base_r50_s16_384,54.400,45.600,69.560,30.440,98.95,384,1.000,bicubic,-42.050,-30.100,+7 +resnetv2_152x4_bitm,54.263,45.737,70.137,29.863,936.53,480,1.000,bilinear,-42.617,-29.483,-16 +dm_nfnet_f6,54.073,45.927,69.110,30.890,438.36,576,0.956,bicubic,-42.917,-30.630,-21 +tf_efficientnet_b5_ap,53.870,46.130,69.160,30.840,30.39,456,0.934,bicubic,-42.210,-30.380,+23 dm_nfnet_f5,53.773,46.227,68.500,31.500,377.21,544,0.954,bicubic,-42.937,-31.180,-13 -tf_efficientnet_b2_ns,53.600,46.400,70.270,29.730,9.11,260,0.890,bicubic,-41.920,-29.070,+42 -tf_efficientnet_b6_ap,53.560,46.440,68.550,31.450,43.04,528,0.942,bicubic,-42.810,-31.000,+2 -cait_s36_384,53.550,46.450,68.000,32.000,68.37,384,1.000,bicubic,-43.080,-31.600,-12 +tf_efficientnet_b2_ns,53.600,46.400,70.270,29.730,9.11,260,0.890,bicubic,-41.920,-29.070,+50 +tf_efficientnet_b6_ap,53.560,46.440,68.550,31.450,43.04,528,0.942,bicubic,-42.810,-30.920,+4 +cait_s36_384,53.550,46.450,68.000,32.000,68.37,384,1.000,bicubic,-43.080,-31.600,-11 
tf_efficientnet_b8,53.410,46.590,69.090,30.910,87.41,672,0.954,bicubic,-43.290,-30.440,-15 -tf_efficientnet_b7_ap,53.260,46.740,68.873,31.127,66.35,600,0.949,bicubic,-43.090,-30.717,+2 -dm_nfnet_f3,53.190,46.810,68.083,31.917,254.92,416,0.940,bicubic,-43.440,-31.557,-14 -tf_efficientnet_b4_ap,53.090,46.910,68.210,31.790,19.34,380,0.922,bicubic,-42.400,-31.180,+39 +tf_efficientnet_b7_ap,53.260,46.740,68.873,31.127,66.35,600,0.949,bicubic,-43.090,-30.717,+5 +dm_nfnet_f3,53.190,46.810,68.083,31.917,254.92,416,0.940,bicubic,-43.440,-31.557,-13 +tf_efficientnetv2_s_in21ft1k,53.150,46.850,69.000,31.000,21.46,384,1.000,bicubic,-43.320,-30.570,-6 +tf_efficientnet_b4_ap,53.090,46.910,68.210,31.790,19.34,380,0.922,bicubic,-42.400,-31.180,+46 tf_efficientnet_b7,52.393,47.607,68.233,31.767,66.35,600,0.949,bicubic,-44.187,-31.277,-15 -swsl_resnet18,52.327,47.673,70.480,29.520,11.69,224,0.875,bilinear,-38.763,-27.730,+289 -dm_nfnet_f4,52.260,47.740,67.120,32.880,316.07,512,0.951,bicubic,-44.560,-32.480,-25 -vit_deit_base_distilled_patch16_384,52.257,47.743,67.733,32.267,87.63,384,1.000,bicubic,-44.253,-31.857,-15 -cait_s24_384,51.783,48.217,66.313,33.687,47.06,384,1.000,bicubic,-44.787,-33.237,-18 +tf_efficientnetv2_l,52.377,47.623,67.237,32.763,118.52,480,1.000,bicubic,-44.273,-32.323,-19 +swsl_resnet18,52.327,47.673,70.480,29.520,11.69,224,0.875,bilinear,-38.763,-27.730,+315 +efficientnetv2_rw_m,52.323,47.677,67.210,32.790,53.24,416,1.000,bicubic,-43.947,-32.350,+1 +dm_nfnet_f4,52.260,47.740,67.120,32.880,316.07,512,0.951,bicubic,-44.560,-32.480,-28 +vit_deit_base_distilled_patch16_384,52.257,47.743,67.733,32.267,87.63,384,1.000,bicubic,-44.253,-31.857,-16 +cait_s24_384,51.783,48.217,66.313,33.687,47.06,384,1.000,bicubic,-44.787,-33.237,-20 ecaresnet269d,51.670,48.330,66.047,33.953,102.09,352,1.000,bicubic,-44.790,-33.563,-14 -vit_base_patch16_224_miil,51.557,48.443,65.207,34.793,86.54,224,0.875,bilinear,-44.473,-34.143,+9 -pit_b_distilled_224,51.153,48.847,66.770,33.230,74.79,224,0.900,bicubic,-44.917,-32.610,+4 -resnetv2_152x2_bitm,51.040,48.960,68.527,31.473,236.34,480,1.000,bilinear,-45.460,-31.093,-18 -vit_base_patch16_384,50.883,49.117,65.270,34.730,86.86,384,1.000,bicubic,-45.307,-34.260,-5 -tf_efficientnet_b1_ns,50.883,49.117,67.910,32.090,7.79,240,0.882,bicubic,-43.977,-31.340,+58 -vit_large_patch16_224,50.877,49.123,66.227,33.773,304.33,224,0.900,bicubic,-44.413,-33.083,+36 -efficientnet_b4,50.510,49.490,65.703,34.297,19.34,384,1.000,bicubic,-45.010,-33.687,+22 -ssl_resnext101_32x16d,50.257,49.743,66.033,33.967,194.03,224,0.875,bilinear,-45.153,-33.377,+28 -cait_s24_224,50.243,49.757,65.027,34.973,46.92,224,1.000,bicubic,-45.407,-34.363,+14 +vit_base_patch16_224_miil,51.557,48.443,65.207,34.793,86.54,224,0.875,bilinear,-44.473,-34.143,+10 +pit_b_distilled_224,51.153,48.847,66.770,33.230,74.79,224,0.900,bicubic,-44.917,-32.610,+5 +resnetv2_152x2_bitm,51.040,48.960,68.527,31.473,236.34,480,1.000,bilinear,-45.460,-31.093,-19 +tf_efficientnet_b1_ns,50.883,49.117,67.910,32.090,7.79,240,0.882,bicubic,-43.977,-31.340,+72 +vit_base_patch16_384,50.883,49.117,65.270,34.730,86.86,384,1.000,bicubic,-45.307,-34.260,-4 +vit_large_patch16_224,50.877,49.123,66.227,33.773,304.33,224,0.900,bicubic,-44.413,-33.083,+42 +tf_efficientnetv2_m,50.557,49.443,66.010,33.990,54.14,480,1.000,bicubic,-45.993,-33.560,-26 +efficientnet_b4,50.510,49.490,65.703,34.297,19.34,384,1.000,bicubic,-45.010,-33.687,+26 +ssl_resnext101_32x16d,50.257,49.743,66.033,33.967,194.03,224,0.875,bilinear,-45.153,-33.377,+33 
+cait_s24_224,50.243,49.757,65.027,34.973,46.92,224,1.000,bicubic,-45.407,-34.213,+18 resnest269e,50.153,49.847,64.670,35.330,110.93,416,0.928,bicubic,-45.967,-34.850,-8 vit_deit_base_distilled_patch16_224,50.063,49.937,66.227,33.773,87.34,224,0.900,bicubic,-45.687,-33.053,+9 -tf_efficientnet_b3_ap,50.057,49.943,65.210,34.790,12.23,300,0.904,bicubic,-44.913,-33.900,+44 +tf_efficientnet_b3_ap,50.057,49.943,65.210,34.790,12.23,300,0.904,bicubic,-44.913,-33.900,+54 resnest200e,49.873,50.127,64.743,35.257,70.20,320,0.909,bicubic,-46.197,-34.737,-6 -resnetv2_101x3_bitm,49.823,50.177,66.917,33.083,387.93,480,1.000,bilinear,-46.537,-32.683,-22 +resnetv2_101x3_bitm,49.823,50.177,66.917,33.083,387.93,480,1.000,bilinear,-46.537,-32.683,-23 cait_xs24_384,49.527,50.473,64.900,35.100,26.67,384,1.000,bicubic,-46.483,-34.530,-4 tf_efficientnet_b5,49.510,50.490,65.657,34.343,30.39,456,0.934,bicubic,-46.470,-33.793,-3 resnet200d,49.470,50.530,64.330,35.670,64.69,320,1.000,bicubic,-46.640,-35.130,-14 -resnest101e,49.367,50.633,65.587,34.413,48.28,256,0.875,bilinear,-46.203,-33.683,+10 +resnest101e,49.367,50.633,65.587,34.413,48.28,256,0.875,bilinear,-46.203,-33.683,+13 resnet152d,49.253,50.747,64.413,35.587,60.21,320,1.000,bicubic,-46.617,-35.017,-1 -seresnet152d,49.247,50.753,64.170,35.830,66.84,320,1.000,bicubic,-47.063,-35.340,-25 -ssl_resnext101_32x8d,49.067,50.933,65.480,34.520,88.79,224,0.875,bilinear,-46.273,-33.840,+20 -repvgg_b3,48.917,51.083,64.887,35.113,123.09,224,0.875,bilinear,-45.633,-34.023,+61 -resnetrs420,48.857,51.143,63.427,36.573,191.89,416,1.000,bicubic,-47.543,-36.113,-34 -dm_nfnet_f2,48.623,51.377,63.537,36.463,193.78,352,0.920,bicubic,-47.877,-36.033,-40 -efficientnet_v2s,48.603,51.397,63.840,36.160,23.94,384,1.000,bicubic,-47.107,-35.540,-3 -efficientnet_b3,48.563,51.437,64.250,35.750,12.23,320,1.000,bicubic,-46.577,-34.960,+26 -ecaresnet101d,48.527,51.473,64.100,35.900,44.57,224,0.875,bicubic,-46.633,-35.200,+23 -repvgg_b3g4,48.310,51.690,64.800,35.200,83.83,224,0.875,bilinear,-46.180,-34.220,+58 -vit_large_patch32_384,48.250,51.750,61.830,38.170,306.63,384,1.000,bicubic,-46.990,-37.490,+16 -resnetrs350,48.050,51.950,62.653,37.347,163.96,384,1.000,bicubic,-48.190,-36.817,-32 -repvgg_b2g4,47.787,52.213,64.390,35.610,61.76,224,0.875,bilinear,-46.033,-34.610,+109 -eca_nfnet_l1,47.663,52.337,62.767,37.233,41.41,320,1.000,bicubic,-48.267,-36.613,-16 -pit_s_distilled_224,47.543,52.457,63.493,36.507,24.04,224,0.900,bicubic,-47.187,-35.697,+33 -resnest50d_4s2x40d,47.483,52.517,63.807,36.193,30.42,224,0.875,bicubic,-47.227,-35.323,+35 -efficientnet_b3_pruned,47.447,52.553,62.793,37.207,9.86,300,0.904,bicubic,-47.133,-36.277,+45 -vit_base_patch16_224,47.340,52.660,61.607,38.393,86.57,224,0.900,bicubic,-47.870,-37.623,+11 -tresnet_m,47.230,52.770,61.993,38.007,31.39,224,0.875,bilinear,-48.150,-37.157,+2 -tf_efficientnet_b6,47.213,52.787,63.110,36.890,43.04,528,0.942,bicubic,-49.077,-36.410,-42 -ssl_resnext101_32x4d,47.177,52.823,63.367,36.633,44.18,224,0.875,bilinear,-47.983,-35.863,+10 -resnetrs270,47.107,52.893,62.010,37.990,129.86,352,1.000,bicubic,-48.953,-37.480,-32 +seresnet152d,49.247,50.753,64.170,35.830,66.84,320,1.000,bicubic,-47.063,-35.340,-26 +ssl_resnext101_32x8d,49.067,50.933,65.480,34.520,88.79,224,0.875,bilinear,-46.273,-33.840,+25 +repvgg_b3,48.917,51.083,64.887,35.113,123.09,224,0.875,bilinear,-45.633,-34.023,+77 +resnetrs420,48.857,51.143,63.427,36.573,191.89,416,1.000,bicubic,-47.543,-36.113,-35 
+dm_nfnet_f2,48.623,51.377,63.537,36.463,193.78,352,0.920,bicubic,-47.877,-36.033,-42 +efficientnetv2_rw_s,48.603,51.397,63.840,36.160,23.94,384,1.000,bicubic,-47.107,-35.540,-1 +efficientnet_b3,48.563,51.437,64.250,35.750,12.23,320,1.000,bicubic,-46.577,-34.960,+33 +ecaresnet101d,48.527,51.473,64.100,35.900,44.57,224,0.875,bicubic,-46.633,-35.130,+29 +repvgg_b3g4,48.310,51.690,64.800,35.200,83.83,224,0.875,bilinear,-46.180,-34.220,+74 +vit_large_patch32_384,48.250,51.750,61.830,38.170,306.63,384,1.000,bicubic,-46.990,-37.490,+21 +convit_base,48.217,51.783,63.000,37.000,86.54,224,0.875,bicubic,-46.883,-36.140,+31 +resnetrs350,48.050,51.950,62.653,37.347,163.96,384,1.000,bicubic,-48.190,-36.817,-33 +twins_svt_large,47.947,52.053,62.907,37.093,99.27,224,0.900,bicubic,-47.773,-36.463,-9 +mixer_b16_224_miil,47.790,52.210,63.400,36.600,59.88,224,0.875,bilinear,-47.090,-35.680,+40 +repvgg_b2g4,47.787,52.213,64.390,35.610,61.76,224,0.875,bilinear,-46.033,-34.540,+125 +eca_nfnet_l1,47.663,52.337,62.767,37.233,41.41,320,1.000,bicubic,-48.267,-36.733,-20 +pit_s_distilled_224,47.543,52.457,63.493,36.507,24.04,224,0.900,bicubic,-47.187,-35.697,+45 +resnest50d_4s2x40d,47.483,52.517,63.807,36.193,30.42,224,0.875,bicubic,-47.227,-35.323,+47 +efficientnet_b3_pruned,47.447,52.553,62.793,37.207,9.86,300,0.904,bicubic,-47.133,-36.277,+58 +vit_base_patch16_224,47.340,52.660,61.607,38.393,86.57,224,0.900,bicubic,-47.870,-37.623,+14 +tresnet_m,47.230,52.770,61.993,38.007,31.39,224,0.875,bilinear,-48.150,-37.157,+4 +tf_efficientnet_b6,47.213,52.787,63.110,36.890,43.04,528,0.942,bicubic,-49.077,-36.410,-46 +ssl_resnext101_32x4d,47.177,52.823,63.367,36.633,44.18,224,0.875,bilinear,-47.983,-35.933,+15 +resnetrs270,47.107,52.893,62.010,37.990,129.86,352,1.000,bicubic,-48.953,-37.480,-35 tf_efficientnet_b4,47.083,52.917,62.867,37.133,19.34,380,0.922,bicubic,-48.507,-36.463,-14 -resnet101d,46.893,53.107,62.317,37.683,44.57,320,1.000,bicubic,-48.857,-37.123,-23 -resnetrs200,46.837,53.163,62.487,37.513,93.21,320,1.000,bicubic,-49.153,-36.953,-31 -resnetv2_50x3_bitm,46.827,53.173,64.873,35.127,217.32,480,1.000,bilinear,-49.313,-34.747,-43 -dm_nfnet_f1,46.693,53.307,61.560,38.440,132.63,320,0.910,bicubic,-49.677,-37.910,-55 -gluon_seresnext101_64x4d,46.677,53.323,61.303,38.697,88.23,224,0.875,bicubic,-47.973,-37.677,+29 -tresnet_xl,46.283,53.717,61.943,38.057,78.44,224,0.875,bilinear,-48.777,-37.317,+7 -vit_deit_small_distilled_patch16_224,46.160,53.840,62.417,37.583,22.44,224,0.900,bicubic,-48.430,-36.683,+31 -regnety_160,46.153,53.847,61.837,38.163,83.59,288,1.000,bicubic,-49.727,-37.723,-32 -gernet_m,46.150,53.850,62.700,37.300,21.14,224,0.875,bilinear,-48.400,-36.550,+34 -resnest50d_1s4x24d,46.083,53.917,62.377,37.623,25.68,224,0.875,bicubic,-48.307,-36.693,+40 -tf_efficientnet_b0_ns,46.047,53.953,63.253,36.747,5.29,224,0.875,bicubic,-47.693,-35.727,+100 -resnest50d,45.937,54.063,62.623,37.377,27.48,224,0.875,bilinear,-48.683,-36.407,+23 -regnety_032,45.893,54.107,61.537,38.463,19.44,288,1.000,bicubic,-49.577,-37.783,-19 -gluon_seresnext101_32x4d,45.590,54.410,61.143,38.857,48.96,224,0.875,bicubic,-48.860,-37.947,+33 -gluon_resnet152_v1d,45.430,54.570,60.077,39.923,60.21,224,0.875,bicubic,-49.010,-38.933,+33 -dm_nfnet_f0,45.420,54.580,60.990,39.010,71.49,256,0.900,bicubic,-50.210,-38.310,-32 -ssl_resnext50_32x4d,45.407,54.593,62.047,37.953,25.03,224,0.875,bilinear,-49.293,-37.193,+12 -nfnet_l0,45.390,54.610,62.057,37.943,35.07,288,1.000,bicubic,-50.000,-37.363,-21 
-tresnet_xl_448,45.223,54.777,61.437,38.563,78.44,448,0.875,bilinear,-50.287,-37.903,-28 -nasnetalarge,45.210,54.790,57.883,42.117,88.75,331,0.911,bicubic,-49.940,-41.247,-10 -swin_small_patch4_window7_224,45.163,54.837,60.330,39.670,49.61,224,0.900,bicubic,-50.557,-38.960,-41 -tf_efficientnet_b3,45.107,54.893,60.650,39.350,12.23,300,0.904,bicubic,-49.803,-38.460,-4 -rexnet_200,45.047,54.953,62.317,37.683,16.37,224,0.875,bicubic,-49.613,-36.773,+9 -resnetrs152,44.943,55.057,59.713,40.287,86.62,320,1.000,bicubic,-51.017,-39.667,-51 -ecaresnetlight,44.890,55.110,60.770,39.230,30.16,224,0.875,bicubic,-49.250,-38.180,+43 -vit_deit_base_patch16_224,44.870,55.130,59.177,40.823,86.57,224,0.900,bicubic,-50.140,-39.803,-12 -vit_deit_base_patch16_384,44.777,55.223,59.617,40.383,86.86,384,1.000,bicubic,-50.873,-39.623,-44 -cait_xxs36_384,44.773,55.227,59.380,40.620,17.37,384,1.000,bicubic,-50.447,-39.940,-23 -gernet_l,44.740,55.260,58.943,41.057,31.08,256,0.875,bilinear,-50.190,-40.257,-13 -tf_efficientnet_b2_ap,44.700,55.300,60.680,39.320,9.11,260,0.890,bicubic,-49.570,-38.270,+29 -vit_base_patch32_384,44.693,55.307,58.530,41.470,88.30,384,1.000,bicubic,-50.567,-40.650,-29 -ens_adv_inception_resnet_v2,44.393,55.607,58.117,41.883,55.84,299,0.897,bicubic,-49.737,-40.673,+38 -tresnet_l,44.363,55.637,59.953,40.047,55.99,224,0.875,bilinear,-50.537,-39.077,-14 -gluon_resnext101_32x4d,44.290,55.710,59.090,40.910,44.18,224,0.875,bicubic,-49.830,-39.840,+39 -wide_resnet50_2,44.177,55.823,59.727,40.273,68.88,224,0.875,bicubic,-50.493,-39.403,-5 -cspresnext50,44.147,55.853,60.533,39.467,20.57,224,0.875,bilinear,-49.613,-38.307,+70 -seresnext50_32x4d,44.127,55.873,59.490,40.510,27.56,224,0.875,bicubic,-50.693,-39.640,-15 -gluon_resnet152_v1s,44.073,55.927,58.703,41.297,60.32,224,0.875,bicubic,-50.647,-40.357,-12 -pit_b_224,44.070,55.930,58.017,41.983,73.76,224,0.900,bicubic,-50.720,-40.803,-16 -ssl_resnet50,44.010,55.990,61.887,38.113,25.56,224,0.875,bilinear,-50.300,-37.263,+16 -inception_resnet_v2,44.003,55.997,57.907,42.093,55.84,299,0.897,bicubic,-50.337,-40.893,+14 -pnasnet5large,43.950,56.050,56.730,43.270,86.06,331,0.911,bicubic,-51.410,-42.400,-43 -pit_s_224,43.890,56.110,58.627,41.373,23.46,224,0.900,bicubic,-50.700,-40.303,-6 -gluon_resnext101_64x4d,43.877,56.123,58.710,41.290,83.46,224,0.875,bicubic,-50.473,-40.170,+10 -tnt_s_patch16_224,43.773,56.227,59.197,40.803,23.76,224,0.900,bicubic,-50.807,-39.983,-5 -cait_xxs36_224,43.760,56.240,58.720,41.280,17.30,224,1.000,bicubic,-50.180,-40.200,+43 -ecaresnet50d,43.750,56.250,60.387,39.613,25.58,224,0.875,bicubic,-50.440,-38.633,+17 -ecaresnet101d_pruned,43.737,56.263,59.607,40.393,24.88,224,0.875,bicubic,-50.713,-39.493,-2 -rexnet_150,43.690,56.310,60.897,39.103,9.73,224,0.875,bicubic,-50.580,-38.183,+9 -pit_xs_distilled_224,43.663,56.337,60.703,39.297,11.00,224,0.900,bicubic,-49.577,-38.117,+100 -gluon_resnet101_v1d,43.440,56.560,58.613,41.387,44.57,224,0.875,bicubic,-50.730,-40.297,+16 -ecaresnet50t,43.407,56.593,59.300,40.700,25.57,320,0.950,bicubic,-51.663,-39.990,-40 -gluon_resnet101_v1s,43.363,56.637,58.503,41.497,44.67,224,0.875,bicubic,-50.807,-40.507,+13 -cspdarknet53,43.357,56.643,59.430,40.570,27.64,256,0.887,bilinear,-50.733,-39.550,+20 -dpn68b,43.287,56.713,58.673,41.327,12.61,224,0.875,bicubic,-50.333,-40.287,+69 -eca_nfnet_l0,43.230,56.770,59.913,40.087,24.14,288,1.000,bicubic,-52.240,-39.467,-63 -resnest26d,43.140,56.860,60.623,39.377,17.07,224,0.875,bilinear,-50.100,-38.227,+94 
-resnetv2_101x1_bitm,43.113,56.887,60.950,39.050,44.54,480,1.000,bilinear,-52.397,-38.560,-68 -dpn131,43.047,56.953,57.440,42.560,79.25,224,0.875,bicubic,-50.713,-41.360,+48 -cspresnet50,43.030,56.970,59.153,40.847,21.62,256,0.887,bilinear,-50.830,-39.717,+35 +resnet101d,46.893,53.107,62.317,37.683,44.57,320,1.000,bicubic,-48.857,-37.123,-26 +resnetrs200,46.837,53.163,62.487,37.513,93.21,320,1.000,bicubic,-49.153,-36.953,-34 +resnetv2_50x3_bitm,46.827,53.173,64.873,35.127,217.32,480,1.000,bilinear,-49.313,-34.747,-46 +dm_nfnet_f1,46.693,53.307,61.560,38.440,132.63,320,0.910,bicubic,-49.677,-37.990,-58 +gluon_seresnext101_64x4d,46.677,53.323,61.303,38.697,88.23,224,0.875,bicubic,-47.973,-37.677,+41 +twins_pcpvt_large,46.637,53.363,62.240,37.760,60.99,224,0.900,bicubic,-49.083,-37.250,-28 +tresnet_xl,46.283,53.717,61.943,38.057,78.44,224,0.875,bilinear,-48.777,-37.317,+13 +vit_deit_small_distilled_patch16_224,46.160,53.840,62.417,37.583,22.44,224,0.900,bicubic,-48.430,-36.683,+43 +regnety_160,46.153,53.847,61.837,38.163,83.59,288,1.000,bicubic,-49.727,-37.723,-36 +gernet_m,46.150,53.850,62.700,37.300,21.14,224,0.875,bilinear,-48.400,-36.230,+44 +resnest50d_1s4x24d,46.083,53.917,62.377,37.623,25.68,224,0.875,bicubic,-48.307,-36.693,+53 +tf_efficientnet_b0_ns,46.047,53.953,63.253,36.747,5.29,224,0.875,bicubic,-47.693,-35.727,+114 +resnet51q,46.027,53.973,60.910,39.090,35.70,288,1.000,bilinear,-49.173,-38.370,-3 +resnest50d,45.937,54.063,62.623,37.377,27.48,224,0.875,bilinear,-48.683,-36.407,+33 +regnety_032,45.893,54.107,61.537,38.463,19.44,288,1.000,bicubic,-49.577,-37.843,-21 +twins_pcpvt_base,45.893,54.107,61.337,38.663,43.83,224,0.900,bicubic,-49.567,-38.053,-20 +levit_384,45.877,54.123,61.693,38.307,39.13,224,0.900,bicubic,-49.333,-37.467,-9 +twins_svt_base,45.877,54.123,60.967,39.033,56.07,224,0.900,bicubic,-49.693,-38.263,-30 +gluon_seresnext101_32x4d,45.590,54.410,61.143,38.857,48.96,224,0.875,bicubic,-48.860,-37.957,+40 +gluon_resnet152_v1d,45.430,54.570,60.077,39.923,60.21,224,0.875,bicubic,-49.010,-38.933,+41 +dm_nfnet_f0,45.420,54.580,60.990,39.010,71.49,256,0.900,bicubic,-50.210,-38.310,-37 +ssl_resnext50_32x4d,45.407,54.593,62.047,37.953,25.03,224,0.875,bilinear,-49.293,-37.193,+19 +nfnet_l0,45.390,54.610,62.057,37.943,35.07,288,1.000,bicubic,-50.000,-37.363,-24 +tresnet_xl_448,45.223,54.777,61.437,38.563,78.44,448,0.875,bilinear,-50.287,-37.903,-32 +nasnetalarge,45.210,54.790,57.883,42.117,88.75,331,0.911,bicubic,-49.940,-41.247,-11 +convit_small,45.203,54.797,60.510,39.490,27.78,224,0.875,bicubic,-49.717,-38.600,0 +swin_small_patch4_window7_224,45.163,54.837,60.330,39.670,49.61,224,0.900,bicubic,-50.557,-38.960,-50 +tf_efficientnet_b3,45.107,54.893,60.650,39.350,12.23,300,0.904,bicubic,-49.803,-38.460,-1 +rexnet_200,45.047,54.953,62.317,37.683,16.37,224,0.875,bicubic,-49.613,-36.833,+16 +resnetrs152,44.943,55.057,59.713,40.287,86.62,320,1.000,bicubic,-51.017,-39.667,-60 +ecaresnetlight,44.890,55.110,60.770,39.230,30.16,224,0.875,bicubic,-49.250,-38.260,+52 +vit_deit_base_patch16_224,44.870,55.130,59.177,40.823,86.57,224,0.900,bicubic,-50.140,-39.803,-11 +vit_deit_base_patch16_384,44.777,55.223,59.617,40.383,86.86,384,1.000,bicubic,-50.873,-39.773,-51 +cait_xxs36_384,44.773,55.227,59.380,40.620,17.37,384,1.000,bicubic,-50.447,-39.940,-27 +gernet_l,44.740,55.260,58.943,41.057,31.08,256,0.875,bilinear,-50.190,-40.257,-11 +tf_efficientnet_b2_ap,44.700,55.300,60.680,39.320,9.11,260,0.890,bicubic,-49.570,-38.400,+36 
+vit_base_patch32_384,44.693,55.307,58.530,41.470,88.30,384,1.000,bicubic,-50.567,-40.650,-33 +ens_adv_inception_resnet_v2,44.393,55.607,58.117,41.883,55.84,299,0.897,bicubic,-49.737,-40.673,+46 +tresnet_l,44.363,55.637,59.953,40.047,55.99,224,0.875,bilinear,-50.537,-39.077,-11 +gluon_resnext101_32x4d,44.290,55.710,59.090,40.910,44.18,224,0.875,bicubic,-49.830,-39.840,+47 +wide_resnet50_2,44.177,55.823,59.727,40.273,68.88,224,0.875,bicubic,-50.493,-39.323,+2 +cspresnext50,44.147,55.853,60.533,39.467,20.57,224,0.875,bilinear,-49.613,-38.167,+81 +seresnext50_32x4d,44.127,55.873,59.490,40.510,27.56,224,0.875,bicubic,-50.693,-39.640,-11 +gluon_resnet152_v1s,44.073,55.927,58.703,41.297,60.32,224,0.875,bicubic,-50.647,-40.357,-6 +pit_b_224,44.070,55.930,58.017,41.983,73.76,224,0.900,bicubic,-50.720,-40.803,-12 +ssl_resnet50,44.010,55.990,61.887,38.113,25.56,224,0.875,bilinear,-50.300,-37.263,+24 +inception_resnet_v2,44.003,55.997,57.907,42.093,55.84,299,0.897,bicubic,-50.337,-40.893,+22 +pnasnet5large,43.950,56.050,56.730,43.270,86.06,331,0.911,bicubic,-51.410,-42.400,-47 +pit_s_224,43.890,56.110,58.627,41.373,23.46,224,0.900,bicubic,-50.700,-40.303,+1 +gluon_resnext101_64x4d,43.877,56.123,58.710,41.290,83.46,224,0.875,bicubic,-50.473,-40.170,+18 +coat_lite_small,43.823,56.177,57.147,42.853,19.84,224,0.900,bicubic,-51.257,-41.873,-33 +tnt_s_patch16_224,43.773,56.227,59.197,40.803,23.76,224,0.900,bicubic,-50.807,-39.983,+1 +cait_xxs36_224,43.760,56.240,58.720,41.280,17.30,224,1.000,bicubic,-50.180,-40.170,+49 +ecaresnet50d,43.750,56.250,60.387,39.613,25.58,224,0.875,bicubic,-50.440,-38.633,+24 +ecaresnet101d_pruned,43.737,56.263,59.607,40.393,24.88,224,0.875,bicubic,-50.713,-39.483,+5 +tf_efficientnetv2_s,43.710,56.290,58.597,41.403,21.46,384,1.000,bicubic,-52.000,-40.803,-75 +rexnet_150,43.690,56.310,60.897,39.103,9.73,224,0.875,bicubic,-50.580,-38.053,+16 +pit_xs_distilled_224,43.663,56.337,60.703,39.297,11.00,224,0.900,bicubic,-49.577,-38.147,+111 +gluon_resnet101_v1d,43.440,56.560,58.613,41.387,44.57,224,0.875,bicubic,-50.730,-40.327,+20 +ecaresnet50t,43.407,56.593,59.300,40.700,25.57,320,0.950,bicubic,-51.663,-39.990,-41 +gluon_resnet101_v1s,43.363,56.637,58.503,41.497,44.67,224,0.875,bicubic,-50.807,-40.507,+19 +cspdarknet53,43.357,56.643,59.430,40.570,27.64,256,0.887,bilinear,-50.733,-39.550,+26 +dpn68b,43.287,56.713,58.673,41.327,12.61,224,0.875,bicubic,-50.333,-40.027,+77 +visformer_small,43.253,56.747,57.993,42.007,40.22,224,0.900,bicubic,-51.707,-41.217,-41 +eca_nfnet_l0,43.230,56.770,59.913,40.087,24.14,288,1.000,bicubic,-52.240,-39.407,-70 +resnest26d,43.140,56.860,60.623,39.377,17.07,224,0.875,bilinear,-50.100,-38.127,+104 +resnetv2_101x1_bitm,43.113,56.887,60.950,39.050,44.54,480,1.000,bilinear,-52.397,-38.560,-76 +twins_pcpvt_small,43.090,56.910,58.873,41.127,24.11,224,0.900,bicubic,-51.510,-40.277,-19 +dpn131,43.047,56.953,57.440,42.560,79.25,224,0.875,bicubic,-50.713,-41.420,+55 +cspresnet50,43.030,56.970,59.153,40.847,21.62,256,0.887,bilinear,-50.830,-39.717,+40 tf_efficientnet_lite4,42.967,57.033,57.620,42.380,13.01,380,0.920,bilinear,-51.903,-41.470,-41 -gluon_resnet152_v1b,42.903,57.097,57.750,42.250,60.19,224,0.875,bicubic,-51.127,-40.990,+16 -dpn107,42.857,57.143,57.367,42.633,86.92,224,0.875,bicubic,-51.103,-41.473,+22 -tf_efficientnet_b1_ap,42.803,57.197,58.813,41.187,7.79,240,0.882,bicubic,-50.827,-39.987,+58 -gluon_resnet152_v1c,42.800,57.200,57.737,42.263,60.21,224,0.875,bicubic,-51.080,-41.063,+28 
-gluon_xception65,42.793,57.207,58.820,41.180,39.92,299,0.903,bicubic,-51.217,-40.200,+15 -tresnet_l_448,42.753,57.247,58.947,41.053,55.99,448,0.875,bilinear,-52.657,-40.353,-71 -resnet50d,42.703,57.297,58.697,41.303,25.58,224,0.875,bicubic,-51.367,-40.223,+9 -gluon_seresnext50_32x4d,42.683,57.317,58.710,41.290,27.56,224,0.875,bicubic,-51.487,-40.230,-4 -resnext101_32x8d,42.557,57.443,58.317,41.683,88.79,224,0.875,bilinear,-51.213,-40.633,+35 -seresnet50,42.510,57.490,58.667,41.333,28.09,224,0.875,bicubic,-51.570,-40.303,+5 -resnetrs101,42.437,57.563,57.300,42.700,63.62,288,0.940,bicubic,-52.813,-41.910,-69 -nf_resnet50,42.400,57.600,59.540,40.460,25.56,288,0.940,bicubic,-52.010,-39.560,-24 -dpn98,42.280,57.720,56.880,43.120,61.57,224,0.875,bicubic,-51.660,-42.010,+13 -vit_deit_small_patch16_224,42.263,57.737,58.020,41.980,22.05,224,0.900,bicubic,-51.737,-40.940,+9 -tf_efficientnet_cc_b1_8e,42.233,57.767,58.420,41.580,39.72,240,0.882,bicubic,-51.337,-40.270,+54 -legacy_senet154,42.207,57.793,56.597,43.403,115.09,224,0.875,bilinear,-52.523,-42.503,-53 -cait_xxs24_384,42.187,57.813,57.460,42.540,12.03,384,1.000,bicubic,-52.733,-41.680,-61 -tf_efficientnet_b2,42.120,57.880,58.197,41.803,9.11,260,0.890,bicubic,-52.090,-40.833,-17 -gluon_resnext50_32x4d,42.043,57.957,57.667,42.333,25.03,224,0.875,bicubic,-51.607,-41.023,+39 -resnet50,42.013,57.987,56.000,44.000,25.56,224,0.875,bicubic,-51.447,-42.600,+57 -ecaresnet50d_pruned,41.953,58.047,58.217,41.783,19.94,224,0.875,bicubic,-51.867,-40.713,+18 -efficientnet_b2,41.933,58.067,58.300,41.700,9.11,288,1.000,bicubic,-52.437,-40.750,-31 -dla102x2,41.647,58.353,57.967,42.033,41.28,224,0.875,bilinear,-52.353,-41.063,-1 -hrnet_w64,41.637,58.363,57.130,42.870,128.06,224,0.875,bilinear,-52.193,-41.800,+13 -gluon_senet154,41.627,58.373,56.373,43.627,115.09,224,0.875,bicubic,-53.083,-42.597,-59 -inception_v4,41.577,58.423,55.383,44.617,42.68,299,0.875,bicubic,-52.803,-43.437,-36 -efficientnet_el,41.497,58.503,58.303,41.697,10.59,300,0.904,bicubic,-53.173,-40.747,-57 -efficientnet_em,41.493,58.507,58.877,41.123,6.90,240,0.882,bicubic,-52.247,-40.053,+21 -tf_efficientnet_cc_b0_8e,41.487,58.513,57.377,42.623,24.01,224,0.875,bicubic,-51.383,-41.083,+86 -swin_tiny_patch4_window7_224,41.457,58.543,57.303,42.697,28.29,224,0.900,bicubic,-53.163,-41.817,-55 -resnext50_32x4d,41.443,58.557,56.997,43.003,25.03,224,0.875,bicubic,-52.397,-41.833,+5 -cait_xxs24_224,41.383,58.617,57.527,42.473,11.96,224,1.000,bicubic,-52.107,-41.243,+43 -tv_resnet152,41.330,58.670,57.520,42.480,60.19,224,0.875,bilinear,-51.910,-41.230,+58 -xception71,41.270,58.730,55.873,44.127,42.34,299,0.903,bicubic,-52.620,-43.077,-3 -dpn92,41.267,58.733,56.333,43.667,37.67,224,0.875,bicubic,-52.923,-42.597,-33 -adv_inception_v3,41.263,58.737,56.317,43.683,23.83,299,0.875,bicubic,-51.747,-42.173,+65 -gernet_s,41.247,58.753,58.830,41.170,8.17,224,0.875,bilinear,-51.193,-39.670,+101 -resnetblur50,41.053,58.947,57.077,42.923,25.56,224,0.875,bicubic,-52.657,-41.723,+16 -nf_regnet_b1,41.010,58.990,58.117,41.883,10.22,288,0.900,bicubic,-52.880,-40.633,-10 -gluon_resnet50_v1d,40.970,59.030,57.137,42.863,25.58,224,0.875,bicubic,-52.560,-41.573,+32 -gluon_inception_v3,40.907,59.093,55.617,44.383,23.83,299,0.875,bicubic,-52.633,-43.213,+29 -ese_vovnet39b,40.867,59.133,56.950,43.050,24.57,224,0.875,bicubic,-52.983,-41.950,-7 -regnety_320,40.813,59.187,56.117,43.883,145.05,224,0.875,bicubic,-53.707,-43.053,-60 -resnet34d,40.810,59.190,56.530,43.470,21.82,224,0.875,bicubic,-51.830,-41.890,+82 
-xception,40.763,59.237,56.387,43.613,22.86,299,0.897,bicubic,-52.877,-42.383,+14 -skresnext50_32x4d,40.700,59.300,56.023,43.977,27.48,224,0.875,bicubic,-53.250,-42.797,-21 -gluon_resnet101_v1b,40.683,59.317,56.117,43.883,44.55,224,0.875,bicubic,-53.077,-42.583,0 -hrnet_w40,40.660,59.340,56.757,43.243,57.56,224,0.875,bilinear,-53.050,-42.043,+4 -repvgg_b1,40.593,59.407,57.837,42.163,57.42,224,0.875,bilinear,-52.817,-40.953,+33 +twins_svt_small,42.923,57.077,58.453,41.547,24.06,224,0.900,bicubic,-51.847,-40.627,-37 +gluon_resnet152_v1b,42.903,57.097,57.750,42.250,60.19,224,0.875,bicubic,-51.127,-40.990,+20 +dpn107,42.857,57.143,57.367,42.633,86.92,224,0.875,bicubic,-51.103,-41.473,+26 +levit_256,42.823,57.177,57.897,42.103,18.89,224,0.900,bicubic,-51.577,-41.163,-12 +tf_efficientnet_b1_ap,42.803,57.197,58.813,41.187,7.79,240,0.882,bicubic,-50.827,-39.987,+63 +gluon_resnet152_v1c,42.800,57.200,57.737,42.263,60.21,224,0.875,bicubic,-51.080,-41.353,+32 +gluon_xception65,42.793,57.207,58.820,41.180,39.92,299,0.903,bicubic,-51.217,-40.200,+18 +tresnet_l_448,42.753,57.247,58.947,41.053,55.99,448,0.875,bilinear,-52.657,-40.353,-81 +resnet50d,42.703,57.297,58.697,41.303,25.58,224,0.875,bicubic,-51.367,-40.223,+11 +gluon_seresnext50_32x4d,42.683,57.317,58.710,41.290,27.56,224,0.875,bicubic,-51.487,-40.200,0 +resnext101_32x8d,42.557,57.443,58.317,41.683,88.79,224,0.875,bilinear,-51.213,-40.633,+38 +seresnet50,42.510,57.490,58.667,41.333,28.09,224,0.875,bicubic,-51.570,-40.303,+7 +resnetrs101,42.437,57.563,57.300,42.700,63.62,288,0.940,bicubic,-52.813,-41.910,-79 +nf_resnet50,42.400,57.600,59.540,40.460,25.56,288,0.940,bicubic,-52.010,-39.560,-23 +tf_efficientnetv2_b3,42.313,57.687,57.940,42.060,14.36,300,0.904,bicubic,-52.807,-41.260,-70 +dpn98,42.280,57.720,56.880,43.120,61.57,224,0.875,bicubic,-51.660,-42.040,+16 +vit_deit_small_patch16_224,42.263,57.737,58.020,41.980,22.05,224,0.900,bicubic,-51.737,-40.940,+11 +tf_efficientnet_cc_b1_8e,42.233,57.767,58.420,41.580,39.72,240,0.882,bicubic,-51.337,-40.270,+58 +legacy_senet154,42.207,57.793,56.597,43.403,115.09,224,0.875,bilinear,-52.523,-42.503,-54 +cait_xxs24_384,42.187,57.813,57.460,42.540,12.03,384,1.000,bicubic,-52.733,-41.680,-66 +tf_efficientnet_b2,42.120,57.880,58.197,41.803,9.11,260,0.890,bicubic,-52.090,-40.833,-16 +gluon_resnext50_32x4d,42.043,57.957,57.667,42.333,25.03,224,0.875,bicubic,-51.607,-41.023,+43 +resnet50,42.013,57.987,56.000,44.000,25.56,224,0.875,bicubic,-51.447,-42.600,+62 +ecaresnet50d_pruned,41.953,58.047,58.217,41.783,19.94,224,0.875,bicubic,-51.867,-40.783,+19 +efficientnet_b2,41.933,58.067,58.300,41.700,9.11,288,1.000,bicubic,-52.437,-40.750,-30 +dla102x2,41.647,58.353,57.967,42.033,41.28,224,0.875,bilinear,-52.353,-41.063,+1 +hrnet_w64,41.637,58.363,57.130,42.870,128.06,224,0.875,bilinear,-52.193,-41.800,+15 +gluon_senet154,41.627,58.373,56.373,43.627,115.09,224,0.875,bicubic,-53.083,-42.597,-60 +inception_v4,41.577,58.423,55.383,44.617,42.68,299,0.875,bicubic,-52.803,-43.437,-35 +efficientnet_el,41.497,58.503,58.303,41.697,10.59,300,0.904,bicubic,-53.173,-40.827,-59 +efficientnet_em,41.493,58.507,58.877,41.123,6.90,240,0.882,bicubic,-52.247,-40.053,+23 +tf_efficientnet_cc_b0_8e,41.487,58.513,57.377,42.623,24.01,224,0.875,bicubic,-51.383,-41.083,+93 +swin_tiny_patch4_window7_224,41.457,58.543,57.303,42.697,28.29,224,0.900,bicubic,-53.163,-41.817,-56 +resnext50_32x4d,41.443,58.557,56.997,43.003,25.03,224,0.875,bicubic,-52.397,-41.833,+7 
+cait_xxs24_224,41.383,58.617,57.527,42.473,11.96,224,1.000,bicubic,-52.107,-41.243,+48 +tv_resnet152,41.327,58.673,57.520,42.480,60.19,224,0.875,bilinear,-51.913,-41.300,+61 +xception71,41.270,58.730,55.873,44.127,42.34,299,0.903,bicubic,-52.620,-42.877,-3 +dpn92,41.267,58.733,56.333,43.667,37.67,224,0.875,bicubic,-52.923,-42.597,-32 +adv_inception_v3,41.263,58.737,56.317,43.683,23.83,299,0.875,bicubic,-51.747,-42.173,+72 +gernet_s,41.247,58.753,58.830,41.170,8.17,224,0.875,bilinear,-51.193,-39.710,+109 +resnetblur50,41.053,58.947,57.077,42.923,25.56,224,0.875,bicubic,-52.657,-41.733,+18 +nf_regnet_b1,41.010,58.990,58.117,41.883,10.22,288,0.900,bicubic,-52.880,-40.833,-6 +gluon_resnet50_v1d,40.970,59.030,57.137,42.863,25.58,224,0.875,bicubic,-52.560,-41.573,+36 +gluon_inception_v3,40.907,59.093,55.617,44.383,23.83,299,0.875,bicubic,-52.633,-43.213,+33 +ese_vovnet39b,40.867,59.133,56.947,43.053,24.57,224,0.875,bicubic,-52.983,-41.953,-5 +levit_192,40.847,59.153,56.687,43.313,10.95,224,0.900,bicubic,-52.863,-42.103,+12 +regnety_320,40.813,59.187,56.117,43.883,145.05,224,0.875,bicubic,-53.707,-43.053,-61 +resnet34d,40.810,59.190,56.530,43.470,21.82,224,0.875,bicubic,-51.830,-41.890,+88 +xception,40.763,59.237,56.387,43.613,22.86,299,0.897,bicubic,-52.877,-42.383,+17 +skresnext50_32x4d,40.700,59.300,56.023,43.977,27.48,224,0.875,bicubic,-53.250,-42.797,-20 +gluon_resnet101_v1b,40.683,59.317,56.117,43.883,44.55,224,0.875,bicubic,-53.077,-42.723,-1 +hrnet_w40,40.660,59.340,56.753,43.247,57.56,224,0.875,bilinear,-53.050,-42.047,+5 +repvgg_b1,40.593,59.407,57.837,42.163,57.42,224,0.875,bilinear,-52.817,-40.953,+37 tf_efficientnet_lite3,40.563,59.437,56.477,43.523,8.20,300,0.904,bilinear,-53.567,-42.483,-39 -tresnet_m_448,40.530,59.470,56.700,43.300,31.39,448,0.875,bilinear,-54.130,-42.450,-79 -pit_xs_224,40.497,59.503,56.530,43.470,10.62,224,0.900,bicubic,-52.413,-42.250,+58 -dla169,40.493,59.507,57.263,42.737,53.39,224,0.875,bilinear,-53.307,-41.647,-11 -repvgg_b2,40.467,59.533,57.780,42.220,89.02,224,0.875,bilinear,-53.123,-41.290,+12 +tresnet_m_448,40.530,59.470,56.700,43.300,31.39,448,0.875,bilinear,-54.130,-42.390,-82 +pit_xs_224,40.497,59.503,56.530,43.470,10.62,224,0.900,bicubic,-52.413,-42.250,+64 +dla169,40.493,59.507,57.263,42.737,53.39,224,0.875,bilinear,-53.307,-41.577,-11 +repvgg_b2,40.467,59.533,57.780,42.220,89.02,224,0.875,bilinear,-53.123,-41.060,+14 regnetx_320,40.443,59.557,55.660,44.340,107.81,224,0.875,bicubic,-53.767,-43.390,-55 -skresnet34,40.397,59.603,56.737,43.263,22.28,224,0.875,bicubic,-52.173,-41.783,+77 -efficientnet_el_pruned,40.390,59.610,56.903,43.097,10.59,300,0.904,bicubic,-53.700,-42.107,-43 -efficientnet_b2_pruned,40.383,59.617,56.537,43.463,8.31,260,0.890,bicubic,-53.417,-42.303,-17 -legacy_seresnext101_32x4d,40.360,59.640,54.817,45.183,48.96,224,0.875,bilinear,-53.770,-44.153,-50 +coat_mini,40.420,59.580,55.167,44.833,10.34,224,0.900,bicubic,-54.350,-43.783,-97 +skresnet34,40.397,59.603,56.737,43.263,22.28,224,0.875,bicubic,-52.173,-41.783,+82 +efficientnet_el_pruned,40.390,59.610,56.903,43.097,10.59,300,0.904,bicubic,-53.700,-42.107,-44 +efficientnet_b2_pruned,40.383,59.617,56.537,43.463,8.31,260,0.890,bicubic,-53.417,-42.373,-16 +coat_lite_mini,40.360,59.640,55.717,44.283,11.01,224,0.900,bicubic,-53.090,-43.063,+22 +legacy_seresnext101_32x4d,40.360,59.640,54.817,45.183,48.96,224,0.875,bilinear,-53.770,-44.153,-51 wide_resnet101_2,40.360,59.640,55.780,44.220,126.89,224,0.875,bilinear,-53.370,-43.030,-10 
-coat_lite_mini,40.360,59.640,55.717,44.283,11.01,224,0.900,bicubic,-53.090,-43.063,+19 -tf_efficientnet_b0_ap,40.337,59.663,56.787,43.213,5.29,224,0.875,bicubic,-52.273,-41.583,+66 -xception65,40.273,59.727,55.283,44.717,39.92,299,0.903,bicubic,-53.487,-43.577,-15 -regnetx_160,40.270,59.730,56.050,43.950,54.28,224,0.875,bicubic,-53.610,-43.040,-31 -densenet201,40.267,59.733,56.710,43.290,20.01,224,0.875,bicubic,-52.423,-41.940,+57 +tf_efficientnet_b0_ap,40.337,59.663,56.787,43.213,5.29,224,0.875,bicubic,-52.273,-41.723,+70 +xception65,40.273,59.727,55.283,44.717,39.92,299,0.903,bicubic,-53.487,-43.517,-17 +regnetx_160,40.270,59.730,56.050,43.950,54.28,224,0.875,bicubic,-53.610,-42.750,-32 +densenet201,40.267,59.733,56.710,43.290,20.01,224,0.875,bicubic,-52.423,-41.940,+62 resnext50d_32x4d,40.170,59.830,55.487,44.513,25.05,224,0.875,bicubic,-53.640,-43.253,-26 -vit_small_patch16_224,40.130,59.870,56.543,43.457,48.75,224,0.900,bicubic,-52.470,-41.887,+62 +vit_small_patch16_224,40.130,59.870,56.543,43.457,48.75,224,0.900,bicubic,-52.470,-41.847,+68 hrnet_w48,40.093,59.907,56.640,43.360,77.47,224,0.875,bilinear,-53.937,-42.400,-50 -legacy_seresnet152,40.043,59.957,55.820,44.180,66.82,224,0.875,bilinear,-53.397,-43.030,+11 -hrnet_w30,40.030,59.970,57.093,42.907,37.71,224,0.875,bilinear,-53.340,-41.737,+14 +legacy_seresnet152,40.043,59.957,55.820,44.180,66.82,224,0.875,bilinear,-53.397,-43.030,+14 +hrnet_w30,40.030,59.970,57.093,42.907,37.71,224,0.875,bilinear,-53.340,-41.737,+17 regnetx_080,40.000,60.000,55.977,44.023,39.57,224,0.875,bicubic,-53.790,-42.933,-28 -tf_efficientnet_b1,39.977,60.023,56.137,43.863,7.79,240,0.882,bicubic,-53.733,-42.673,-19 -gluon_resnet101_v1c,39.953,60.047,55.300,44.700,44.57,224,0.875,bicubic,-53.737,-43.460,-18 -res2net101_26w_4s,39.717,60.283,54.550,45.450,45.21,224,0.875,bilinear,-53.803,-44.050,-1 -regnetx_120,39.687,60.313,55.633,44.367,46.11,224,0.875,bicubic,-54.583,-43.557,-79 -hrnet_w44,39.677,60.323,55.333,44.667,67.06,224,0.875,bilinear,-53.943,-43.617,-12 -densenet161,39.620,60.380,56.133,43.867,28.68,224,0.875,bicubic,-53.280,-42.677,+34 -mixnet_xl,39.617,60.383,55.887,44.113,11.90,224,0.875,bicubic,-54.613,-42.933,-79 -xception41,39.610,60.390,55.037,44.963,26.97,299,0.903,bicubic,-53.870,-43.713,-3 -res2net50_26w_8s,39.603,60.397,54.550,45.450,48.40,224,0.875,bilinear,-53.847,-44.150,-1 +tf_efficientnet_b1,39.977,60.023,56.137,43.863,7.79,240,0.882,bicubic,-53.733,-42.663,-17 +gluon_resnet101_v1c,39.953,60.047,55.300,44.700,44.57,224,0.875,bicubic,-53.737,-43.460,-16 +tf_efficientnetv2_b0,39.787,60.213,56.283,43.717,7.14,224,0.875,bicubic,-53.273,-42.417,+26 +res2net101_26w_4s,39.717,60.283,54.550,45.450,45.21,224,0.875,bilinear,-53.803,-44.050,0 +regnetx_120,39.687,60.313,55.633,44.367,46.11,224,0.875,bicubic,-54.583,-43.557,-81 +hrnet_w44,39.677,60.323,55.333,44.667,67.06,224,0.875,bilinear,-53.943,-43.627,-12 +densenet161,39.620,60.380,56.133,43.867,28.68,224,0.875,bicubic,-53.280,-42.587,+40 +mixnet_xl,39.617,60.383,55.887,44.113,11.90,224,0.875,bicubic,-54.613,-42.933,-81 +xception41,39.610,60.390,55.037,44.963,26.97,299,0.903,bicubic,-53.870,-43.713,-1 +res2net50_26w_8s,39.603,60.397,54.550,45.450,48.40,224,0.875,bilinear,-53.847,-44.150,+1 +tf_efficientnetv2_b1,39.570,60.430,55.343,44.657,8.14,240,0.882,bicubic,-54.140,-43.477,-26 dla102x,39.553,60.447,56.323,43.677,26.31,224,0.875,bilinear,-53.977,-42.527,-10 rexnet_130,39.487,60.513,56.640,43.360,7.56,224,0.875,bicubic,-54.183,-42.070,-25 
-hrnet_w32,39.463,60.537,56.123,43.877,41.23,224,0.875,bilinear,-53.487,-42.447,+23 -regnety_064,39.403,60.597,55.773,44.227,30.58,224,0.875,bicubic,-54.737,-43.257,-76 -densenetblur121d,39.380,60.620,56.640,43.360,8.00,224,0.875,bicubic,-53.020,-41.770,+55 -regnety_120,39.347,60.653,55.277,44.723,51.82,224,0.875,bicubic,-54.663,-43.753,-65 -tv_resnet101,39.307,60.693,55.803,44.197,44.55,224,0.875,bilinear,-53.573,-42.857,+27 -tf_efficientnet_el,39.303,60.697,55.387,44.613,10.59,300,0.904,bicubic,-55.057,-43.713,-96 +hrnet_w32,39.463,60.537,56.123,43.877,41.23,224,0.875,bilinear,-53.487,-42.717,+27 +levit_128,39.433,60.567,55.350,44.650,9.21,224,0.900,bicubic,-53.617,-43.340,+15 +regnety_064,39.403,60.597,55.773,44.227,30.58,224,0.875,bicubic,-54.737,-43.177,-81 +densenetblur121d,39.380,60.620,56.640,43.360,8.00,224,0.875,bicubic,-53.020,-41.770,+57 +regnety_120,39.347,60.653,55.277,44.723,51.82,224,0.875,bicubic,-54.663,-43.753,-68 +tv_resnet101,39.307,60.693,55.803,44.197,44.55,224,0.875,bilinear,-53.573,-42.857,+29 +tf_efficientnet_el,39.303,60.697,55.387,44.613,10.59,300,0.904,bicubic,-55.057,-43.713,-100 tf_inception_v3,39.237,60.763,54.300,45.700,23.83,299,0.875,bicubic,-53.963,-44.180,+3 -gluon_resnet50_v1s,39.233,60.767,55.010,44.990,25.68,224,0.875,bicubic,-54.357,-43.830,-25 -densenet169,39.167,60.833,55.843,44.157,14.15,224,0.875,bicubic,-53.133,-42.747,+53 -legacy_seresnet101,39.037,60.963,55.003,44.997,49.33,224,0.875,bilinear,-54.223,-43.737,-6 -efficientnet_b1_pruned,39.010,60.990,55.647,44.353,6.33,240,0.882,bicubic,-53.970,-42.883,+11 -repvgg_b1g4,38.990,61.010,56.350,43.650,39.97,224,0.875,bilinear,-54.040,-42.470,+5 -inception_v3,38.960,61.040,53.853,46.147,23.83,299,0.875,bicubic,-53.940,-44.477,+17 -dpn68,38.933,61.067,54.933,45.067,12.61,224,0.875,bicubic,-53.307,-43.677,+51 -regnety_080,38.917,61.083,55.213,44.787,39.18,224,0.875,bicubic,-54.973,-43.787,-67 -legacy_seresnext50_32x4d,38.877,61.123,54.593,45.407,27.56,224,0.875,bilinear,-54.553,-44.207,-17 -dla102,38.833,61.167,55.323,44.677,33.27,224,0.875,bilinear,-54.427,-43.457,-14 -regnety_040,38.820,61.180,55.557,44.443,20.65,224,0.875,bicubic,-54.800,-43.143,-38 -densenet121,38.783,61.217,56.273,43.727,7.98,224,0.875,bicubic,-53.157,-42.007,+53 -res2net50_14w_8s,38.710,61.290,54.077,45.923,25.06,224,0.875,bilinear,-54.320,-44.623,-2 -regnetx_040,38.703,61.297,55.340,44.660,22.12,224,0.875,bicubic,-54.977,-43.600,-47 -res2net50_26w_6s,38.687,61.313,53.743,46.257,37.05,224,0.875,bilinear,-54.903,-45.007,-37 -regnetx_032,38.680,61.320,55.157,44.843,15.30,224,0.875,bicubic,-54.570,-43.573,-17 -selecsls60,38.623,61.377,55.630,44.370,30.67,224,0.875,bicubic,-54.387,-43.200,-4 -dla60x,38.617,61.383,55.383,44.617,17.35,224,0.875,bilinear,-54.573,-43.327,-14 -tf_efficientnet_b0,38.600,61.400,55.957,44.043,5.29,224,0.875,bicubic,-53.800,-42.513,+33 -dla60_res2net,38.590,61.410,54.560,45.440,20.85,224,0.875,bilinear,-54.790,-44.300,-26 -selecsls60b,38.573,61.427,55.307,44.693,32.77,224,0.875,bicubic,-54.927,-43.533,-36 -repvgg_a2,38.563,61.437,55.770,44.230,28.21,224,0.875,bilinear,-54.117,-42.750,+12 -hardcorenas_f,38.500,61.500,55.657,44.343,8.20,224,0.875,bilinear,-54.480,-42.963,-7 -dla60_res2next,38.450,61.550,54.950,45.050,17.03,224,0.875,bilinear,-55.120,-43.850,-45 -regnetx_064,38.430,61.570,54.990,45.010,26.21,224,0.875,bicubic,-55.200,-44.060,-54 -tf_efficientnet_cc_b0_4e,38.413,61.587,55.150,44.850,13.31,224,0.875,bicubic,-54.427,-43.290,+3 
-gluon_resnet50_v1b,38.407,61.593,54.833,45.167,25.56,224,0.875,bicubic,-54.153,-43.717,+18 -resnetv2_50x1_bitm,38.287,61.713,56.967,43.033,25.55,480,1.000,bilinear,-56.263,-41.963,-137 -hrnet_w18,38.277,61.723,55.643,44.357,21.30,224,0.875,bilinear,-54.483,-43.017,+3 -mixnet_l,38.160,61.840,54.757,45.243,7.33,224,0.875,bicubic,-55.100,-43.943,-32 -hardcorenas_e,38.137,61.863,55.173,44.827,8.07,224,0.875,bilinear,-54.813,-43.667,-13 -efficientnet_b1,38.087,61.913,54.010,45.990,7.79,256,1.000,bicubic,-54.943,-44.700,-23 -coat_lite_tiny,38.070,61.930,53.453,46.547,5.72,224,0.900,bicubic,-54.780,-45.187,-6 -resnetrs50,37.957,62.043,53.310,46.690,35.69,224,0.910,bicubic,-56.063,-45.540,-104 -hardcorenas_c,37.883,62.117,55.717,44.283,5.52,224,0.875,bilinear,-54.447,-42.623,+18 -gluon_resnet50_v1c,37.843,62.157,54.123,45.877,25.58,224,0.875,bicubic,-55.067,-44.587,-16 -res2net50_26w_4s,37.827,62.173,53.073,46.927,25.70,224,0.875,bilinear,-55.353,-45.597,-32 -efficientnet_es,37.770,62.230,54.967,45.033,5.44,224,0.875,bicubic,-55.140,-43.723,-19 -resnest14d,37.767,62.233,56.470,43.530,10.61,224,0.875,bilinear,-53.363,-41.860,+52 -tv_resnext50_32x4d,37.750,62.250,54.113,45.887,25.03,224,0.875,bilinear,-55.150,-44.607,-16 -ecaresnet26t,37.650,62.350,54.350,45.650,16.01,320,0.950,bicubic,-56.290,-44.570,-103 -hardcorenas_d,37.550,62.450,54.723,45.277,7.50,224,0.875,bilinear,-55.050,-43.667,-1 -res2next50,37.477,62.523,52.853,47.147,24.67,224,0.875,bilinear,-55.673,-45.807,-36 -resnet34,37.443,62.557,54.297,45.703,21.80,224,0.875,bilinear,-53.757,-43.753,+42 -pit_ti_distilled_224,37.337,62.663,55.137,44.863,5.10,224,0.900,bicubic,-53.563,-43.083,+51 -hardcorenas_b,37.243,62.757,55.073,44.927,5.18,224,0.875,bilinear,-54.697,-43.327,+20 -mobilenetv3_large_100_miil,37.210,62.790,53.513,46.487,5.48,224,0.875,bilinear,-55.040,-44.737,+10 -res2net50_48w_2s,37.117,62.883,53.333,46.667,25.29,224,0.875,bilinear,-55.673,-45.137,-17 -dla60,37.073,62.927,54.200,45.800,22.04,224,0.875,bilinear,-55.597,-44.430,-14 -rexnet_100,37.063,62.937,54.020,45.980,4.80,224,0.875,bicubic,-55.787,-44.600,-22 -regnety_016,37.017,62.983,54.093,45.907,11.20,224,0.875,bicubic,-55.983,-44.587,-37 -tf_mixnet_l,36.987,63.013,52.583,47.417,7.33,224,0.875,bicubic,-56.053,-45.957,-44 -legacy_seresnet50,36.873,63.127,53.487,46.513,28.09,224,0.875,bilinear,-55.797,-45.163,-17 -tv_densenet121,36.810,63.190,54.033,45.967,7.98,224,0.875,bicubic,-54.590,-44.217,+26 -tf_efficientnet_lite2,36.807,63.193,53.320,46.680,6.09,260,0.890,bicubic,-55.783,-45.230,-13 -mobilenetv2_120d,36.780,63.220,54.047,45.953,5.83,224,0.875,bicubic,-55.830,-44.463,-18 -tf_efficientnet_lite1,36.737,63.263,53.590,46.410,5.42,240,0.882,bicubic,-55.573,-44.900,-3 -regnetx_016,36.683,63.317,53.297,46.703,9.19,224,0.875,bicubic,-55.857,-45.253,-12 -hardcorenas_a,36.640,63.360,54.910,45.090,5.26,224,0.875,bilinear,-54.980,-43.260,+14 +gluon_resnet50_v1s,39.233,60.767,55.010,44.990,25.68,224,0.875,bicubic,-54.357,-43.740,-24 +tf_efficientnetv2_b2,39.180,60.820,54.570,45.430,10.10,260,0.890,bicubic,-54.890,-44.360,-78 +densenet169,39.167,60.833,55.843,44.157,14.15,224,0.875,bicubic,-53.133,-42.747,+54 +legacy_seresnet101,39.037,60.963,55.003,44.997,49.33,224,0.875,bilinear,-54.223,-43.737,-7 +efficientnet_b1_pruned,39.010,60.990,55.647,44.353,6.33,240,0.882,bicubic,-53.970,-42.883,+12 +repvgg_b1g4,38.990,61.010,56.350,43.650,39.97,224,0.875,bilinear,-54.040,-42.360,+5 +inception_v3,38.960,61.040,53.853,46.147,23.83,299,0.875,bicubic,-53.940,-44.477,+18 
+dpn68,38.933,61.067,54.933,45.067,12.61,224,0.875,bicubic,-53.307,-43.677,+52 +regnety_080,38.917,61.083,55.213,44.787,39.18,224,0.875,bicubic,-54.973,-43.787,-71 +legacy_seresnext50_32x4d,38.877,61.123,54.593,45.407,27.56,224,0.875,bilinear,-54.553,-44.207,-18 +dla102,38.833,61.167,55.323,44.677,33.27,224,0.875,bilinear,-54.427,-43.457,-15 +regnety_040,38.820,61.180,55.557,44.443,20.65,224,0.875,bicubic,-54.800,-43.393,-38 +densenet121,38.783,61.217,56.273,43.727,7.98,224,0.875,bicubic,-53.157,-42.007,+54 +res2net50_14w_8s,38.710,61.290,54.077,45.923,25.06,224,0.875,bilinear,-54.320,-44.743,-2 +regnetx_040,38.703,61.297,55.340,44.660,22.12,224,0.875,bicubic,-54.977,-43.600,-49 +res2net50_26w_6s,38.687,61.313,53.743,46.257,37.05,224,0.875,bilinear,-54.903,-45.327,-40 +regnetx_032,38.680,61.320,55.157,44.843,15.30,224,0.875,bicubic,-54.570,-43.573,-18 +selecsls60,38.623,61.377,55.630,44.370,30.67,224,0.875,bicubic,-54.387,-43.200,-3 +dla60x,38.617,61.383,55.383,44.617,17.35,224,0.875,bilinear,-54.573,-43.327,-15 +tf_efficientnet_b0,38.600,61.400,55.957,44.043,5.29,224,0.875,bicubic,-53.800,-42.513,+34 +dla60_res2net,38.590,61.410,54.560,45.440,20.85,224,0.875,bilinear,-54.790,-44.300,-27 +selecsls60b,38.573,61.427,55.307,44.693,32.77,224,0.875,bicubic,-54.927,-43.533,-37 +repvgg_a2,38.563,61.437,55.770,44.230,28.21,224,0.875,bilinear,-54.117,-42.750,+13 +hardcorenas_f,38.500,61.500,55.657,44.343,8.20,224,0.875,bilinear,-54.480,-42.963,-6 +dla60_res2next,38.450,61.550,54.950,45.050,17.03,224,0.875,bilinear,-55.120,-43.850,-47 +regnetx_064,38.430,61.570,54.990,45.010,26.21,224,0.875,bicubic,-55.200,-44.060,-56 +tf_efficientnet_cc_b0_4e,38.413,61.587,55.150,44.850,13.31,224,0.875,bicubic,-54.427,-43.290,+4 +gluon_resnet50_v1b,38.407,61.593,54.833,45.167,25.56,224,0.875,bicubic,-54.153,-43.717,+19 +resnetv2_50x1_bitm,38.287,61.713,56.967,43.033,25.55,480,1.000,bilinear,-56.263,-42.283,-141 +hrnet_w18,38.277,61.723,55.643,44.357,21.30,224,0.875,bilinear,-54.483,-43.017,+4 +mixnet_l,38.160,61.840,54.757,45.243,7.33,224,0.875,bicubic,-55.100,-43.943,-33 +hardcorenas_e,38.137,61.863,55.173,44.827,8.07,224,0.875,bilinear,-54.813,-43.397,-13 +efficientnet_b1,38.087,61.913,54.010,45.990,7.79,256,1.000,bicubic,-54.943,-44.690,-20 +coat_lite_tiny,38.070,61.930,53.453,46.547,5.72,224,0.900,bicubic,-54.780,-45.187,-5 +resnetrs50,37.957,62.043,53.310,46.690,35.69,224,0.910,bicubic,-56.063,-45.540,-108 +hardcorenas_c,37.883,62.117,55.717,44.283,5.52,224,0.875,bilinear,-54.447,-42.623,+19 +gluon_resnet50_v1c,37.843,62.157,54.123,45.877,25.58,224,0.875,bicubic,-55.067,-44.587,-15 +res2net50_26w_4s,37.827,62.173,53.073,46.927,25.70,224,0.875,bilinear,-55.353,-45.597,-33 +efficientnet_es,37.770,62.230,54.967,45.033,5.44,224,0.875,bicubic,-55.140,-43.723,-18 +resnest14d,37.767,62.233,56.470,43.530,10.61,224,0.875,bilinear,-53.363,-41.860,+54 +tv_resnext50_32x4d,37.750,62.250,54.113,45.887,25.03,224,0.875,bilinear,-55.150,-44.697,-17 +ecaresnet26t,37.650,62.350,54.350,45.650,16.01,320,0.950,bicubic,-56.290,-44.570,-106 +hardcorenas_d,37.550,62.450,54.723,45.277,7.50,224,0.875,bilinear,-55.050,-43.707,-1 +res2next50,37.477,62.523,52.853,47.147,24.67,224,0.875,bilinear,-55.673,-45.807,-37 +resnet34,37.443,62.557,54.297,45.703,21.80,224,0.875,bilinear,-53.757,-43.943,+45 +pit_ti_distilled_224,37.337,62.663,55.137,44.863,5.10,224,0.900,bicubic,-53.563,-43.083,+53 +hardcorenas_b,37.243,62.757,55.073,44.927,5.18,224,0.875,bilinear,-54.697,-43.327,+21 
+mobilenetv3_large_100_miil,37.210,62.790,53.513,46.487,5.48,224,0.875,bilinear,-55.040,-44.737,+11 +res2net50_48w_2s,37.117,62.883,53.333,46.667,25.29,224,0.875,bilinear,-55.673,-45.137,-16 +dla60,37.073,62.927,54.200,45.800,22.04,224,0.875,bilinear,-55.597,-44.450,-12 +rexnet_100,37.063,62.937,54.020,45.980,4.80,224,0.875,bicubic,-55.787,-44.600,-21 +regnety_016,37.017,62.983,54.093,45.907,11.20,224,0.875,bicubic,-55.983,-44.587,-36 +tf_mixnet_l,36.987,63.013,52.583,47.417,7.33,224,0.875,bicubic,-56.053,-45.957,-43 +legacy_seresnet50,36.873,63.127,53.487,46.513,28.09,224,0.875,bilinear,-55.797,-45.143,-17 +tv_densenet121,36.810,63.190,54.033,45.967,7.98,224,0.875,bicubic,-54.590,-44.217,+28 +tf_efficientnet_lite2,36.807,63.193,53.320,46.680,6.09,260,0.890,bicubic,-55.783,-45.230,-12 +mobilenetv2_120d,36.780,63.220,54.047,45.953,5.83,224,0.875,bicubic,-55.830,-44.323,-16 +tf_efficientnet_lite1,36.737,63.263,53.590,46.410,5.42,240,0.882,bicubic,-55.573,-44.900,-2 +regnetx_016,36.683,63.317,53.297,46.703,9.19,224,0.875,bicubic,-55.857,-45.253,-11 +hardcorenas_a,36.640,63.360,54.910,45.090,5.26,224,0.875,bilinear,-54.980,-43.260,+15 +levit_128s,36.620,63.380,53.117,46.883,7.78,224,0.900,bicubic,-54.880,-45.283,+17 efficientnet_b0,36.600,63.400,53.497,46.503,5.29,224,0.875,bicubic,-55.880,-45.183,-13 -tf_efficientnet_em,36.380,63.620,52.840,47.160,6.90,240,0.882,bicubic,-56.790,-45.830,-55 -skresnet18,36.320,63.680,54.197,45.803,11.96,224,0.875,bicubic,-53.840,-43.583,+44 +tf_efficientnet_em,36.380,63.620,52.840,47.160,6.90,240,0.882,bicubic,-56.790,-45.830,-57 +skresnet18,36.320,63.680,54.197,45.803,11.96,224,0.875,bicubic,-53.840,-43.583,+46 repvgg_b0,36.287,63.713,54.057,45.943,15.82,224,0.875,bilinear,-55.393,-44.393,+7 tv_resnet50,36.177,63.823,52.803,47.197,25.56,224,0.875,bilinear,-55.963,-45.617,-3 -legacy_seresnet34,36.143,63.857,52.553,47.447,21.96,224,0.875,bilinear,-55.337,-45.767,+12 -tv_resnet34,36.087,63.913,53.533,46.467,21.80,224,0.875,bilinear,-54.203,-44.447,+39 +legacy_seresnet34,36.143,63.857,52.553,47.447,21.96,224,0.875,bilinear,-55.337,-45.647,+12 +coat_tiny,36.123,63.877,51.063,48.937,5.50,224,0.900,bicubic,-57.387,-47.627,-84 +tv_resnet34,36.087,63.913,53.533,46.467,21.80,224,0.875,bilinear,-54.203,-44.447,+40 vit_deit_tiny_distilled_patch16_224,36.023,63.977,54.240,45.760,5.91,224,0.900,bicubic,-55.077,-44.030,+26 -mobilenetv2_140,36.000,64.000,53.943,46.057,6.11,224,0.875,bicubic,-56.030,-44.307,-5 +mobilenetv2_140,36.000,64.000,53.943,46.057,6.11,224,0.875,bicubic,-56.030,-44.307,-6 tf_efficientnet_lite0,35.930,64.070,53.480,46.520,4.65,224,0.875,bicubic,-55.370,-44.610,+13 -selecsls42b,35.813,64.187,52.487,47.513,32.46,224,0.875,bicubic,-56.667,-45.953,-22 -gluon_resnet34_v1b,35.760,64.240,52.187,47.813,21.80,224,0.875,bicubic,-55.340,-45.993,+21 +selecsls42b,35.813,64.187,52.487,47.513,32.46,224,0.875,bicubic,-56.667,-45.953,-23 +gluon_resnet34_v1b,35.763,64.237,52.187,47.813,21.80,224,0.875,bicubic,-55.337,-45.993,+21 dla34,35.643,64.357,52.783,47.217,15.74,224,0.875,bilinear,-55.597,-45.397,+13 -mixnet_m,35.640,64.360,52.430,47.570,5.01,224,0.875,bicubic,-56.630,-45.920,-17 +mixnet_m,35.640,64.360,52.430,47.570,5.01,224,0.875,bicubic,-56.630,-45.920,-18 efficientnet_lite0,35.620,64.380,53.657,46.343,4.65,224,0.875,bicubic,-55.640,-44.593,+10 ssl_resnet18,35.597,64.403,53.740,46.260,11.69,224,0.875,bilinear,-55.103,-44.280,+24 -mobilenetv3_rw,35.547,64.453,53.713,46.287,5.48,224,0.875,bicubic,-56.003,-44.557,-1 
-efficientnet_es_pruned,35.390,64.610,52.850,47.150,5.44,224,0.875,bicubic,-56.310,-45.570,-8 +mobilenetv3_rw,35.547,64.453,53.713,46.287,5.48,224,0.875,bicubic,-56.003,-44.557,-2 +efficientnet_es_pruned,35.390,64.610,52.850,47.150,5.44,224,0.875,bicubic,-56.310,-45.570,-9 mobilenetv2_110d,35.293,64.707,52.830,47.170,4.52,224,0.875,bicubic,-56.057,-45.360,+3 -tf_mixnet_m,35.180,64.820,50.987,49.013,5.01,224,0.875,bicubic,-57.020,-47.433,-19 +tf_mixnet_m,35.180,64.820,50.987,49.013,5.01,224,0.875,bicubic,-57.020,-47.433,-20 hrnet_w18_small_v2,35.173,64.827,52.440,47.560,15.60,224,0.875,bilinear,-55.997,-45.900,+9 -resnet18d,35.127,64.873,52.890,47.110,11.71,224,0.875,bicubic,-54.863,-44.940,+26 -ese_vovnet19b_dw,34.840,65.160,52.030,47.970,6.54,224,0.875,bicubic,-57.170,-46.480,-18 -regnety_008,34.807,65.193,51.743,48.257,6.26,224,0.875,bicubic,-57.093,-46.677,-16 +resnet18d,35.127,64.873,52.890,47.110,11.71,224,0.875,bicubic,-54.863,-44.940,+27 +convit_tiny,35.047,64.953,51.787,48.213,5.71,224,0.875,bicubic,-55.483,-46.423,+20 +ese_vovnet19b_dw,34.840,65.160,52.030,47.970,6.54,224,0.875,bicubic,-57.170,-46.480,-20 +regnety_008,34.807,65.193,51.743,48.257,6.26,224,0.875,bicubic,-57.093,-46.677,-18 pit_ti_224,34.670,65.330,52.170,47.830,4.85,224,0.900,bicubic,-55.750,-45.840,+19 -mobilenetv3_large_100,34.603,65.397,52.860,47.140,5.48,224,0.875,bicubic,-56.877,-45.340,-9 -seresnext26d_32x4d,34.543,65.457,51.543,48.457,16.81,224,0.875,bicubic,-57.897,-46.997,-36 -seresnext26t_32x4d,34.540,65.460,51.377,48.623,16.81,224,0.875,bicubic,-58.280,-47.183,-57 -mixer_b16_224,34.423,65.577,48.093,51.907,59.88,224,0.875,bicubic,-56.717,-49.307,+2 -resnet26d,34.273,65.727,51.687,48.313,16.01,224,0.875,bicubic,-57.957,-46.763,-30 -tf_efficientnet_es,34.263,65.737,51.350,48.650,5.44,224,0.875,bicubic,-57.837,-47.090,-28 -fbnetc_100,34.253,65.747,51.180,48.820,5.57,224,0.875,bilinear,-57.017,-46.650,-8 -regnety_006,34.150,65.850,51.277,48.723,6.06,224,0.875,bicubic,-57.420,-47.153,-18 -tf_mobilenetv3_large_100,33.950,66.050,51.490,48.510,5.48,224,0.875,bilinear,-57.470,-46.770,-14 -regnetx_008,33.770,66.230,50.547,49.453,7.26,224,0.875,bicubic,-57.410,-47.833,-6 -mnasnet_100,33.763,66.237,51.170,48.830,4.38,224,0.875,bicubic,-57.437,-47.070,-8 -semnasnet_100,33.520,66.480,50.787,49.213,3.89,224,0.875,bicubic,-58.140,-47.483,-24 -resnet26,33.500,66.500,50.927,49.073,16.00,224,0.875,bicubic,-57.940,-47.353,-19 -mixnet_s,33.480,66.520,50.997,49.003,4.13,224,0.875,bicubic,-58.300,-47.303,-30 -spnasnet_100,33.477,66.523,51.267,48.733,4.42,224,0.875,bilinear,-57.133,-46.683,+1 -vgg19_bn,33.230,66.770,50.803,49.197,143.68,224,0.875,bilinear,-57.760,-47.307,-5 +mobilenetv3_large_100,34.603,65.397,52.860,47.140,5.48,224,0.875,bicubic,-56.877,-45.460,-9 +seresnext26d_32x4d,34.543,65.457,51.543,48.457,16.81,224,0.875,bicubic,-57.897,-46.957,-39 +seresnext26t_32x4d,34.540,65.460,51.377,48.623,16.81,224,0.875,bicubic,-58.280,-47.183,-59 +mixer_b16_224,34.423,65.577,48.093,51.907,59.88,224,0.875,bicubic,-56.717,-49.307,+1 +resnet26d,34.273,65.727,51.687,48.313,16.01,224,0.875,bicubic,-57.957,-46.763,-32 +tf_efficientnet_es,34.263,65.737,51.350,48.650,5.44,224,0.875,bicubic,-57.837,-47.090,-30 +fbnetc_100,34.253,65.747,51.180,48.820,5.57,224,0.875,bilinear,-57.017,-46.650,-9 +regnety_006,34.150,65.850,51.277,48.723,6.06,224,0.875,bicubic,-57.420,-47.153,-20 +tf_mobilenetv3_large_100,33.950,66.050,51.490,48.510,5.48,224,0.875,bilinear,-57.470,-46.770,-15 
+regnetx_008,33.770,66.230,50.547,49.453,7.26,224,0.875,bicubic,-57.410,-47.833,-7 +mnasnet_100,33.763,66.237,51.170,48.830,4.38,224,0.875,bicubic,-57.437,-46.880,-10 +semnasnet_100,33.520,66.480,50.787,49.213,3.89,224,0.875,bicubic,-58.140,-47.483,-26 +resnet26,33.500,66.500,50.927,49.073,16.00,224,0.875,bicubic,-57.940,-47.353,-20 +mixnet_s,33.480,66.520,50.997,49.003,4.13,224,0.875,bicubic,-58.300,-47.303,-32 +spnasnet_100,33.477,66.523,51.267,48.733,4.42,224,0.875,bilinear,-57.133,-46.683,0 +vgg19_bn,33.230,66.770,50.803,49.197,143.68,224,0.875,bilinear,-57.760,-47.307,-6 ghostnet_100,33.207,66.793,51.163,48.837,5.18,224,0.875,bilinear,-57.233,-46.667,+1 -regnetx_006,33.157,66.843,50.250,49.750,6.20,224,0.875,bicubic,-57.603,-47.850,-4 +regnetx_006,33.157,66.843,50.250,49.750,6.20,224,0.875,bicubic,-57.603,-47.850,-5 resnet18,33.067,66.933,51.170,48.830,11.69,224,0.875,bilinear,-55.083,-45.950,+17 -legacy_seresnext26_32x4d,32.757,67.243,49.237,50.763,16.79,224,0.875,bicubic,-59.813,-49.183,-61 +legacy_seresnext26_32x4d,32.757,67.243,49.237,50.763,16.79,224,0.875,bicubic,-59.813,-49.183,-63 hrnet_w18_small,32.667,67.333,50.587,49.413,13.19,224,0.875,bilinear,-57.213,-47.313,+3 vit_deit_tiny_patch16_224,32.667,67.333,50.273,49.727,5.72,224,0.900,bicubic,-56.953,-47.687,+5 legacy_seresnet18,32.600,67.400,50.340,49.660,11.78,224,0.875,bicubic,-56.670,-47.340,+7 mobilenetv2_100,32.523,67.477,50.800,49.200,3.50,224,0.875,bicubic,-57.307,-47.030,+1 regnetx_004,32.517,67.483,49.343,50.657,5.16,224,0.875,bicubic,-56.943,-48.427,+3 gluon_resnet18_v1b,32.407,67.593,49.727,50.273,11.69,224,0.875,bicubic,-56.253,-47.373,+7 -regnety_004,32.333,67.667,49.453,50.547,4.34,224,0.875,bicubic,-58.447,-48.627,-14 -tf_mixnet_s,32.183,67.817,48.493,51.507,4.13,224,0.875,bicubic,-59.497,-49.747,-41 +regnety_004,32.333,67.667,49.453,50.547,4.34,224,0.875,bicubic,-58.447,-48.627,-15 +tf_mixnet_s,32.183,67.817,48.493,51.507,4.13,224,0.875,bicubic,-59.497,-49.747,-43 tf_mobilenetv3_large_075,31.867,68.133,49.110,50.890,3.99,224,0.875,bilinear,-58.453,-48.760,-9 tf_mobilenetv3_large_minimal_100,31.597,68.403,49.337,50.663,3.92,224,0.875,bilinear,-57.583,-47.983,+2 -vgg16_bn,30.357,69.643,47.260,52.740,138.37,224,0.875,bilinear,-60.183,-50.730,-14 +vgg16_bn,30.357,69.643,47.260,52.740,138.37,224,0.875,bilinear,-60.183,-50.730,-15 regnety_002,29.687,70.313,46.787,53.213,3.16,224,0.875,bicubic,-58.513,-50.643,+3 vgg13_bn,28.883,71.117,46.737,53.263,133.05,224,0.875,bilinear,-60.317,-50.793,-2 regnetx_002,28.860,71.140,45.420,54.580,2.68,224,0.875,bicubic,-58.520,-51.570,+4 diff --git a/results/results-imagenet-real.csv b/results/results-imagenet-real.csv index fa0fda4a..f1165a01 100644 --- a/results/results-imagenet-real.csv +++ b/results/results-imagenet-real.csv @@ -3,135 +3,158 @@ tf_efficientnet_l2_ns,90.563,9.437,98.779,1.221,480.31,800,0.960,bicubic,+2.211, tf_efficientnet_l2_ns_475,90.537,9.463,98.710,1.290,480.31,475,0.936,bicubic,+2.303,+0.164,0 cait_m48_448,90.196,9.804,98.484,1.516,356.46,448,1.000,bicubic,+3.712,+0.730,+2 tf_efficientnet_b7_ns,90.100,9.900,98.614,1.386,66.35,600,0.949,bicubic,+3.260,+0.520,0 -cait_m36_384,90.046,9.954,98.493,1.507,271.22,384,1.000,bicubic,+3.992,+0.763,+6 +cait_m36_384,90.046,9.954,98.493,1.507,271.22,384,1.000,bicubic,+3.992,+0.763,+7 swin_large_patch4_window12_384,90.027,9.973,98.657,1.343,196.74,384,1.000,bicubic,+2.879,+0.423,-3 -swin_base_patch4_window12_384,89.995,10.005,98.695,1.304,87.90,384,1.000,bicubic,+3.563,+0.637,0 
+tf_efficientnetv2_l_in21ft1k,90.008,9.992,98.619,1.381,118.52,480,1.000,bicubic,+3.704,+0.641,+2 +swin_base_patch4_window12_384,89.995,10.005,98.695,1.304,87.90,384,1.000,bicubic,+3.563,+0.637,-1 dm_nfnet_f6,89.901,10.099,98.529,1.471,438.36,576,0.956,bicubic,+3.605,+0.785,+1 -cait_s36_384,89.844,10.156,98.427,1.573,68.37,384,1.000,bicubic,+4.384,+0.947,+6 -swin_large_patch4_window7_224,89.796,10.204,98.640,1.360,196.53,224,0.900,bicubic,+3.477,+0.744,-2 -tf_efficientnet_b6_ns,89.782,10.218,98.510,1.490,43.04,528,0.942,bicubic,+3.330,+0.628,-5 -tf_efficientnet_b5_ns,89.651,10.349,98.482,1.518,30.39,456,0.934,bicubic,+3.563,+0.730,-2 -tf_efficientnet_b8_ap,89.581,10.419,98.305,1.695,87.41,672,0.954,bicubic,+4.211,+1.011,+6 -cait_s24_384,89.502,10.498,98.362,1.638,47.06,384,1.000,bicubic,+4.456,+1.016,+11 -tf_efficientnet_b7_ap,89.429,10.571,98.347,1.653,66.35,600,0.949,bicubic,+4.309,+1.096,+8 -vit_deit_base_distilled_patch16_384,89.429,10.571,98.441,1.559,87.63,384,1.000,bicubic,+4.007,+1.109,+1 +cait_s36_384,89.844,10.156,98.427,1.573,68.37,384,1.000,bicubic,+4.384,+0.947,+8 +swin_large_patch4_window7_224,89.796,10.204,98.640,1.360,196.53,224,0.900,bicubic,+3.477,+0.744,-3 +tf_efficientnet_b6_ns,89.782,10.218,98.510,1.490,43.04,528,0.942,bicubic,+3.330,+0.628,-6 +tf_efficientnetv2_m_in21ft1k,89.775,10.225,98.503,1.497,54.14,480,1.000,bicubic,+4.187,+0.751,+2 +tf_efficientnet_b5_ns,89.651,10.349,98.482,1.518,30.39,456,0.934,bicubic,+3.563,+0.730,-3 +tf_efficientnet_b8_ap,89.581,10.419,98.305,1.695,87.41,672,0.954,bicubic,+4.211,+1.011,+7 +cait_s24_384,89.502,10.498,98.362,1.638,47.06,384,1.000,bicubic,+4.456,+1.016,+12 +tf_efficientnet_b7_ap,89.429,10.571,98.347,1.653,66.35,600,0.949,bicubic,+4.309,+1.096,+9 +vit_deit_base_distilled_patch16_384,89.429,10.571,98.441,1.559,87.63,384,1.000,bicubic,+4.007,+1.109,+2 dm_nfnet_f3,89.393,10.607,98.315,1.685,254.92,416,0.940,bicubic,+3.833,+0.909,-3 +tf_efficientnetv2_l,89.367,10.633,98.275,1.725,118.52,480,1.000,bicubic,+3.877,+0.903,-3 tf_efficientnet_b8,89.355,10.645,98.303,1.697,87.41,672,0.954,bicubic,+3.985,+0.913,0 -tf_efficientnet_b6_ap,89.342,10.658,98.281,1.719,43.04,528,0.942,bicubic,+4.554,+1.143,+13 +tf_efficientnet_b6_ap,89.342,10.658,98.281,1.719,43.04,528,0.942,bicubic,+4.554,+1.143,+15 tf_efficientnet_b4_ns,89.305,10.694,98.347,1.653,19.34,380,0.922,bicubic,+4.143,+0.877,+1 -dm_nfnet_f4,89.299,10.701,98.224,1.776,316.07,512,0.951,bicubic,+3.641,+0.714,-8 -dm_nfnet_f5,89.184,10.816,98.232,1.768,377.21,544,0.954,bicubic,+3.470,+0.790,-10 -swin_base_patch4_window7_224,89.145,10.855,98.429,1.571,87.77,224,0.900,bicubic,+3.893,+0.867,-3 -cait_xs24_384,89.139,10.861,98.290,1.710,26.67,384,1.000,bicubic,+5.077,+1.402,+24 -ig_resnext101_32x48d,89.120,10.880,98.130,1.870,828.41,224,0.875,bilinear,+3.692,+0.558,-9 -ig_resnext101_32x32d,89.111,10.889,98.181,1.819,468.53,224,0.875,bilinear,+4.017,+0.743,-2 +dm_nfnet_f4,89.299,10.701,98.224,1.776,316.07,512,0.951,bicubic,+3.641,+0.714,-10 +tf_efficientnetv2_m,89.284,10.716,98.236,1.764,54.14,480,1.000,bicubic,+4.240,+0.958,+4 +dm_nfnet_f5,89.184,10.816,98.232,1.768,377.21,544,0.954,bicubic,+3.470,+0.790,-13 +swin_base_patch4_window7_224,89.145,10.855,98.429,1.571,87.77,224,0.900,bicubic,+3.893,+0.867,-4 +cait_xs24_384,89.139,10.861,98.290,1.710,26.67,384,1.000,bicubic,+5.077,+1.402,+26 +ig_resnext101_32x48d,89.120,10.880,98.130,1.870,828.41,224,0.875,bilinear,+3.692,+0.558,-10 +ig_resnext101_32x32d,89.111,10.889,98.181,1.819,468.53,224,0.875,bilinear,+4.017,+0.743,-3 
tf_efficientnet_b7,89.086,10.914,98.183,1.817,66.35,600,0.949,bicubic,+4.150,+0.979,+3 ecaresnet269d,89.069,10.931,98.234,1.766,102.09,352,1.000,bicubic,+4.093,+1.008,0 -tf_efficientnet_b5_ap,88.938,11.062,98.164,1.836,30.39,456,0.934,bicubic,+4.686,+1.190,+13 -dm_nfnet_f2,88.889,11.111,98.117,1.883,193.78,352,0.920,bicubic,+3.899,+0.973,-3 -dm_nfnet_f1,88.853,11.147,98.093,1.907,132.63,320,0.910,bicubic,+4.249,+1.025,+3 -resnetrs420,88.840,11.160,98.034,1.966,191.89,416,1.000,bicubic,+3.832,+0.910,-6 +efficientnetv2_rw_m,88.987,11.013,98.213,1.787,53.24,416,1.000,bicubic,+4.179,+1.065,+3 +tf_efficientnet_b5_ap,88.938,11.062,98.164,1.836,30.39,456,0.934,bicubic,+4.686,+1.190,+14 +tf_efficientnetv2_s_in21ft1k,88.904,11.096,98.277,1.723,21.46,384,1.000,bicubic,+4.602,+1.025,+10 +dm_nfnet_f2,88.889,11.111,98.117,1.883,193.78,352,0.920,bicubic,+3.899,+0.973,-5 +dm_nfnet_f1,88.853,11.147,98.093,1.907,132.63,320,0.910,bicubic,+4.249,+1.025,+2 +resnetrs420,88.840,11.160,98.034,1.966,191.89,416,1.000,bicubic,+3.832,+0.910,-8 ig_resnext101_32x16d,88.834,11.166,98.049,1.951,194.03,224,0.875,bilinear,+4.664,+0.853,+11 -resnetrs270,88.834,11.166,98.136,1.864,129.86,352,1.000,bicubic,+4.400,+1.166,+3 -vit_base_r50_s16_384,88.808,11.192,98.232,1.768,98.95,384,1.000,bicubic,+3.836,+0.944,-6 -seresnet152d,88.795,11.205,98.172,1.828,66.84,320,1.000,bicubic,+4.433,+1.132,+3 +resnetrs270,88.834,11.166,98.136,1.864,129.86,352,1.000,bicubic,+4.400,+1.166,+2 +vit_base_r50_s16_384,88.808,11.192,98.232,1.768,98.95,384,1.000,bicubic,+3.836,+0.944,-8 +seresnet152d,88.795,11.205,98.172,1.828,66.84,320,1.000,bicubic,+4.433,+1.132,+2 swsl_resnext101_32x8d,88.770,11.230,98.147,1.853,88.79,224,0.875,bilinear,+4.486,+0.971,+3 tf_efficientnet_b6,88.761,11.239,98.064,1.937,43.04,528,0.942,bicubic,+4.651,+1.178,+8 -resnetrs350,88.759,11.241,98.029,1.971,163.96,384,1.000,bicubic,+4.039,+1.041,-6 +resnetrs350,88.759,11.241,98.029,1.971,163.96,384,1.000,bicubic,+4.039,+1.041,-7 vit_base_patch16_224_miil,88.737,11.262,98.027,1.973,86.54,224,0.875,bilinear,+4.469,+1.225,+1 -resnetv2_152x2_bitm,88.699,11.301,98.337,1.663,236.34,480,1.000,bilinear,+4.259,+0.891,-5 -regnety_160,88.697,11.303,98.068,1.932,83.59,288,1.000,bicubic,+5.011,+1.292,+15 +resnetv2_152x2_bitm,88.699,11.301,98.337,1.663,236.34,480,1.000,bilinear,+4.259,+0.891,-6 +regnety_160,88.697,11.303,98.068,1.932,83.59,288,1.000,bicubic,+5.011,+1.292,+16 pit_b_distilled_224,88.676,11.324,98.093,1.907,74.79,224,0.900,bicubic,+4.532,+1.237,+2 resnetrs200,88.605,11.395,98.034,1.966,93.21,320,1.000,bicubic,+4.539,+1.160,+3 eca_nfnet_l1,88.575,11.425,98.130,1.870,41.41,320,1.000,bicubic,+4.567,+1.102,+5 -resnetv2_152x4_bitm,88.565,11.435,98.185,1.815,936.53,480,1.000,bilinear,+3.633,+0.749,-15 +resnetv2_152x4_bitm,88.565,11.435,98.185,1.815,936.53,480,1.000,bilinear,+3.633,+0.749,-17 resnet200d,88.543,11.457,97.959,2.041,64.69,320,1.000,bicubic,+4.581,+1.135,+4 -resnest269e,88.522,11.478,98.027,1.973,110.93,416,0.928,bicubic,+4.004,+1.041,-13 -resnetv2_101x3_bitm,88.492,11.508,98.162,1.838,387.93,480,1.000,bilinear,+4.098,+0.800,-11 -efficientnet_v2s,88.473,11.527,97.974,2.026,23.94,384,1.000,bicubic,+4.665,+1.250,+4 -cait_s24_224,88.447,11.553,97.957,2.043,46.92,224,1.000,bicubic,+4.995,+1.393,+8 -resnest200e,88.432,11.568,98.042,1.958,70.20,320,0.909,bicubic,+4.600,+1.148,0 +resnest269e,88.522,11.478,98.027,1.973,110.93,416,0.928,bicubic,+4.004,+1.041,-14 +resnetv2_101x3_bitm,88.492,11.508,98.162,1.838,387.93,480,1.000,bilinear,+4.098,+0.800,-12 
+efficientnetv2_rw_s,88.473,11.527,97.974,2.026,23.94,384,1.000,bicubic,+4.665,+1.250,+5 +cait_s24_224,88.447,11.553,97.957,2.043,46.92,224,1.000,bicubic,+4.995,+1.393,+10 +resnest200e,88.432,11.568,98.042,1.958,70.20,320,0.909,bicubic,+4.600,+1.148,+1 tf_efficientnet_b3_ns,88.426,11.574,98.029,1.971,12.23,300,0.904,bicubic,+4.378,+1.119,-4 -vit_large_patch16_384,88.407,11.593,98.187,1.813,304.72,384,1.000,bicubic,+3.249,+0.831,-32 -vit_base_patch16_384,88.389,11.611,98.155,1.845,86.86,384,1.000,bicubic,+4.180,+0.937,-12 -efficientnet_b4,88.372,11.628,97.961,2.039,19.34,384,1.000,bicubic,+4.944,+1.365,+4 +vit_large_patch16_384,88.407,11.593,98.187,1.813,304.72,384,1.000,bicubic,+3.249,+0.831,-35 +tf_efficientnetv2_s,88.402,11.598,97.927,2.073,21.46,384,1.000,bicubic,+4.508,+1.229,-3 +vit_base_patch16_384,88.389,11.611,98.155,1.845,86.86,384,1.000,bicubic,+4.180,+0.937,-13 +efficientnet_b4,88.372,11.628,97.961,2.039,19.34,384,1.000,bicubic,+4.944,+1.365,+5 resnet152d,88.355,11.645,97.935,2.065,60.21,320,1.000,bicubic,+4.675,+1.197,+1 -tf_efficientnet_b4_ap,88.349,11.651,97.893,2.107,19.34,380,0.922,bicubic,+5.101,+1.501,+5 resnetv2_50x3_bitm,88.349,11.651,98.108,1.892,217.32,480,1.000,bilinear,+4.565,+1.002,-3 +tf_efficientnet_b4_ap,88.349,11.651,97.893,2.107,19.34,380,0.922,bicubic,+5.101,+1.501,+6 tf_efficientnet_b5,88.321,11.679,97.912,2.088,30.39,456,0.934,bicubic,+4.509,+1.164,-7 resnetrs152,88.251,11.749,97.737,2.263,86.62,320,1.000,bicubic,+4.539,+1.123,-5 -vit_deit_base_distilled_patch16_224,88.214,11.786,97.914,2.086,87.34,224,0.900,bicubic,+4.826,+1.426,-1 -ig_resnext101_32x8d,88.146,11.854,97.856,2.144,88.79,224,0.875,bilinear,+5.458,+1.220,+14 -cait_xxs36_384,88.140,11.860,97.908,2.092,17.37,384,1.000,bicubic,+5.946,+1.760,+23 -dm_nfnet_f0,88.112,11.888,97.837,2.163,71.49,256,0.900,bicubic,+4.770,+1.277,-2 -swsl_resnext101_32x4d,88.099,11.901,97.967,2.033,44.18,224,0.875,bilinear,+4.869,+1.207,-1 -tf_efficientnet_b4,87.963,12.037,97.739,2.261,19.34,380,0.922,bicubic,+4.941,+1.439,+5 -nfnet_l0,87.948,12.052,97.850,2.150,35.07,288,1.000,bicubic,+5.188,+1.352,+7 -eca_nfnet_l0,87.943,12.057,97.861,2.139,24.14,288,1.000,bicubic,+5.355,+1.387,+10 -resnet101d,87.941,12.059,97.908,2.092,44.57,320,1.000,bicubic,+4.919,+1.462,+1 -regnety_032,87.937,12.063,97.891,2.109,19.44,288,1.000,bicubic,+5.213,+1.467,+5 -vit_deit_base_patch16_384,87.845,12.155,97.510,2.490,86.86,384,1.000,bicubic,+4.739,+1.138,-5 -tresnet_xl_448,87.796,12.204,97.459,2.541,78.44,448,0.875,bilinear,+4.746,+1.285,-3 -tresnet_m,87.736,12.264,97.523,2.477,31.39,224,0.875,bilinear,+4.656,+1.405,-6 -swin_small_patch4_window7_224,87.664,12.336,97.566,2.434,49.61,224,0.900,bicubic,+4.452,+1.244,-9 -resnetv2_101x1_bitm,87.638,12.362,97.955,2.045,44.54,480,1.000,bilinear,+5.426,+1.483,+10 -pnasnet5large,87.636,12.364,97.485,2.515,86.06,331,0.911,bicubic,+4.854,+1.445,-3 -swsl_resnext101_32x16d,87.615,12.386,97.820,2.180,194.03,224,0.875,bilinear,+4.269,+0.974,-16 -swsl_resnext50_32x4d,87.600,12.400,97.651,2.349,25.03,224,0.875,bilinear,+5.418,+1.421,+9 -tf_efficientnet_b2_ns,87.557,12.443,97.628,2.372,9.11,260,0.890,bicubic,+5.177,+1.380,+1 -ecaresnet50t,87.538,12.462,97.643,2.357,25.57,320,0.950,bicubic,+5.192,+1.505,+1 -efficientnet_b3,87.435,12.565,97.681,2.319,12.23,320,1.000,bicubic,+5.193,+1.567,+3 -cait_xxs24_384,87.416,12.584,97.619,2.381,12.03,384,1.000,bicubic,+6.450,+1.973,+37 -tresnet_l_448,87.377,12.623,97.485,2.515,55.99,448,0.875,bilinear,+5.109,+1.509,0 
-nasnetalarge,87.350,12.650,97.417,2.583,88.75,331,0.911,bicubic,+4.730,+1.371,-7 -ecaresnet101d,87.288,12.712,97.562,2.438,44.57,224,0.875,bicubic,+5.116,+1.516,+3 -resnest101e,87.284,12.716,97.560,2.440,48.28,256,0.875,bilinear,+4.394,+1.240,-14 -pit_s_distilled_224,87.277,12.723,97.500,2.500,24.04,224,0.900,bicubic,+5.281,+1.702,+4 -resnetrs101,87.247,12.753,97.457,2.543,63.62,288,0.940,bicubic,+4.959,+1.449,-6 -tresnet_xl,87.224,12.776,97.400,2.600,78.44,224,0.875,bilinear,+5.170,+1.463,0 -tf_efficientnet_b3_ap,87.192,12.808,97.380,2.620,12.23,300,0.904,bicubic,+5.370,+1.756,+3 +vit_deit_base_distilled_patch16_224,88.214,11.786,97.914,2.086,87.34,224,0.900,bicubic,+4.826,+1.426,0 +ig_resnext101_32x8d,88.146,11.854,97.856,2.144,88.79,224,0.875,bilinear,+5.458,+1.220,+18 +cait_xxs36_384,88.140,11.860,97.908,2.092,17.37,384,1.000,bicubic,+5.946,+1.760,+32 +dm_nfnet_f0,88.112,11.888,97.837,2.163,71.49,256,0.900,bicubic,+4.770,+1.277,-1 +swsl_resnext101_32x4d,88.099,11.901,97.967,2.033,44.18,224,0.875,bilinear,+4.869,+1.207,0 +tf_efficientnet_b4,87.963,12.037,97.739,2.261,19.34,380,0.922,bicubic,+4.941,+1.439,+8 +nfnet_l0,87.948,12.052,97.850,2.150,35.07,288,1.000,bicubic,+5.188,+1.352,+10 +eca_nfnet_l0,87.943,12.057,97.861,2.139,24.14,288,1.000,bicubic,+5.355,+1.387,+14 +resnet101d,87.941,12.059,97.908,2.092,44.57,320,1.000,bicubic,+4.919,+1.462,+4 +regnety_032,87.937,12.063,97.891,2.109,19.44,288,1.000,bicubic,+5.213,+1.467,+8 +twins_svt_large,87.901,12.099,97.581,2.419,99.27,224,0.900,bicubic,+4.223,+0.987,-13 +twins_pcpvt_large,87.877,12.123,97.856,2.144,60.99,224,0.900,bicubic,+4.737,+1.258,-5 +vit_deit_base_patch16_384,87.845,12.155,97.510,2.490,86.86,384,1.000,bicubic,+4.739,+1.138,-4 +tresnet_xl_448,87.796,12.204,97.459,2.541,78.44,448,0.875,bilinear,+4.746,+1.285,-2 +tresnet_m,87.736,12.264,97.523,2.477,31.39,224,0.875,bilinear,+4.656,+1.405,-5 +twins_pcpvt_base,87.736,12.264,97.726,2.274,43.83,224,0.900,bicubic,+5.028,+1.380,+3 +swin_small_patch4_window7_224,87.664,12.336,97.566,2.434,49.61,224,0.900,bicubic,+4.452,+1.244,-11 +twins_svt_base,87.638,12.362,97.523,2.477,56.07,224,0.900,bicubic,+4.502,+1.105,-11 +resnetv2_101x1_bitm,87.638,12.362,97.955,2.045,44.54,480,1.000,bilinear,+5.426,+1.483,+16 +pnasnet5large,87.636,12.364,97.485,2.515,86.06,331,0.911,bicubic,+4.854,+1.445,-4 +swsl_resnext101_32x16d,87.615,12.386,97.820,2.180,194.03,224,0.875,bilinear,+4.269,+0.974,-19 +swsl_resnext50_32x4d,87.600,12.400,97.651,2.349,25.03,224,0.875,bilinear,+5.418,+1.421,+14 +tf_efficientnet_b2_ns,87.557,12.443,97.628,2.372,9.11,260,0.890,bicubic,+5.177,+1.380,+2 +levit_384,87.553,12.447,97.545,2.455,39.13,224,0.900,bicubic,+4.967,+1.529,-1 +ecaresnet50t,87.538,12.462,97.643,2.357,25.57,320,0.950,bicubic,+5.192,+1.505,+2 +efficientnet_b3,87.435,12.565,97.681,2.319,12.23,320,1.000,bicubic,+5.193,+1.567,+7 +cait_xxs24_384,87.416,12.584,97.619,2.381,12.03,384,1.000,bicubic,+6.450,+1.973,+48 +resnet51q,87.395,12.605,97.587,2.413,35.70,288,1.000,bilinear,+5.035,+1.407,-2 +coat_lite_small,87.380,12.620,97.365,2.635,19.84,224,0.900,bicubic,+5.072,+1.515,-1 +tresnet_l_448,87.377,12.623,97.485,2.515,55.99,448,0.875,bilinear,+5.109,+1.509,+2 +nasnetalarge,87.350,12.650,97.417,2.583,88.75,331,0.911,bicubic,+4.730,+1.371,-10 +ecaresnet101d,87.288,12.712,97.562,2.438,44.57,224,0.875,bicubic,+5.116,+1.516,+5 +resnest101e,87.284,12.716,97.560,2.440,48.28,256,0.875,bilinear,+4.394,+1.240,-18 +pit_s_distilled_224,87.277,12.723,97.500,2.500,24.04,224,0.900,bicubic,+5.281,+1.702,+7 
+resnetrs101,87.247,12.753,97.457,2.543,63.62,288,0.940,bicubic,+4.959,+1.449,-4 +mixer_b16_224_miil,87.226,12.774,97.410,2.590,59.88,224,0.875,bilinear,+4.918,+1.694,-7 +tresnet_xl,87.224,12.776,97.400,2.600,78.44,224,0.875,bilinear,+5.170,+1.463,+2 +convit_base,87.200,12.800,97.286,2.714,86.54,224,0.875,bicubic,+4.910,+1.348,-8 +tf_efficientnet_b3_ap,87.192,12.808,97.380,2.620,12.23,300,0.904,bicubic,+5.370,+1.756,+5 +visformer_small,87.181,12.819,97.323,2.677,40.22,224,0.900,bicubic,+5.075,+1.451,-2 +convit_small,87.053,12.947,97.350,2.650,27.78,224,0.875,bicubic,+5.627,+1.606,+17 +tf_efficientnetv2_b3,87.032,12.968,97.303,2.697,14.36,300,0.904,bicubic,+5.062,+1.521,0 vit_base_patch32_384,87.019,12.981,97.654,2.346,88.30,384,1.000,bicubic,+5.367,+1.526,+5 -vit_large_patch16_224,87.006,12.994,97.690,2.310,304.33,224,0.900,bicubic,+3.944,+1.252,-24 -vit_deit_small_distilled_patch16_224,86.993,13.007,97.316,2.684,22.44,224,0.900,bicubic,+5.793,+1.938,+19 +vit_large_patch16_224,87.006,12.994,97.690,2.310,304.33,224,0.900,bicubic,+3.944,+1.252,-33 +vit_deit_small_distilled_patch16_224,86.993,13.007,97.316,2.684,22.44,224,0.900,bicubic,+5.793,+1.938,+22 tnt_s_patch16_224,86.903,13.097,97.368,2.632,23.76,224,0.900,bicubic,+5.385,+1.620,+7 -ssl_resnext101_32x16d,86.856,13.143,97.517,2.483,194.03,224,0.875,bilinear,+5.013,+1.421,-3 +ssl_resnext101_32x16d,86.856,13.143,97.517,2.483,194.03,224,0.875,bilinear,+5.013,+1.421,-4 rexnet_200,86.846,13.154,97.276,2.724,16.37,224,0.875,bicubic,+5.214,+1.608,+2 tf_efficientnet_b3,86.835,13.165,97.297,2.703,12.23,300,0.904,bicubic,+5.199,+1.579,0 -vit_deit_base_patch16_224,86.829,13.171,97.049,2.951,86.57,224,0.900,bicubic,+4.831,+1.315,-8 -tresnet_m_448,86.820,13.180,97.212,2.788,31.39,448,0.875,bilinear,+5.106,+1.640,-4 +vit_deit_base_patch16_224,86.829,13.171,97.049,2.951,86.57,224,0.900,bicubic,+4.831,+1.315,-10 +tresnet_m_448,86.820,13.180,97.212,2.788,31.39,448,0.875,bilinear,+5.106,+1.640,-5 ssl_resnext101_32x8d,86.807,13.193,97.466,2.534,88.79,224,0.875,bilinear,+5.191,+1.428,-1 -swsl_resnet50,86.807,13.193,97.498,2.502,25.56,224,0.875,bilinear,+5.641,+1.526,+12 +swsl_resnet50,86.807,13.193,97.498,2.502,25.56,224,0.875,bilinear,+5.641,+1.526,+15 tf_efficientnet_lite4,86.803,13.197,97.263,2.737,13.01,380,0.920,bilinear,+5.267,+1.595,-2 -vit_base_patch16_224,86.778,13.223,97.438,2.562,86.57,224,0.900,bicubic,+4.992,+1.316,-9 +coat_mini,86.793,13.207,97.162,2.837,10.34,224,0.900,bicubic,+5.525,+1.770,+9 +vit_base_patch16_224,86.778,13.223,97.438,2.562,86.57,224,0.900,bicubic,+4.992,+1.316,-11 tresnet_l,86.767,13.233,97.271,2.729,55.99,224,0.875,bilinear,+5.277,+1.647,-1 +twins_svt_small,86.756,13.244,97.175,2.825,24.06,224,0.900,bicubic,+5.074,+1.505,-11 +levit_256,86.728,13.272,97.259,2.741,18.89,224,0.900,bicubic,+5.218,+1.769,-5 seresnext50_32x4d,86.699,13.301,97.214,2.786,27.56,224,0.875,bicubic,+5.433,+1.594,+5 -pit_b_224,86.686,13.314,96.898,3.102,73.76,224,0.900,bicubic,+4.240,+1.188,-27 -tf_efficientnet_b1_ns,86.669,13.331,97.378,2.622,7.79,240,0.882,bicubic,+5.281,+1.640,-2 -swin_tiny_patch4_window7_224,86.664,13.336,97.197,2.803,28.29,224,0.900,bicubic,+5.286,+1.657,-2 -gernet_l,86.654,13.346,97.186,2.814,31.08,256,0.875,bilinear,+5.300,+1.650,-2 -wide_resnet50_2,86.647,13.353,97.214,2.786,68.88,224,0.875,bicubic,+5.191,+1.682,-6 -efficientnet_el,86.635,13.366,97.175,2.825,10.59,300,0.904,bicubic,+5.319,+1.649,-3 +pit_b_224,86.686,13.314,96.898,3.102,73.76,224,0.900,bicubic,+4.240,+1.188,-37 
+tf_efficientnet_b1_ns,86.669,13.331,97.378,2.622,7.79,240,0.882,bicubic,+5.281,+1.640,-3 +swin_tiny_patch4_window7_224,86.664,13.336,97.197,2.803,28.29,224,0.900,bicubic,+5.286,+1.657,-3 +gernet_l,86.654,13.346,97.186,2.814,31.08,256,0.875,bilinear,+5.300,+1.650,-3 +wide_resnet50_2,86.647,13.353,97.214,2.786,68.88,224,0.875,bicubic,+5.191,+1.682,-8 +efficientnet_el,86.635,13.366,97.175,2.825,10.59,300,0.904,bicubic,+5.319,+1.649,-4 +twins_pcpvt_small,86.620,13.380,97.340,2.660,24.11,224,0.900,bicubic,+5.532,+1.698,+4 nf_resnet50,86.617,13.383,97.282,2.718,25.56,288,0.940,bicubic,+5.923,+1.926,+15 -resnest50d_4s2x40d,86.592,13.408,97.269,2.731,30.42,224,0.875,bicubic,+5.484,+1.711,+1 +resnest50d_4s2x40d,86.592,13.408,97.269,2.731,30.42,224,0.875,bicubic,+5.484,+1.711,0 efficientnet_b3_pruned,86.581,13.419,97.190,2.810,9.86,300,0.904,bicubic,+5.723,+1.948,+9 repvgg_b3,86.566,13.434,97.139,2.861,123.09,224,0.875,bilinear,+6.074,+1.879,+17 ssl_resnext101_32x4d,86.479,13.521,97.468,2.532,44.18,224,0.875,bilinear,+5.555,+1.740,+4 ecaresnet50d,86.470,13.530,97.186,2.814,25.58,224,0.875,bicubic,+5.878,+1.866,+13 gluon_resnet152_v1s,86.468,13.532,97.109,2.891,60.32,224,0.875,bicubic,+5.452,+1.697,-2 resnest50d_1s4x24d,86.447,13.553,97.148,2.852,25.68,224,0.875,bicubic,+5.459,+1.826,-2 -repvgg_b3g4,86.361,13.639,97.054,2.946,83.83,224,0.875,bilinear,+6.149,+1.944,+29 -legacy_senet154,86.342,13.658,96.928,3.072,115.09,224,0.875,bilinear,+5.032,+1.432,-12 -cait_xxs36_224,86.340,13.660,97.111,2.889,17.30,224,1.000,bicubic,+6.590,+2.245,+49 +repvgg_b3g4,86.361,13.639,97.054,2.946,83.83,224,0.875,bilinear,+6.147,+1.944,+29 +legacy_senet154,86.342,13.658,96.928,3.072,115.09,224,0.875,bilinear,+5.032,+1.432,-14 +cait_xxs36_224,86.340,13.660,97.111,2.889,17.30,224,1.000,bicubic,+6.590,+2.245,+51 gernet_m,86.319,13.681,97.096,2.904,21.14,224,0.875,bilinear,+5.587,+1.912,+3 -pit_s_224,86.316,13.684,97.045,2.955,23.46,224,0.900,bicubic,+5.222,+1.713,-9 +pit_s_224,86.316,13.684,97.045,2.955,23.46,224,0.900,bicubic,+5.222,+1.713,-10 efficientnet_b2,86.304,13.696,96.990,3.010,9.11,288,1.000,bicubic,+5.692,+1.672,+3 -gluon_senet154,86.278,13.722,96.949,3.051,115.09,224,0.875,bicubic,+5.044,+1.601,-15 +gluon_senet154,86.278,13.722,96.949,3.051,115.09,224,0.875,bicubic,+5.044,+1.601,-16 resnest50d,86.240,13.761,97.073,2.927,27.48,224,0.875,bilinear,+5.266,+1.695,-9 -ecaresnet101d_pruned,86.210,13.790,97.335,2.665,24.88,224,0.875,bicubic,+5.392,+1.707,-4 +ecaresnet101d_pruned,86.207,13.793,97.335,2.665,24.88,224,0.875,bicubic,+5.389,+1.707,-4 efficientnet_el_pruned,86.192,13.807,97.026,2.974,10.59,300,0.904,bicubic,+5.892,+1.808,+14 -cspdarknet53,86.182,13.818,97.013,2.987,27.64,256,0.887,bilinear,+6.124,+1.929,+25 -inception_v4,86.169,13.831,96.919,3.081,42.68,299,0.875,bicubic,+6.001,+1.951,+20 -resnetv2_50x1_bitm,86.154,13.846,97.560,2.440,25.55,480,1.000,bilinear,+5.982,+1.934,+18 +cspdarknet53,86.182,13.818,97.013,2.987,27.64,256,0.887,bilinear,+6.124,+1.929,+26 +inception_v4,86.169,13.831,96.919,3.081,42.68,299,0.875,bicubic,+6.001,+1.951,+21 +resnetv2_50x1_bitm,86.154,13.846,97.560,2.440,25.55,480,1.000,bilinear,+5.982,+1.934,+19 rexnet_150,86.154,13.846,97.058,2.942,9.73,224,0.875,bicubic,+5.844,+1.892,+8 inception_resnet_v2,86.133,13.867,97.043,2.957,55.84,299,0.897,bicubic,+5.675,+1.737,+2 ssl_resnext50_32x4d,86.086,13.914,97.212,2.788,25.03,224,0.875,bilinear,+5.768,+1.806,+5 @@ -140,106 +163,112 @@ gluon_resnet101_v1s,86.054,13.946,97.022,2.978,44.67,224,0.875,bicubic,+5.752,+1 
ecaresnetlight,86.052,13.948,97.069,2.931,30.16,224,0.875,bicubic,+5.590,+1.819,-3 gluon_seresnext101_32x4d,86.032,13.968,96.977,3.023,48.96,224,0.875,bicubic,+5.128,+1.683,-18 resnet50d,86.009,13.991,96.979,3.021,25.58,224,0.875,bicubic,+5.479,+1.819,-9 -ecaresnet26t,85.983,14.017,97.041,2.959,16.01,320,0.950,bicubic,+6.129,+1.957,+26 +ecaresnet26t,85.983,14.017,97.041,2.959,16.01,320,0.950,bicubic,+6.129,+1.957,+27 tf_efficientnet_b2_ap,85.975,14.025,96.810,3.190,9.11,260,0.890,bicubic,+5.675,+1.782,+2 gluon_seresnext101_64x4d,85.960,14.040,96.979,3.021,88.23,224,0.875,bicubic,+5.066,+1.671,-21 gluon_resnet152_v1d,85.917,14.083,96.812,3.188,60.21,224,0.875,bicubic,+5.443,+1.606,-10 -vit_large_patch32_384,85.909,14.091,97.368,2.632,306.63,384,1.000,bicubic,+4.403,+1.276,-43 -tf_efficientnet_b2,85.902,14.098,96.862,3.139,9.11,260,0.890,bicubic,+5.816,+1.954,+8 -seresnet50,85.857,14.143,97.004,2.995,28.09,224,0.875,bicubic,+5.583,+1.934,-2 -repvgg_b2g4,85.855,14.145,96.812,3.188,61.76,224,0.875,bilinear,+6.489,+2.124,+37 -gluon_resnet101_v1d,85.849,14.151,96.663,3.337,44.57,224,0.875,bicubic,+5.435,+1.649,-12 -resnet50,85.804,14.196,96.712,3.288,25.56,224,0.875,bicubic,+6.766,+2.322,+58 -mixnet_xl,85.798,14.202,96.712,3.288,11.90,224,0.875,bicubic,+5.322,+1.776,-18 +vit_large_patch32_384,85.909,14.091,97.368,2.632,306.63,384,1.000,bicubic,+4.403,+1.276,-46 +tf_efficientnet_b2,85.902,14.098,96.862,3.139,9.11,260,0.890,bicubic,+5.816,+1.954,+9 +tf_efficientnetv2_b2,85.900,14.100,96.889,3.111,10.10,260,0.890,bicubic,+5.692,+1.847,+3 +seresnet50,85.857,14.143,97.004,2.995,28.09,224,0.875,bicubic,+5.583,+1.934,-3 +repvgg_b2g4,85.855,14.145,96.812,3.188,61.76,224,0.875,bilinear,+6.489,+2.124,+39 +gluon_resnet101_v1d,85.849,14.151,96.663,3.337,44.57,224,0.875,bicubic,+5.435,+1.649,-13 +resnet50,85.804,14.196,96.712,3.288,25.56,224,0.875,bicubic,+6.766,+2.322,+60 +mixnet_xl,85.798,14.202,96.712,3.288,11.90,224,0.875,bicubic,+5.322,+1.776,-19 ens_adv_inception_resnet_v2,85.781,14.220,96.759,3.241,55.84,299,0.897,bicubic,+5.799,+1.823,+6 -tf_efficientnet_lite3,85.755,14.245,96.887,3.113,8.20,300,0.904,bilinear,+5.935,+1.973,+16 -ese_vovnet39b,85.751,14.249,96.891,3.109,24.57,224,0.875,bicubic,+6.431,+2.179,+33 -gluon_resnext101_32x4d,85.746,14.254,96.635,3.365,44.18,224,0.875,bicubic,+5.412,+1.709,-16 -legacy_seresnext101_32x4d,85.746,14.254,96.757,3.243,48.96,224,0.875,bilinear,+5.518,+1.739,-8 +tf_efficientnet_lite3,85.755,14.245,96.887,3.113,8.20,300,0.904,bilinear,+5.935,+1.973,+17 +ese_vovnet39b,85.751,14.249,96.891,3.109,24.57,224,0.875,bicubic,+6.431,+2.179,+35 +gluon_resnext101_32x4d,85.746,14.254,96.635,3.365,44.18,224,0.875,bicubic,+5.412,+1.709,-17 +legacy_seresnext101_32x4d,85.746,14.254,96.757,3.243,48.96,224,0.875,bilinear,+5.518,+1.739,-9 cspresnext50,85.740,14.260,96.840,3.160,20.57,224,0.875,bilinear,+5.700,+1.896,-1 -regnety_320,85.727,14.273,96.725,3.275,145.05,224,0.875,bicubic,+4.915,+1.481,-33 -cspresnet50,85.721,14.279,96.795,3.205,21.62,256,0.887,bilinear,+6.147,+2.083,+20 +regnety_320,85.727,14.273,96.725,3.275,145.05,224,0.875,bicubic,+4.915,+1.481,-34 +cspresnet50,85.721,14.279,96.795,3.205,21.62,256,0.887,bilinear,+6.147,+2.083,+21 xception71,85.697,14.303,96.776,3.224,42.34,299,0.903,bicubic,+5.823,+1.854,+4 -gluon_resnext101_64x4d,85.693,14.307,96.644,3.356,83.46,224,0.875,bicubic,+5.089,+1.656,-32 -efficientnet_em,85.684,14.316,96.938,3.062,6.90,240,0.882,bicubic,+6.432,+2.144,+34 
+gluon_resnext101_64x4d,85.693,14.307,96.644,3.356,83.46,224,0.875,bicubic,+5.089,+1.656,-33 +efficientnet_em,85.684,14.316,96.938,3.062,6.90,240,0.882,bicubic,+6.432,+2.144,+36 vit_deit_small_patch16_224,85.678,14.322,96.906,3.094,22.05,224,0.900,bicubic,+5.822,+1.854,+3 -pit_xs_distilled_224,85.657,14.343,96.667,3.333,11.00,224,0.900,bicubic,+6.351,+2.303,+28 +pit_xs_distilled_224,85.657,14.343,96.667,3.333,11.00,224,0.900,bicubic,+6.351,+2.303,+30 efficientnet_b2_pruned,85.642,14.358,96.746,3.254,8.31,260,0.890,bicubic,+5.726,+1.890,-5 dpn107,85.640,14.360,96.729,3.271,86.92,224,0.875,bicubic,+5.484,+1.819,-14 -ecaresnet50d_pruned,85.580,14.420,96.936,3.064,19.94,224,0.875,bicubic,+5.864,+2.056,+6 +ecaresnet50d_pruned,85.580,14.420,96.936,3.064,19.94,224,0.875,bicubic,+5.864,+2.056,+7 gluon_resnet152_v1c,85.580,14.420,96.646,3.354,60.21,224,0.875,bicubic,+5.670,+1.806,-7 +levit_192,85.580,14.420,96.740,3.260,10.95,224,0.900,bicubic,+5.738,+1.954,-1 resnext50d_32x4d,85.569,14.431,96.748,3.252,25.05,224,0.875,bicubic,+5.893,+1.882,+7 -regnety_120,85.543,14.457,96.785,3.215,51.82,224,0.875,bicubic,+5.177,+1.659,-32 -regnetx_320,85.524,14.476,96.669,3.331,107.81,224,0.875,bicubic,+5.278,+1.643,-24 +tf_efficientnetv2_b1,85.561,14.439,96.727,3.273,8.14,240,0.882,bicubic,+6.099,+2.005,+14 +regnety_120,85.543,14.457,96.785,3.215,51.82,224,0.875,bicubic,+5.177,+1.659,-35 +regnetx_320,85.524,14.476,96.669,3.331,107.81,224,0.875,bicubic,+5.278,+1.643,-27 nf_regnet_b1,85.499,14.501,96.799,3.200,10.22,288,0.900,bicubic,+6.193,+2.051,+19 -dpn92,85.494,14.506,96.635,3.365,37.67,224,0.875,bicubic,+5.486,+1.799,-16 -gluon_resnet152_v1b,85.475,14.525,96.550,3.450,60.19,224,0.875,bicubic,+5.789,+1.814,+1 -rexnet_130,85.473,14.527,96.684,3.316,7.56,224,0.875,bicubic,+5.973,+2.002,+7 -resnetrs50,85.462,14.538,96.736,3.264,35.69,224,0.910,bicubic,+5.570,+1.767,-14 -dpn131,85.398,14.602,96.639,3.361,79.25,224,0.875,bicubic,+5.576,+1.929,-9 -regnetx_160,85.390,14.610,96.637,3.363,54.28,224,0.875,bicubic,+5.534,+1.807,-13 +dpn92,85.494,14.506,96.635,3.365,37.67,224,0.875,bicubic,+5.486,+1.799,-18 +gluon_resnet152_v1b,85.475,14.525,96.550,3.450,60.19,224,0.875,bicubic,+5.789,+1.814,0 +rexnet_130,85.473,14.527,96.684,3.316,7.56,224,0.875,bicubic,+5.973,+2.002,+6 +resnetrs50,85.462,14.538,96.736,3.264,35.69,224,0.910,bicubic,+5.570,+1.767,-16 +dpn131,85.398,14.602,96.639,3.361,79.25,224,0.875,bicubic,+5.576,+1.929,-10 +regnetx_160,85.390,14.610,96.637,3.363,54.28,224,0.875,bicubic,+5.534,+1.807,-15 dla102x2,85.366,14.634,96.629,3.371,41.28,224,0.875,bilinear,+5.918,+1.989,+5 -gluon_seresnext50_32x4d,85.336,14.664,96.667,3.333,27.56,224,0.875,bicubic,+5.418,+1.845,-21 -xception65,85.315,14.685,96.637,3.363,39.92,299,0.903,bicubic,+5.763,+1.983,-1 -skresnext50_32x4d,85.313,14.687,96.390,3.610,27.48,224,0.875,bicubic,+5.157,+1.748,-29 -dpn98,85.311,14.689,96.469,3.531,61.57,224,0.875,bicubic,+5.669,+1.871,-6 -gluon_resnet101_v1c,85.304,14.696,96.405,3.595,44.57,224,0.875,bicubic,+5.770,+1.827,-3 +gluon_seresnext50_32x4d,85.336,14.664,96.667,3.333,27.56,224,0.875,bicubic,+5.418,+1.845,-23 +xception65,85.315,14.685,96.637,3.363,39.92,299,0.903,bicubic,+5.763,+1.983,-2 +skresnext50_32x4d,85.313,14.687,96.390,3.610,27.48,224,0.875,bicubic,+5.157,+1.748,-31 +dpn98,85.311,14.689,96.469,3.531,61.57,224,0.875,bicubic,+5.669,+1.871,-7 +gluon_resnet101_v1c,85.304,14.696,96.405,3.595,44.57,224,0.875,bicubic,+5.770,+1.827,-4 dpn68b,85.291,14.709,96.464,3.536,12.61,224,0.875,bicubic,+6.076,+2.050,+14 
-regnety_064,85.283,14.717,96.639,3.361,30.58,224,0.875,bicubic,+5.561,+1.871,-14 +regnety_064,85.283,14.717,96.639,3.361,30.58,224,0.875,bicubic,+5.561,+1.871,-15 resnetblur50,85.283,14.717,96.531,3.470,25.56,224,0.875,bicubic,+5.997,+1.892,+7 coat_lite_mini,85.251,14.749,96.680,3.320,11.01,224,0.900,bicubic,+6.163,+2.076,+15 -regnety_080,85.245,14.755,96.633,3.367,39.18,224,0.875,bicubic,+5.369,+1.803,-26 -cait_xxs24_224,85.228,14.773,96.712,3.288,11.96,224,1.000,bicubic,+6.842,+2.402,+41 -resnext50_32x4d,85.221,14.779,96.526,3.474,25.03,224,0.875,bicubic,+5.453,+1.928,-21 +regnety_080,85.245,14.755,96.633,3.367,39.18,224,0.875,bicubic,+5.369,+1.803,-28 +cait_xxs24_224,85.228,14.773,96.712,3.288,11.96,224,1.000,bicubic,+6.842,+2.402,+43 +resnext50_32x4d,85.221,14.779,96.526,3.474,25.03,224,0.875,bicubic,+5.453,+1.928,-22 resnext101_32x8d,85.187,14.813,96.445,3.555,88.79,224,0.875,bilinear,+5.879,+1.927,-4 gluon_inception_v3,85.183,14.817,96.526,3.474,23.83,299,0.875,bicubic,+6.377,+2.156,+21 hrnet_w48,85.151,14.849,96.492,3.508,77.47,224,0.875,bilinear,+5.851,+1.980,-1 -gluon_xception65,85.148,14.851,96.597,3.403,39.92,299,0.903,bicubic,+5.433,+1.737,-21 +gluon_xception65,85.148,14.851,96.597,3.403,39.92,299,0.903,bicubic,+5.433,+1.737,-22 gluon_resnet101_v1b,85.142,14.858,96.366,3.634,44.55,224,0.875,bicubic,+5.836,+1.842,-6 -regnetx_120,85.131,14.869,96.477,3.523,46.11,224,0.875,bicubic,+5.535,+1.739,-19 +regnetx_120,85.131,14.869,96.477,3.523,46.11,224,0.875,bicubic,+5.535,+1.739,-20 xception,85.129,14.871,96.471,3.529,22.86,299,0.897,bicubic,+6.077,+2.079,+9 tf_efficientnet_b1_ap,85.127,14.873,96.405,3.595,7.79,240,0.882,bicubic,+5.847,+2.099,-4 -hrnet_w64,85.119,14.881,96.744,3.256,128.06,224,0.875,bilinear,+5.645,+2.092,-17 +hrnet_w64,85.119,14.881,96.744,3.256,128.06,224,0.875,bilinear,+5.645,+2.092,-18 ssl_resnet50,85.097,14.903,96.866,3.134,25.56,224,0.875,bilinear,+5.875,+2.034,-4 res2net101_26w_4s,85.093,14.907,96.381,3.619,45.21,224,0.875,bilinear,+5.895,+1.949,-1 tf_efficientnet_cc_b1_8e,85.063,14.937,96.422,3.578,39.72,240,0.882,bicubic,+5.755,+2.052,-14 res2net50_26w_8s,85.029,14.971,96.419,3.580,48.40,224,0.875,bilinear,+5.831,+2.052,-4 -resnest26d,85.008,14.992,96.637,3.363,17.07,224,0.875,bilinear,+6.530,+2.339,+21 +resnest26d,85.008,14.992,96.637,3.363,17.07,224,0.875,bilinear,+6.530,+2.339,+22 gluon_resnext50_32x4d,84.995,15.005,96.426,3.574,25.03,224,0.875,bicubic,+5.641,+2.000,-20 tf_efficientnet_b0_ns,84.984,15.016,96.503,3.497,5.29,224,0.875,bicubic,+6.326,+2.127,+14 -regnety_040,84.948,15.052,96.612,3.388,20.65,224,0.875,bicubic,+5.728,+1.956,-10 -dla169,84.920,15.080,96.535,3.465,53.39,224,0.875,bilinear,+6.232,+2.199,+10 -tf_efficientnet_b1,84.918,15.082,96.364,3.636,7.79,240,0.882,bicubic,+6.092,+2.166,+3 -legacy_seresnext50_32x4d,84.901,15.099,96.434,3.566,27.56,224,0.875,bilinear,+5.823,+1.998,-7 -hrnet_w44,84.884,15.116,96.434,3.566,67.06,224,0.875,bilinear,+5.988,+2.066,-1 -regnetx_080,84.862,15.138,96.434,3.566,39.57,224,0.875,bicubic,+5.668,+1.874,-11 -gluon_resnet50_v1s,84.860,15.140,96.443,3.557,25.68,224,0.875,bicubic,+6.148,+2.205,+4 -gluon_resnet50_v1d,84.832,15.168,96.398,3.602,25.58,224,0.875,bicubic,+5.758,+1.928,-10 -dla60_res2next,84.830,15.170,96.411,3.589,17.03,224,0.875,bilinear,+6.390,+2.259,+13 -mixnet_l,84.822,15.178,96.328,3.672,7.33,224,0.875,bicubic,+5.846,+2.146,-8 -tv_resnet152,84.815,15.185,96.225,3.775,60.19,224,0.875,bilinear,+6.503,+2.187,+16 -dla60_res2net,84.813,15.187,96.481,3.519,20.85,224,0.875,bilinear,+6.349,+2.275,+8 
-dla102x,84.813,15.187,96.552,3.448,26.31,224,0.875,bilinear,+6.303,+2.324,+4 -pit_xs_224,84.792,15.208,96.492,3.508,10.62,224,0.900,bicubic,+6.610,+2.324,+18 -xception41,84.792,15.208,96.413,3.587,26.97,299,0.903,bicubic,+6.276,+2.135,+1 -regnetx_064,84.781,15.219,96.490,3.510,26.21,224,0.875,bicubic,+5.709,+2.032,-17 -hrnet_w40,84.743,15.257,96.554,3.446,57.56,224,0.875,bilinear,+5.823,+2.084,-14 -res2net50_26w_6s,84.726,15.274,96.281,3.719,37.05,224,0.875,bilinear,+6.156,+2.157,-3 -repvgg_b2,84.724,15.276,96.469,3.531,89.02,224,0.875,bilinear,+5.932,+2.055,-10 -legacy_seresnet152,84.704,15.296,96.417,3.583,66.82,224,0.875,bilinear,+6.044,+2.047,-7 +coat_tiny,84.976,15.024,96.409,3.591,5.50,224,0.900,bicubic,+6.542,+2.371,+23 +regnety_040,84.948,15.052,96.612,3.388,20.65,224,0.875,bicubic,+5.728,+1.956,-11 +dla169,84.920,15.080,96.535,3.465,53.39,224,0.875,bilinear,+6.232,+2.199,+9 +tf_efficientnet_b1,84.918,15.082,96.364,3.636,7.79,240,0.882,bicubic,+6.092,+2.166,+2 +legacy_seresnext50_32x4d,84.901,15.099,96.434,3.566,27.56,224,0.875,bilinear,+5.823,+1.998,-8 +hrnet_w44,84.884,15.116,96.434,3.566,67.06,224,0.875,bilinear,+5.988,+2.066,-2 +regnetx_080,84.862,15.138,96.434,3.566,39.57,224,0.875,bicubic,+5.668,+1.874,-12 +gluon_resnet50_v1s,84.860,15.140,96.443,3.557,25.68,224,0.875,bicubic,+6.150,+2.205,+3 +levit_128,84.843,15.157,96.360,3.640,9.21,224,0.900,bicubic,+6.357,+2.350,+9 +gluon_resnet50_v1d,84.832,15.168,96.398,3.602,25.58,224,0.875,bicubic,+5.758,+1.928,-12 +dla60_res2next,84.830,15.170,96.411,3.589,17.03,224,0.875,bilinear,+6.390,+2.259,+12 +mixnet_l,84.822,15.178,96.328,3.672,7.33,224,0.875,bicubic,+5.846,+2.146,-10 +tv_resnet152,84.815,15.185,96.225,3.775,60.19,224,0.875,bilinear,+6.503,+2.187,+17 +dla102x,84.813,15.187,96.552,3.448,26.31,224,0.875,bilinear,+6.303,+2.324,+2 +dla60_res2net,84.813,15.187,96.481,3.519,20.85,224,0.875,bilinear,+6.349,+2.275,+7 +pit_xs_224,84.792,15.208,96.492,3.508,10.62,224,0.900,bicubic,+6.610,+2.324,+19 +xception41,84.792,15.208,96.413,3.587,26.97,299,0.903,bicubic,+6.276,+2.135,-1 +regnetx_064,84.781,15.219,96.490,3.510,26.21,224,0.875,bicubic,+5.709,+2.032,-19 +hrnet_w40,84.743,15.257,96.554,3.446,57.56,224,0.875,bilinear,+5.823,+2.084,-16 +res2net50_26w_6s,84.726,15.274,96.281,3.719,37.05,224,0.875,bilinear,+6.156,+2.157,-5 +repvgg_b2,84.724,15.276,96.469,3.531,89.02,224,0.875,bilinear,+5.932,+2.055,-12 +legacy_seresnet152,84.704,15.296,96.417,3.583,66.82,224,0.875,bilinear,+6.044,+2.047,-9 selecsls60b,84.657,15.343,96.300,3.700,32.77,224,0.875,bicubic,+6.245,+2.126,+2 -hrnet_w32,84.651,15.349,96.407,3.593,41.23,224,0.875,bilinear,+6.201,+2.221,-1 -efficientnet_b1,84.608,15.392,96.332,3.668,7.79,256,1.000,bicubic,+5.814,+1.990,-15 -regnetx_040,84.600,15.400,96.383,3.617,22.12,224,0.875,bicubic,+6.118,+2.139,-6 +hrnet_w32,84.651,15.349,96.407,3.593,41.23,224,0.875,bilinear,+6.201,+2.221,-2 +tf_efficientnetv2_b0,84.625,15.375,96.274,3.726,7.14,224,0.875,bicubic,+6.269,+2.250,+4 +efficientnet_b1,84.608,15.392,96.332,3.668,7.79,256,1.000,bicubic,+5.814,+1.990,-18 +regnetx_040,84.600,15.400,96.383,3.617,22.12,224,0.875,bicubic,+6.118,+2.139,-8 efficientnet_es,84.591,15.409,96.311,3.689,5.44,224,0.875,bicubic,+6.525,+2.385,+12 hrnet_w30,84.572,15.428,96.388,3.612,37.71,224,0.875,bilinear,+6.366,+2.166,+5 -tf_mixnet_l,84.564,15.437,96.244,3.756,7.33,224,0.875,bicubic,+5.790,+2.246,-17 -wide_resnet101_2,84.557,15.443,96.349,3.651,126.89,224,0.875,bilinear,+5.701,+2.067,-23 
+tf_mixnet_l,84.564,15.437,96.244,3.756,7.33,224,0.875,bicubic,+5.790,+2.246,-20 +wide_resnet101_2,84.557,15.443,96.349,3.651,126.89,224,0.875,bilinear,+5.701,+2.067,-26 dla60x,84.523,15.477,96.285,3.715,17.35,224,0.875,bilinear,+6.277,+2.267,-1 -legacy_seresnet101,84.504,15.496,96.330,3.670,49.33,224,0.875,bilinear,+6.122,+2.066,-5 -tf_efficientnet_em,84.450,15.550,96.180,3.820,6.90,240,0.882,bicubic,+6.320,+2.136,+3 +legacy_seresnet101,84.504,15.496,96.330,3.670,49.33,224,0.875,bilinear,+6.122,+2.066,-6 coat_lite_tiny,84.450,15.550,96.368,3.632,5.72,224,0.900,bicubic,+6.938,+2.452,+27 -repvgg_b1,84.416,15.584,96.221,3.779,57.42,224,0.875,bilinear,+6.050,+2.123,-7 +tf_efficientnet_em,84.450,15.550,96.180,3.820,6.90,240,0.882,bicubic,+6.320,+2.136,+3 +repvgg_b1,84.416,15.584,96.221,3.779,57.42,224,0.875,bilinear,+6.050,+2.123,-8 efficientnet_b1_pruned,84.393,15.607,96.140,3.860,6.33,240,0.882,bicubic,+6.157,+2.306,-4 res2net50_26w_4s,84.365,15.635,96.082,3.918,25.70,224,0.875,bilinear,+6.401,+2.228,+7 hardcorenas_f,84.326,15.674,96.025,3.975,8.20,224,0.875,bilinear,+6.222,+2.222,0 @@ -249,8 +278,8 @@ regnetx_032,84.237,15.763,96.247,3.753,15.30,224,0.875,bicubic,+6.065,+2.159,-6 res2next50,84.226,15.774,95.997,4.003,24.67,224,0.875,bilinear,+5.980,+2.105,-11 gluon_resnet50_v1c,84.207,15.793,96.161,3.839,25.58,224,0.875,bicubic,+6.195,+2.173,-2 dla102,84.190,15.810,96.206,3.794,33.27,224,0.875,bilinear,+6.158,+2.260,-4 -rexnet_100,84.162,15.838,96.255,3.745,4.80,224,0.875,bicubic,+6.304,+2.385,+3 -tf_inception_v3,84.132,15.868,95.920,4.080,23.83,299,0.875,bicubic,+6.276,+2.280,+4 +rexnet_100,84.162,15.838,96.255,3.745,4.80,224,0.875,bicubic,+6.304,+2.839,+5 +tf_inception_v3,84.134,15.866,95.920,4.080,23.83,299,0.875,bicubic,+6.276,+2.050,+2 res2net50_48w_2s,84.126,15.874,95.965,4.035,25.29,224,0.875,bilinear,+6.604,+2.411,+12 resnet34d,84.098,15.902,95.978,4.022,21.82,224,0.875,bicubic,+6.982,+2.596,+23 tf_efficientnet_lite2,84.094,15.906,96.069,3.931,6.09,260,0.890,bicubic,+6.626,+2.315,+12 @@ -268,7 +297,7 @@ tv_resnet101,83.848,16.152,95.892,4.108,44.55,224,0.875,bilinear,+6.474,+2.352,+ inception_v3,83.761,16.239,95.879,4.121,23.83,299,0.875,bicubic,+6.323,+2.403,+1 hardcorenas_d,83.759,16.241,95.734,4.266,7.50,224,0.875,bilinear,+6.327,+2.250,+1 seresnext26d_32x4d,83.754,16.246,95.849,4.151,16.81,224,0.875,bicubic,+6.152,+2.241,-8 -vit_small_patch16_224,83.735,16.265,95.758,4.242,48.75,224,0.900,bicubic,+5.877,+2.342,-15 +vit_small_patch16_224,83.735,16.265,95.758,4.242,48.75,224,0.900,bicubic,+5.877,+2.118,-15 dla60,83.729,16.271,95.933,4.067,22.04,224,0.875,bilinear,+6.697,+2.615,+10 repvgg_b1g4,83.699,16.301,96.020,3.980,39.97,224,0.875,bilinear,+6.105,+2.194,-10 legacy_seresnet50,83.662,16.337,95.973,4.027,28.09,224,0.875,bilinear,+6.032,+2.225,-14 @@ -279,23 +308,24 @@ densenet201,83.556,16.444,95.811,4.189,20.01,224,0.875,bicubic,+6.270,+2.333,-4 mobilenetv3_large_100_miil,83.556,16.444,95.452,4.548,5.48,224,0.875,bilinear,+5.640,+2.542,-27 gernet_s,83.522,16.478,95.794,4.206,8.17,224,0.875,bilinear,+6.606,+2.662,+5 legacy_seresnext26_32x4d,83.517,16.483,95.719,4.281,16.79,224,0.875,bicubic,+6.413,+2.403,-2 -mixnet_m,83.515,16.485,95.689,4.311,5.01,224,0.875,bicubic,+6.255,+2.265,-6 tf_efficientnet_b0,83.515,16.485,95.719,4.281,5.29,224,0.875,bicubic,+6.667,+2.491,+4 +mixnet_m,83.515,16.485,95.689,4.311,5.01,224,0.875,bicubic,+6.255,+2.265,-6 hrnet_w18,83.500,16.500,95.907,4.093,21.30,224,0.875,bilinear,+6.742,+2.463,+5 
densenetblur121d,83.472,16.527,95.822,4.178,8.00,224,0.875,bicubic,+6.885,+2.630,+9 selecsls42b,83.457,16.543,95.745,4.255,32.46,224,0.875,bicubic,+6.283,+2.355,-9 tf_efficientnet_lite1,83.344,16.656,95.642,4.358,5.42,240,0.882,bicubic,+6.702,+2.416,+4 hardcorenas_c,83.342,16.658,95.706,4.294,5.52,224,0.875,bilinear,+6.288,+2.548,-7 regnetx_016,83.195,16.805,95.740,4.260,9.19,224,0.875,bicubic,+6.245,+2.320,-6 -mobilenetv2_140,83.182,16.818,95.689,4.311,6.11,224,0.875,bicubic,+6.666,+2.693,+6 -dpn68,83.178,16.822,95.597,4.402,12.61,224,0.875,bicubic,+6.860,+2.620,+7 +mobilenetv2_140,83.182,16.818,95.689,4.311,6.11,224,0.875,bicubic,+6.666,+2.693,+7 +dpn68,83.178,16.822,95.597,4.402,12.61,224,0.875,bicubic,+6.860,+2.620,+8 tf_efficientnet_es,83.178,16.822,95.585,4.415,5.44,224,0.875,bicubic,+6.584,+2.383,+1 tf_mixnet_m,83.176,16.824,95.461,4.539,5.01,224,0.875,bicubic,+6.234,+2.309,-9 ese_vovnet19b_dw,83.109,16.890,95.779,4.221,6.54,224,0.875,bicubic,+6.311,+2.511,-6 -resnet26d,83.050,16.950,95.604,4.396,16.01,224,0.875,bicubic,+6.354,+2.454,-5 +levit_128s,83.069,16.931,95.531,4.469,7.78,224,0.900,bicubic,+6.539,+2.665,+1 +resnet26d,83.050,16.950,95.604,4.396,16.01,224,0.875,bicubic,+6.354,+2.454,-6 repvgg_a2,83.001,16.999,95.589,4.411,28.21,224,0.875,bilinear,+6.541,+2.585,+1 tv_resnet50,82.958,17.042,95.467,4.533,25.56,224,0.875,bilinear,+6.820,+2.603,+3 -hardcorenas_b,82.873,17.128,95.392,4.607,5.18,224,0.875,bilinear,+6.335,+2.638,-3 +hardcorenas_b,82.873,17.128,95.392,4.607,5.18,224,0.875,bilinear,+6.335,+2.638,-4 densenet121,82.823,17.177,95.585,4.415,7.98,224,0.875,bicubic,+7.245,+2.933,+8 densenet169,82.683,17.317,95.600,4.400,14.15,224,0.875,bicubic,+6.776,+2.574,+3 mixnet_s,82.525,17.476,95.356,4.644,4.13,224,0.875,bicubic,+6.532,+2.560,0 @@ -312,7 +342,7 @@ mobilenetv2_110d,82.070,17.930,95.076,4.923,4.52,224,0.875,bicubic,+7.034,+2.890 tf_mixnet_s,82.038,17.962,95.121,4.879,4.13,224,0.875,bicubic,+6.388,+2.493,-7 repvgg_b0,82.001,17.999,95.100,4.900,15.82,224,0.875,bilinear,+6.849,+2.682,+1 vit_deit_tiny_distilled_patch16_224,81.997,18.003,95.141,4.859,5.91,224,0.900,bicubic,+7.487,+3.251,+14 -mixer_b16_224,81.978,18.022,94.449,5.551,59.88,224,0.875,bicubic,+5.376,+2.221,-23 +mixer_b16_224,81.978,18.022,94.449,5.551,59.88,224,0.875,bicubic,+5.376,+2.221,-24 pit_ti_distilled_224,81.967,18.033,95.145,4.855,5.10,224,0.900,bicubic,+7.437,+3.049,+11 hrnet_w18_small_v2,81.961,18.039,95.164,4.836,15.60,224,0.875,bilinear,+6.847,+2.748,-1 tf_efficientnet_lite0,81.952,18.048,95.168,4.832,4.65,224,0.875,bicubic,+7.122,+2.992,+3 @@ -327,21 +357,22 @@ gluon_resnet34_v1b,81.500,18.500,94.810,5.190,21.80,224,0.875,bicubic,+6.912,+2. 
regnetx_008,81.485,18.515,95.059,4.941,7.26,224,0.875,bicubic,+6.447,+2.724,-9 mnasnet_100,81.459,18.541,94.899,5.101,4.38,224,0.875,bicubic,+6.801,+2.785,-4 vgg19_bn,81.444,18.556,94.763,5.237,143.68,224,0.875,bilinear,+7.230,+2.921,0 -spnasnet_100,80.878,19.122,94.526,5.474,4.42,224,0.875,bilinear,+6.794,+2.708,0 -ghostnet_100,80.699,19.301,94.291,5.709,5.18,224,0.875,bilinear,+6.721,+2.835,+1 -regnety_004,80.659,19.341,94.686,5.314,4.34,224,0.875,bicubic,+6.624,+2.934,-1 +convit_tiny,81.126,18.874,95.044,4.955,5.71,224,0.875,bicubic,+8.010,+3.331,+8 +spnasnet_100,80.878,19.122,94.526,5.474,4.42,224,0.875,bilinear,+6.794,+2.708,-1 +ghostnet_100,80.699,19.301,94.291,5.709,5.18,224,0.875,bilinear,+6.721,+2.835,0 +regnety_004,80.659,19.341,94.686,5.314,4.34,224,0.875,bicubic,+6.624,+2.934,-2 skresnet18,80.637,19.363,94.378,5.622,11.96,224,0.875,bicubic,+7.599,+3.210,+5 -regnetx_006,80.629,19.371,94.524,5.476,6.20,224,0.875,bicubic,+6.777,+2.852,-1 +regnetx_006,80.629,19.371,94.524,5.476,6.20,224,0.875,bicubic,+6.777,+2.852,-2 pit_ti_224,80.605,19.395,94.618,5.383,4.85,224,0.900,bicubic,+7.693,+3.216,+5 -swsl_resnet18,80.575,19.425,94.743,5.256,11.69,224,0.875,bilinear,+7.299,+3.010,+1 -vgg16_bn,80.556,19.444,94.592,5.408,138.37,224,0.875,bilinear,+7.206,+3.086,-2 -tv_resnet34,80.389,19.611,94.436,5.564,21.80,224,0.875,bilinear,+7.077,+3.010,-2 +swsl_resnet18,80.575,19.425,94.743,5.256,11.69,224,0.875,bilinear,+7.299,+3.010,0 +vgg16_bn,80.556,19.444,94.592,5.408,138.37,224,0.875,bilinear,+7.206,+3.086,-3 +tv_resnet34,80.389,19.611,94.436,5.564,21.80,224,0.875,bilinear,+7.077,+3.010,-3 resnet18d,80.387,19.613,94.252,5.748,11.71,224,0.875,bicubic,+8.127,+3.556,+6 mobilenetv2_100,80.257,19.743,94.195,5.805,3.50,224,0.875,bicubic,+7.287,+3.179,-1 ssl_resnet18,80.101,19.899,94.590,5.410,11.69,224,0.875,bilinear,+7.491,+3.174,0 -tf_mobilenetv3_large_075,80.093,19.907,94.184,5.816,3.99,224,0.875,bilinear,+6.655,+2.834,-8 +tf_mobilenetv3_large_075,80.093,19.907,94.184,5.816,3.99,224,0.875,bilinear,+6.655,+2.834,-9 vit_deit_tiny_patch16_224,80.018,19.982,94.449,5.551,5.72,224,0.900,bicubic,+7.850,+3.331,+4 -hrnet_w18_small,79.557,20.443,93.898,6.102,13.19,224,0.875,bilinear,+7.215,+3.220,0 +hrnet_w18_small,79.557,20.443,93.898,6.102,13.19,224,0.875,bilinear,+7.213,+3.220,0 vgg19,79.480,20.520,93.870,6.130,143.67,224,0.875,bilinear,+7.112,+2.998,-2 regnetx_004,79.435,20.565,93.853,6.147,5.16,224,0.875,bicubic,+7.039,+3.023,-4 tf_mobilenetv3_large_minimal_100,79.222,20.778,93.706,6.294,3.92,224,0.875,bilinear,+6.974,+3.076,-1 @@ -360,5 +391,5 @@ dla60x_c,75.637,24.363,92.177,7.823,1.32,224,0.875,bilinear,+7.745,+3.751,+1 tf_mobilenetv3_small_100,74.717,25.283,91.257,8.743,2.54,224,0.875,bilinear,+6.795,+3.593,-1 dla46x_c,73.647,26.353,91.095,8.905,1.07,224,0.875,bilinear,+7.677,+4.115,0 tf_mobilenetv3_small_075,72.812,27.188,90.036,9.964,2.04,224,0.875,bilinear,+7.096,+3.906,0 -dla46_c,72.603,27.397,90.499,9.501,1.30,224,0.875,bilinear,+7.737,+4.207,0 +dla46_c,72.601,27.399,90.499,9.501,1.30,224,0.875,bilinear,+7.737,+4.207,0 tf_mobilenetv3_small_minimal_100,70.111,29.889,88.505,11.495,2.04,224,0.875,bilinear,+7.205,+4.275,0 diff --git a/results/results-imagenet.csv b/results/results-imagenet.csv index 321f01c1..9d32efba 100644 --- a/results/results-imagenet.csv +++ b/results/results-imagenet.csv @@ -7,12 +7,15 @@ cait_m48_448,86.484,13.516,97.754,2.246,356.46,448,1.000,bicubic tf_efficientnet_b6_ns,86.452,13.548,97.882,2.118,43.04,528,0.942,bicubic 
swin_base_patch4_window12_384,86.432,13.568,98.058,1.942,87.90,384,1.000,bicubic swin_large_patch4_window7_224,86.320,13.680,97.896,2.104,196.53,224,0.900,bicubic +tf_efficientnetv2_l_in21ft1k,86.304,13.696,97.978,2.022,118.52,480,1.000,bicubic dm_nfnet_f6,86.296,13.704,97.744,2.256,438.36,576,0.956,bicubic tf_efficientnet_b5_ns,86.088,13.912,97.752,2.248,30.39,456,0.934,bicubic cait_m36_384,86.054,13.946,97.730,2.270,271.22,384,1.000,bicubic dm_nfnet_f5,85.714,14.286,97.442,2.558,377.21,544,0.954,bicubic dm_nfnet_f4,85.658,14.342,97.510,2.490,316.07,512,0.951,bicubic +tf_efficientnetv2_m_in21ft1k,85.588,14.412,97.752,2.248,54.14,480,1.000,bicubic dm_nfnet_f3,85.560,14.440,97.406,2.594,254.92,416,0.940,bicubic +tf_efficientnetv2_l,85.490,14.510,97.372,2.628,118.52,480,1.000,bicubic cait_s36_384,85.460,14.540,97.480,2.520,68.37,384,1.000,bicubic ig_resnext101_32x48d,85.428,14.572,97.572,2.428,828.41,224,0.875,bilinear vit_deit_base_distilled_patch16_384,85.422,14.578,97.332,2.668,87.63,384,1.000,bicubic @@ -24,12 +27,14 @@ vit_large_patch16_384,85.158,14.842,97.356,2.644,304.72,384,1.000,bicubic tf_efficientnet_b7_ap,85.120,14.880,97.252,2.748,66.35,600,0.949,bicubic ig_resnext101_32x32d,85.094,14.906,97.438,2.562,468.53,224,0.875,bilinear cait_s24_384,85.046,14.954,97.346,2.654,47.06,384,1.000,bicubic +tf_efficientnetv2_m,85.044,14.956,97.278,2.722,54.14,480,1.000,bicubic resnetrs420,85.008,14.992,97.124,2.876,191.89,416,1.000,bicubic dm_nfnet_f2,84.990,15.010,97.144,2.856,193.78,352,0.920,bicubic ecaresnet269d,84.976,15.024,97.226,2.774,102.09,352,1.000,bicubic vit_base_r50_s16_384,84.972,15.028,97.288,2.712,98.95,384,1.000,bicubic tf_efficientnet_b7,84.936,15.064,97.204,2.796,66.35,600,0.949,bicubic resnetv2_152x4_bitm,84.932,15.068,97.436,2.564,936.53,480,1.000,bilinear +efficientnetv2_rw_m,84.808,15.192,97.148,2.852,53.24,416,1.000,bicubic tf_efficientnet_b6_ap,84.788,15.212,97.138,2.862,43.04,528,0.942,bicubic resnetrs350,84.720,15.280,96.988,3.012,163.96,384,1.000,bicubic dm_nfnet_f1,84.604,15.396,97.068,2.932,132.63,320,0.910,bicubic @@ -38,6 +43,7 @@ resnetv2_152x2_bitm,84.440,15.560,97.446,2.554,236.34,480,1.000,bilinear resnetrs270,84.434,15.566,96.970,3.030,129.86,352,1.000,bicubic resnetv2_101x3_bitm,84.394,15.606,97.362,2.638,387.93,480,1.000,bilinear seresnet152d,84.362,15.638,97.040,2.960,66.84,320,1.000,bicubic +tf_efficientnetv2_s_in21ft1k,84.302,15.698,97.252,2.748,21.46,384,1.000,bicubic swsl_resnext101_32x8d,84.284,15.716,97.176,2.824,88.79,224,0.875,bilinear vit_base_patch16_224_miil,84.268,15.732,96.802,3.198,86.54,224,0.875,bilinear tf_efficientnet_b5_ap,84.252,15.748,96.974,3.026,30.39,456,0.934,bicubic @@ -50,13 +56,15 @@ cait_xs24_384,84.062,15.938,96.888,3.112,26.67,384,1.000,bicubic tf_efficientnet_b3_ns,84.048,15.952,96.910,3.090,12.23,300,0.904,bicubic eca_nfnet_l1,84.008,15.992,97.028,2.972,41.41,320,1.000,bicubic resnet200d,83.962,16.038,96.824,3.176,64.69,320,1.000,bicubic +tf_efficientnetv2_s,83.894,16.106,96.698,3.302,21.46,384,1.000,bicubic resnest200e,83.832,16.168,96.894,3.106,70.20,320,0.909,bicubic tf_efficientnet_b5,83.812,16.188,96.748,3.252,30.39,456,0.934,bicubic -efficientnet_v2s,83.808,16.192,96.724,3.276,23.94,384,1.000,bicubic +efficientnetv2_rw_s,83.808,16.192,96.724,3.276,23.94,384,1.000,bicubic resnetv2_50x3_bitm,83.784,16.216,97.106,2.894,217.32,480,1.000,bilinear resnetrs152,83.712,16.288,96.614,3.386,86.62,320,1.000,bicubic regnety_160,83.686,16.314,96.776,3.224,83.59,288,1.000,bicubic 
resnet152d,83.680,16.320,96.738,3.262,60.21,320,1.000,bicubic +twins_svt_large,83.678,16.322,96.594,3.406,99.27,224,0.900,bicubic cait_s24_224,83.452,16.548,96.564,3.436,46.92,224,1.000,bicubic efficientnet_b4,83.428,16.572,96.596,3.404,19.34,384,1.000,bicubic vit_deit_base_distilled_patch16_224,83.388,16.612,96.488,3.512,87.34,224,0.900,bicubic @@ -65,6 +73,8 @@ dm_nfnet_f0,83.342,16.658,96.560,3.440,71.49,256,0.900,bicubic tf_efficientnet_b4_ap,83.248,16.752,96.392,3.608,19.34,380,0.922,bicubic swsl_resnext101_32x4d,83.230,16.770,96.760,3.240,44.18,224,0.875,bilinear swin_small_patch4_window7_224,83.212,16.788,96.322,3.678,49.61,224,0.900,bicubic +twins_pcpvt_large,83.140,16.860,96.598,3.402,60.99,224,0.900,bicubic +twins_svt_base,83.136,16.864,96.418,3.582,56.07,224,0.900,bicubic vit_deit_base_patch16_384,83.106,16.894,96.372,3.628,86.86,384,1.000,bicubic tresnet_m,83.080,16.920,96.118,3.882,31.39,224,0.875,bilinear vit_large_patch16_224,83.062,16.938,96.438,3.562,304.33,224,0.900,bicubic @@ -75,12 +85,18 @@ resnest101e,82.890,17.110,96.320,3.680,48.28,256,0.875,bilinear pnasnet5large,82.782,17.218,96.040,3.960,86.06,331,0.911,bicubic nfnet_l0,82.760,17.240,96.498,3.502,35.07,288,1.000,bicubic regnety_032,82.724,17.276,96.424,3.576,19.44,288,1.000,bicubic +twins_pcpvt_base,82.708,17.292,96.346,3.654,43.83,224,0.900,bicubic ig_resnext101_32x8d,82.688,17.312,96.636,3.364,88.79,224,0.875,bilinear nasnetalarge,82.620,17.380,96.046,3.954,88.75,331,0.911,bicubic eca_nfnet_l0,82.588,17.412,96.474,3.526,24.14,288,1.000,bicubic +levit_384,82.586,17.414,96.016,3.984,39.13,224,0.900,bicubic pit_b_224,82.446,17.554,95.710,4.290,73.76,224,0.900,bicubic tf_efficientnet_b2_ns,82.380,17.620,96.248,3.752,9.11,260,0.890,bicubic +resnet51q,82.360,17.640,96.180,3.820,35.70,288,1.000,bilinear ecaresnet50t,82.346,17.654,96.138,3.862,25.57,320,0.950,bicubic +coat_lite_small,82.308,17.692,95.850,4.150,19.84,224,0.900,bicubic +mixer_b16_224_miil,82.308,17.692,95.716,4.284,59.88,224,0.875,bilinear +convit_base,82.290,17.710,95.938,4.062,86.54,224,0.875,bicubic resnetrs101,82.288,17.712,96.008,3.992,63.62,288,0.940,bicubic tresnet_l_448,82.268,17.732,95.976,4.024,55.99,448,0.875,bilinear efficientnet_b3,82.242,17.758,96.114,3.886,12.23,320,1.000,bicubic @@ -88,33 +104,40 @@ resnetv2_101x1_bitm,82.212,17.788,96.472,3.528,44.54,480,1.000,bilinear cait_xxs36_384,82.194,17.806,96.148,3.852,17.37,384,1.000,bicubic swsl_resnext50_32x4d,82.182,17.818,96.230,3.770,25.03,224,0.875,bilinear ecaresnet101d,82.172,17.828,96.046,3.954,44.57,224,0.875,bicubic +visformer_small,82.106,17.894,95.872,4.128,40.22,224,0.900,bicubic tresnet_xl,82.054,17.946,95.936,4.064,78.44,224,0.875,bilinear vit_deit_base_patch16_224,81.998,18.002,95.734,4.266,86.57,224,0.900,bicubic pit_s_distilled_224,81.996,18.004,95.798,4.202,24.04,224,0.900,bicubic +tf_efficientnetv2_b3,81.970,18.030,95.782,4.218,14.36,300,0.904,bicubic ssl_resnext101_32x16d,81.844,18.156,96.096,3.904,194.03,224,0.875,bilinear tf_efficientnet_b3_ap,81.822,18.178,95.624,4.376,12.23,300,0.904,bicubic vit_base_patch16_224,81.786,18.214,96.122,3.878,86.57,224,0.900,bicubic tresnet_m_448,81.714,18.286,95.572,4.428,31.39,448,0.875,bilinear +twins_svt_small,81.682,18.318,95.670,4.330,24.06,224,0.900,bicubic vit_base_patch32_384,81.652,18.348,96.128,3.872,88.30,384,1.000,bicubic tf_efficientnet_b3,81.636,18.364,95.718,4.282,12.23,300,0.904,bicubic rexnet_200,81.632,18.368,95.668,4.332,16.37,224,0.875,bicubic ssl_resnext101_32x8d,81.616,18.384,96.038,3.962,88.79,224,0.875,bilinear 
tf_efficientnet_lite4,81.536,18.464,95.668,4.332,13.01,380,0.920,bilinear tnt_s_patch16_224,81.518,18.482,95.748,4.252,23.76,224,0.900,bicubic +levit_256,81.510,18.490,95.490,4.510,18.89,224,0.900,bicubic vit_large_patch32_384,81.506,18.494,96.092,3.908,306.63,384,1.000,bicubic tresnet_l,81.490,18.510,95.624,4.376,55.99,224,0.875,bilinear wide_resnet50_2,81.456,18.544,95.532,4.468,68.88,224,0.875,bicubic +convit_small,81.426,18.574,95.744,4.256,27.78,224,0.875,bicubic tf_efficientnet_b1_ns,81.388,18.612,95.738,4.262,7.79,240,0.882,bicubic swin_tiny_patch4_window7_224,81.378,18.622,95.540,4.460,28.29,224,0.900,bicubic gernet_l,81.354,18.646,95.536,4.464,31.08,256,0.875,bilinear efficientnet_el,81.316,18.684,95.526,4.474,10.59,300,0.904,bicubic legacy_senet154,81.310,18.690,95.496,4.504,115.09,224,0.875,bilinear +coat_mini,81.268,18.732,95.392,4.608,10.34,224,0.900,bicubic seresnext50_32x4d,81.266,18.734,95.620,4.380,27.56,224,0.875,bicubic gluon_senet154,81.234,18.766,95.348,4.652,115.09,224,0.875,bicubic vit_deit_small_distilled_patch16_224,81.200,18.800,95.378,4.622,22.44,224,0.900,bicubic swsl_resnet50,81.166,18.834,95.972,4.028,25.56,224,0.875,bilinear resnest50d_4s2x40d,81.108,18.892,95.558,4.442,30.42,224,0.875,bicubic pit_s_224,81.094,18.906,95.332,4.668,23.46,224,0.900,bicubic +twins_pcpvt_small,81.088,18.912,95.642,4.358,24.11,224,0.900,bicubic gluon_resnet152_v1s,81.016,18.984,95.412,4.588,60.32,224,0.875,bicubic resnest50d_1s4x24d,80.988,19.012,95.322,4.678,25.68,224,0.875,bicubic resnest50d,80.974,19.026,95.378,4.622,27.48,224,0.875,bilinear @@ -148,7 +171,8 @@ seresnet50,80.274,19.726,95.070,4.930,28.09,224,0.875,bicubic tf_efficientnet_el,80.250,19.750,95.128,4.872,10.59,300,0.904,bicubic regnetx_320,80.246,19.754,95.026,4.974,107.81,224,0.875,bicubic legacy_seresnext101_32x4d,80.228,19.772,95.018,4.982,48.96,224,0.875,bilinear -repvgg_b3g4,80.212,19.788,95.110,4.890,83.83,224,0.875,bilinear +repvgg_b3g4,80.214,19.786,95.110,4.890,83.83,224,0.875,bilinear +tf_efficientnetv2_b2,80.208,19.792,95.042,4.958,10.10,260,0.890,bicubic resnetv2_50x1_bitm,80.172,19.828,95.626,4.374,25.55,480,1.000,bilinear inception_v4,80.168,19.832,94.968,5.032,42.68,299,0.875,bicubic dpn107,80.156,19.844,94.910,5.090,86.92,224,0.875,bicubic @@ -167,6 +191,7 @@ xception71,79.874,20.126,94.922,5.078,42.34,299,0.903,bicubic regnetx_160,79.856,20.144,94.830,5.170,54.28,224,0.875,bicubic vit_deit_small_patch16_224,79.856,20.144,95.052,4.948,22.05,224,0.900,bicubic ecaresnet26t,79.854,20.146,95.084,4.916,16.01,320,0.950,bicubic +levit_192,79.842,20.158,94.786,5.214,10.95,224,0.900,bicubic dpn131,79.822,20.178,94.710,5.290,79.25,224,0.875,bicubic tf_efficientnet_lite3,79.820,20.180,94.914,5.086,8.20,300,0.904,bilinear resnext50_32x4d,79.768,20.232,94.598,5.402,25.03,224,0.875,bicubic @@ -183,6 +208,7 @@ xception65,79.552,20.448,94.654,5.346,39.92,299,0.903,bicubic gluon_resnet101_v1c,79.534,20.466,94.578,5.422,44.57,224,0.875,bicubic rexnet_130,79.500,20.500,94.682,5.318,7.56,224,0.875,bicubic hrnet_w64,79.474,20.526,94.652,5.348,128.06,224,0.875,bilinear +tf_efficientnetv2_b1,79.462,20.538,94.722,5.278,8.14,240,0.882,bicubic dla102x2,79.448,20.552,94.640,5.360,41.28,224,0.875,bilinear repvgg_b2g4,79.366,20.634,94.688,5.312,61.76,224,0.875,bilinear gluon_resnext50_32x4d,79.354,20.646,94.426,5.574,25.03,224,0.875,bicubic @@ -217,22 +243,25 @@ gluon_inception_v3,78.806,21.194,94.370,5.630,23.83,299,0.875,bicubic efficientnet_b1,78.794,21.206,94.342,5.658,7.79,256,1.000,bicubic 
repvgg_b2,78.792,21.208,94.414,5.586,89.02,224,0.875,bilinear tf_mixnet_l,78.774,21.226,93.998,6.002,7.33,224,0.875,bicubic -gluon_resnet50_v1s,78.712,21.288,94.238,5.762,25.68,224,0.875,bicubic +gluon_resnet50_v1s,78.710,21.290,94.238,5.762,25.68,224,0.875,bicubic dla169,78.688,21.312,94.336,5.664,53.39,224,0.875,bilinear legacy_seresnet152,78.660,21.340,94.370,5.630,66.82,224,0.875,bilinear tf_efficientnet_b0_ns,78.658,21.342,94.376,5.624,5.29,224,0.875,bicubic res2net50_26w_6s,78.570,21.430,94.124,5.876,37.05,224,0.875,bilinear xception41,78.516,21.484,94.278,5.722,26.97,299,0.903,bicubic dla102x,78.510,21.490,94.228,5.772,26.31,224,0.875,bilinear +levit_128,78.486,21.514,94.010,5.990,9.21,224,0.900,bicubic regnetx_040,78.482,21.518,94.244,5.756,22.12,224,0.875,bicubic resnest26d,78.478,21.522,94.298,5.702,17.07,224,0.875,bilinear dla60_res2net,78.464,21.536,94.206,5.794,20.85,224,0.875,bilinear hrnet_w32,78.450,21.550,94.186,5.814,41.23,224,0.875,bilinear dla60_res2next,78.440,21.560,94.152,5.848,17.03,224,0.875,bilinear +coat_tiny,78.434,21.566,94.038,5.962,5.50,224,0.900,bicubic selecsls60b,78.412,21.588,94.174,5.826,32.77,224,0.875,bicubic cait_xxs24_224,78.386,21.614,94.310,5.690,11.96,224,1.000,bicubic legacy_seresnet101,78.382,21.618,94.264,5.736,49.33,224,0.875,bilinear repvgg_b1,78.366,21.634,94.098,5.902,57.42,224,0.875,bilinear +tf_efficientnetv2_b0,78.356,21.644,94.024,5.976,7.14,224,0.875,bicubic tv_resnet152,78.312,21.688,94.038,5.962,60.19,224,0.875,bilinear dla60x,78.246,21.754,94.018,5.982,17.35,224,0.875,bilinear res2next50,78.246,21.754,93.892,6.108,24.67,224,0.875,bilinear @@ -252,9 +281,9 @@ res2net50_26w_4s,77.964,22.036,93.854,6.146,25.70,224,0.875,bilinear mobilenetv3_large_100_miil,77.916,22.084,92.910,7.090,5.48,224,0.875,bilinear tf_efficientnet_cc_b0_8e,77.908,22.092,93.654,6.346,24.01,224,0.875,bicubic regnety_016,77.862,22.138,93.720,6.280,11.20,224,0.875,bicubic -rexnet_100,77.858,22.142,93.870,6.130,4.80,224,0.875,bicubic +tf_inception_v3,77.858,22.142,93.640,6.360,23.83,299,0.875,bicubic vit_small_patch16_224,77.858,22.142,93.416,6.584,48.75,224,0.900,bicubic -tf_inception_v3,77.856,22.144,93.640,6.360,23.83,299,0.875,bicubic +rexnet_100,77.858,22.142,93.870,6.130,4.80,224,0.875,bicubic hardcorenas_e,77.794,22.206,93.694,6.306,8.07,224,0.875,bilinear efficientnet_b0,77.698,22.302,93.532,6.468,5.29,224,0.875,bicubic legacy_seresnet50,77.630,22.370,93.748,6.252,28.09,224,0.875,bilinear @@ -293,6 +322,7 @@ mixer_b16_224,76.602,23.398,92.228,7.772,59.88,224,0.875,bicubic tf_efficientnet_es,76.594,23.406,93.202,6.798,5.44,224,0.875,bicubic densenetblur121d,76.588,23.412,93.192,6.808,8.00,224,0.875,bicubic hardcorenas_b,76.538,23.462,92.754,7.246,5.18,224,0.875,bilinear +levit_128s,76.530,23.470,92.866,7.134,7.78,224,0.900,bicubic mobilenetv2_140,76.516,23.484,92.996,7.004,6.11,224,0.875,bicubic repvgg_a2,76.460,23.540,93.004,6.996,28.21,224,0.875,bilinear dpn68,76.318,23.682,92.978,7.022,12.61,224,0.875,bicubic @@ -335,13 +365,14 @@ tf_mobilenetv3_large_075,73.438,26.562,91.350,8.650,3.99,224,0.875,bilinear vgg16_bn,73.350,26.650,91.506,8.494,138.37,224,0.875,bilinear tv_resnet34,73.312,26.688,91.426,8.574,21.80,224,0.875,bilinear swsl_resnet18,73.276,26.724,91.734,8.266,11.69,224,0.875,bilinear +convit_tiny,73.116,26.884,91.714,8.286,5.71,224,0.875,bicubic skresnet18,73.038,26.962,91.168,8.832,11.96,224,0.875,bicubic mobilenetv2_100,72.970,27.030,91.016,8.984,3.50,224,0.875,bicubic pit_ti_224,72.912,27.088,91.402,8.598,4.85,224,0.900,bicubic 
ssl_resnet18,72.610,27.390,91.416,8.584,11.69,224,0.875,bilinear regnetx_004,72.396,27.604,90.830,9.170,5.16,224,0.875,bicubic vgg19,72.368,27.632,90.872,9.128,143.67,224,0.875,bilinear -hrnet_w18_small,72.342,27.658,90.678,9.322,13.19,224,0.875,bilinear +hrnet_w18_small,72.344,27.656,90.678,9.322,13.19,224,0.875,bilinear resnet18d,72.260,27.740,90.696,9.304,11.71,224,0.875,bicubic tf_mobilenetv3_large_minimal_100,72.248,27.752,90.630,9.370,3.92,224,0.875,bilinear vit_deit_tiny_patch16_224,72.168,27.832,91.118,8.882,5.72,224,0.900,bicubic @@ -360,5 +391,5 @@ tf_mobilenetv3_small_100,67.922,32.078,87.664,12.336,2.54,224,0.875,bilinear dla60x_c,67.892,32.108,88.426,11.574,1.32,224,0.875,bilinear dla46x_c,65.970,34.030,86.980,13.020,1.07,224,0.875,bilinear tf_mobilenetv3_small_075,65.716,34.284,86.130,13.870,2.04,224,0.875,bilinear -dla46_c,64.866,35.134,86.292,13.708,1.30,224,0.875,bilinear +dla46_c,64.864,35.136,86.292,13.708,1.30,224,0.875,bilinear tf_mobilenetv3_small_minimal_100,62.906,37.094,84.230,15.770,2.04,224,0.875,bilinear diff --git a/results/results-imagenetv2-matched-frequency.csv b/results/results-imagenetv2-matched-frequency.csv index b8238496..68e64449 100644 --- a/results/results-imagenetv2-matched-frequency.csv +++ b/results/results-imagenetv2-matched-frequency.csv @@ -4,128 +4,151 @@ tf_efficientnet_l2_ns,80.250,19.750,95.840,4.160,480.31,800,0.960,bicubic,-8.102 tf_efficientnet_b7_ns,78.510,21.490,94.380,5.620,66.35,600,0.949,bicubic,-8.330,-3.714,+1 tf_efficientnet_b6_ns,77.280,22.720,93.890,6.110,43.04,528,0.942,bicubic,-9.172,-3.992,+2 swin_large_patch4_window12_384,77.040,22.960,93.750,6.250,196.74,384,1.000,bicubic,-10.108,-4.484,-2 -cait_m48_448,76.870,23.130,93.370,6.630,356.46,448,1.000,bicubic,-9.614,-4.384,-1 -ig_resnext101_32x48d,76.870,23.130,93.310,6.690,828.41,224,0.875,bilinear,-8.558,-4.262,+9 -ig_resnext101_32x32d,76.840,23.160,93.200,6.800,468.53,224,0.875,bilinear,-8.254,-4.238,+16 +tf_efficientnetv2_l_in21ft1k,76.940,23.060,93.950,6.050,118.52,480,1.000,bicubic,-9.364,-4.028,+3 +cait_m48_448,76.870,23.130,93.370,6.630,356.46,448,1.000,bicubic,-9.614,-4.384,-2 +ig_resnext101_32x48d,76.870,23.130,93.310,6.690,828.41,224,0.875,bilinear,-8.558,-4.262,+11 +ig_resnext101_32x32d,76.840,23.160,93.200,6.800,468.53,224,0.875,bilinear,-8.254,-4.238,+18 tf_efficientnet_b5_ns,76.810,23.190,93.580,6.420,30.39,456,0.934,bicubic,-9.278,-4.172,+1 cait_m36_384,76.320,23.680,93.050,6.950,271.22,384,1.000,bicubic,-9.734,-4.680,+1 -swin_base_patch4_window12_384,76.280,23.720,93.320,6.680,87.90,384,1.000,bicubic,-10.152,-4.738,-4 -swin_large_patch4_window7_224,76.270,23.730,93.420,6.580,196.53,224,0.900,bicubic,-10.050,-4.476,-4 -cait_s36_384,76.210,23.790,92.970,7.030,68.37,384,1.000,bicubic,-9.250,-4.510,+2 -dm_nfnet_f6,76.180,23.820,93.220,6.780,438.36,576,0.956,bicubic,-10.116,-4.524,-5 -tf_efficientnet_b7_ap,76.090,23.910,92.970,7.030,66.35,600,0.949,bicubic,-9.030,-4.282,+8 -tf_efficientnet_b8_ap,76.090,23.910,92.730,7.270,87.41,672,0.954,bicubic,-9.280,-4.564,+3 -dm_nfnet_f4,75.750,24.250,92.790,7.210,316.07,512,0.951,bicubic,-9.908,-4.720,-4 -ig_resnext101_32x16d,75.720,24.280,92.910,7.090,194.03,224,0.875,bilinear,-8.450,-4.286,+26 +tf_efficientnetv2_l,76.280,23.720,92.970,7.030,118.52,480,1.000,bicubic,-9.210,-4.402,+4 +swin_base_patch4_window12_384,76.280,23.720,93.320,6.680,87.90,384,1.000,bicubic,-10.152,-4.738,-5 +swin_large_patch4_window7_224,76.270,23.730,93.420,6.580,196.53,224,0.900,bicubic,-10.050,-4.476,-6 
+cait_s36_384,76.210,23.790,92.970,7.030,68.37,384,1.000,bicubic,-9.250,-4.510,+3 +dm_nfnet_f6,76.180,23.820,93.220,6.780,438.36,576,0.956,bicubic,-10.116,-4.524,-6 +tf_efficientnet_b7_ap,76.090,23.910,92.970,7.030,66.35,600,0.949,bicubic,-9.030,-4.282,+9 +tf_efficientnet_b8_ap,76.090,23.910,92.730,7.270,87.41,672,0.954,bicubic,-9.280,-4.564,+4 +tf_efficientnetv2_m_in21ft1k,75.920,24.080,93.280,6.720,54.14,480,1.000,bicubic,-9.668,-4.472,-4 +dm_nfnet_f4,75.750,24.250,92.790,7.210,316.07,512,0.951,bicubic,-9.908,-4.720,-6 +ig_resnext101_32x16d,75.720,24.280,92.910,7.090,194.03,224,0.875,bilinear,-8.450,-4.286,+29 tf_efficientnet_b4_ns,75.670,24.330,93.050,6.950,19.34,380,0.922,bicubic,-9.492,-4.420,+2 -vit_base_r50_s16_384,75.590,24.410,92.790,7.210,98.95,384,1.000,bicubic,-9.382,-4.498,+9 +vit_base_r50_s16_384,75.590,24.410,92.790,7.210,98.95,384,1.000,bicubic,-9.382,-4.498,+10 vit_deit_base_distilled_patch16_384,75.550,24.450,92.500,7.500,87.63,384,1.000,bicubic,-9.872,-4.832,-4 -cait_s24_384,75.480,24.520,92.600,7.400,47.06,384,1.000,bicubic,-9.566,-4.746,+3 -swsl_resnext101_32x8d,75.430,24.570,92.760,7.240,88.79,224,0.875,bilinear,-8.854,-4.416,+17 -dm_nfnet_f3,75.410,24.590,92.830,7.170,254.92,416,0.940,bicubic,-10.150,-4.576,-10 -tf_efficientnet_b6_ap,75.380,24.620,92.440,7.560,43.04,528,0.942,bicubic,-9.408,-4.698,+7 -vit_large_patch16_384,75.150,24.850,92.660,7.340,304.72,384,1.000,bicubic,-10.008,-4.696,-4 -ecaresnet269d,75.120,24.880,92.840,7.160,102.09,352,1.000,bicubic,-9.856,-4.386,+1 -tf_efficientnet_b8,74.940,25.060,92.310,7.690,87.41,672,0.954,bicubic,-10.430,-5.080,-10 -dm_nfnet_f5,74.790,25.210,92.460,7.540,377.21,544,0.954,bicubic,-10.924,-4.982,-17 -tf_efficientnet_b7,74.720,25.280,92.220,7.780,66.35,600,0.949,bicubic,-10.216,-4.984,0 -tf_efficientnet_b5_ap,74.600,25.400,91.990,8.010,30.39,456,0.934,bicubic,-9.652,-4.984,+11 -swin_base_patch4_window7_224,74.570,25.430,92.560,7.440,87.77,224,0.900,bicubic,-10.682,-5.002,-12 +tf_efficientnetv2_m,75.520,24.480,92.620,7.380,54.14,480,1.000,bicubic,-9.524,-4.658,+4 +cait_s24_384,75.480,24.520,92.600,7.400,47.06,384,1.000,bicubic,-9.566,-4.746,+2 +swsl_resnext101_32x8d,75.430,24.570,92.760,7.240,88.79,224,0.875,bilinear,-8.854,-4.416,+19 +dm_nfnet_f3,75.410,24.590,92.830,7.170,254.92,416,0.940,bicubic,-10.150,-4.576,-12 +tf_efficientnet_b6_ap,75.380,24.620,92.440,7.560,43.04,528,0.942,bicubic,-9.408,-4.698,+8 +efficientnetv2_rw_m,75.170,24.830,92.570,7.430,53.24,416,1.000,bicubic,-9.638,-4.578,+6 +vit_large_patch16_384,75.150,24.850,92.660,7.340,304.72,384,1.000,bicubic,-10.008,-4.696,-6 +ecaresnet269d,75.120,24.880,92.840,7.160,102.09,352,1.000,bicubic,-9.856,-4.386,0 +tf_efficientnet_b8,74.940,25.060,92.310,7.690,87.41,672,0.954,bicubic,-10.430,-5.080,-12 +dm_nfnet_f5,74.790,25.210,92.460,7.540,377.21,544,0.954,bicubic,-10.924,-4.982,-21 +tf_efficientnet_b7,74.720,25.280,92.220,7.780,66.35,600,0.949,bicubic,-10.216,-4.984,-1 +tf_efficientnet_b5_ap,74.600,25.400,91.990,8.010,30.39,456,0.934,bicubic,-9.652,-4.984,+12 +swin_base_patch4_window7_224,74.570,25.430,92.560,7.440,87.77,224,0.900,bicubic,-10.682,-5.002,-14 seresnet152d,74.510,25.490,92.080,7.920,66.84,320,1.000,bicubic,-9.852,-4.960,+6 -resnest200e,74.480,25.520,91.860,8.140,70.20,320,0.909,bicubic,-9.352,-5.034,+18 -dm_nfnet_f2,74.450,25.550,92.230,7.770,193.78,352,0.920,bicubic,-10.540,-4.914,-8 -dm_nfnet_f1,74.400,25.600,92.350,7.650,132.63,320,0.910,bicubic,-10.204,-4.718,-2 -efficientnet_v2s,74.170,25.830,91.710,8.290,23.94,384,1.000,bicubic,-9.638,-5.014,+17 
-resnest269e,74.170,25.830,91.950,8.050,110.93,416,0.928,bicubic,-10.348,-5.036,-3 +resnest200e,74.480,25.520,91.860,8.140,70.20,320,0.909,bicubic,-9.352,-5.034,+20 +dm_nfnet_f2,74.450,25.550,92.230,7.770,193.78,352,0.920,bicubic,-10.540,-4.914,-9 +tf_efficientnetv2_s_in21ft1k,74.450,25.550,92.510,7.490,21.46,384,1.000,bicubic,-9.852,-4.742,+4 +dm_nfnet_f1,74.400,25.600,92.350,7.650,132.63,320,0.910,bicubic,-10.204,-4.718,-3 +efficientnetv2_rw_s,74.170,25.830,91.710,8.290,23.94,384,1.000,bicubic,-9.638,-5.014,+18 +resnest269e,74.170,25.830,91.950,8.050,110.93,416,0.928,bicubic,-10.348,-5.036,-4 cait_xs24_384,74.160,25.840,91.910,8.090,26.67,384,1.000,bicubic,-9.902,-4.978,+9 pit_b_distilled_224,74.160,25.840,91.680,8.320,74.79,224,0.900,bicubic,-9.984,-5.176,+5 -swsl_resnext101_32x4d,74.140,25.860,91.990,8.010,44.18,224,0.875,bilinear,-9.090,-4.770,+24 +swsl_resnext101_32x4d,74.140,25.860,91.990,8.010,44.18,224,0.875,bilinear,-9.090,-4.770,+26 vit_base_patch16_384,74.130,25.870,92.360,7.640,86.86,384,1.000,bicubic,-10.080,-4.858,+1 eca_nfnet_l1,74.060,25.940,92.120,7.880,41.41,320,1.000,bicubic,-9.948,-4.908,+7 vit_base_patch16_224_miil,74.040,25.960,91.700,8.300,86.54,224,0.875,bilinear,-10.228,-5.102,-3 -swsl_resnext101_32x16d,74.020,25.980,92.160,7.840,194.03,224,0.875,bilinear,-9.326,-4.686,+17 -resnetv2_152x4_bitm,74.000,26.000,92.340,7.660,936.53,480,1.000,bilinear,-10.932,-5.096,-15 -resnetrs420,73.920,26.080,91.760,8.240,191.89,416,1.000,bicubic,-11.088,-5.364,-21 -tf_efficientnet_b6,73.900,26.100,91.750,8.250,43.04,528,0.942,bicubic,-10.210,-5.136,-2 -tf_efficientnet_b3_ns,73.890,26.110,91.870,8.130,12.23,300,0.904,bicubic,-10.158,-5.040,0 -resnetrs270,73.710,26.290,91.580,8.420,129.86,352,1.000,bicubic,-10.724,-5.390,-13 -resnet200d,73.680,26.320,91.570,8.430,64.69,320,1.000,bicubic,-10.282,-5.254,0 -ig_resnext101_32x8d,73.650,26.350,92.190,7.810,88.79,224,0.875,bilinear,-9.038,-4.446,+25 -resnetv2_152x2_bitm,73.630,26.370,92.590,7.410,236.34,480,1.000,bilinear,-10.810,-4.856,-17 +swsl_resnext101_32x16d,74.020,25.980,92.160,7.840,194.03,224,0.875,bilinear,-9.326,-4.686,+19 +tf_efficientnetv2_s,74.000,26.000,91.530,8.470,21.46,384,1.000,bicubic,-9.894,-5.168,+5 +resnetv2_152x4_bitm,74.000,26.000,92.340,7.660,936.53,480,1.000,bilinear,-10.932,-5.096,-17 +resnetrs420,73.920,26.080,91.760,8.240,191.89,416,1.000,bicubic,-11.088,-5.364,-24 +tf_efficientnet_b6,73.900,26.100,91.750,8.250,43.04,528,0.942,bicubic,-10.210,-5.136,-3 +tf_efficientnet_b3_ns,73.890,26.110,91.870,8.130,12.23,300,0.904,bicubic,-10.158,-5.040,-1 +resnetrs270,73.710,26.290,91.580,8.420,129.86,352,1.000,bicubic,-10.724,-5.390,-15 +resnet200d,73.680,26.320,91.570,8.430,64.69,320,1.000,bicubic,-10.282,-5.254,-1 +ig_resnext101_32x8d,73.650,26.350,92.190,7.810,88.79,224,0.875,bilinear,-9.038,-4.446,+29 +resnetv2_152x2_bitm,73.630,26.370,92.590,7.410,236.34,480,1.000,bilinear,-10.810,-4.856,-19 tf_efficientnet_b5,73.550,26.450,91.460,8.540,30.39,456,0.934,bicubic,-10.262,-5.288,-1 -resnetv2_101x3_bitm,73.530,26.470,92.570,7.430,387.93,480,1.000,bilinear,-10.864,-4.792,-17 +resnetv2_101x3_bitm,73.530,26.470,92.570,7.430,387.93,480,1.000,bilinear,-10.864,-4.792,-19 resnet152d,73.520,26.480,91.230,8.770,60.21,320,1.000,bicubic,-10.160,-5.508,+2 -resnetrs200,73.500,26.500,91.250,8.750,93.21,320,1.000,bicubic,-10.566,-5.624,-10 -resnetrs350,73.400,26.600,91.310,8.690,163.96,384,1.000,bicubic,-11.320,-5.678,-25 -regnety_160,73.360,26.640,91.690,8.310,83.59,288,1.000,bicubic,-10.326,-5.086,-2 
+resnetrs200,73.500,26.500,91.250,8.750,93.21,320,1.000,bicubic,-10.566,-5.624,-11 +resnetrs350,73.400,26.600,91.310,8.690,163.96,384,1.000,bicubic,-11.320,-5.678,-27 +twins_svt_large,73.390,26.610,90.910,9.090,99.27,224,0.900,bicubic,-10.288,-5.684,0 +regnety_160,73.360,26.640,91.690,8.310,83.59,288,1.000,bicubic,-10.326,-5.086,-3 efficientnet_b4,73.320,26.680,91.280,8.720,19.34,384,1.000,bicubic,-10.108,-5.316,0 vit_deit_base_distilled_patch16_224,73.240,26.760,91.000,9.000,87.34,224,0.900,bicubic,-10.148,-5.488,0 -resnetrs152,73.200,26.800,91.260,8.740,86.62,320,1.000,bicubic,-10.512,-5.354,-6 +resnetrs152,73.200,26.800,91.260,8.740,86.62,320,1.000,bicubic,-10.512,-5.354,-7 cait_s24_224,73.070,26.930,91.130,8.870,46.92,224,1.000,bicubic,-10.382,-5.434,-4 tf_efficientnet_b4_ap,72.890,27.110,90.980,9.020,19.34,380,0.922,bicubic,-10.358,-5.412,0 dm_nfnet_f0,72.790,27.210,91.040,8.960,71.49,256,0.900,bicubic,-10.552,-5.520,-2 -regnety_032,72.770,27.230,90.950,9.050,19.44,288,1.000,bicubic,-9.954,-5.474,+10 -nfnet_l0,72.610,27.390,91.010,8.990,35.07,288,1.000,bicubic,-10.150,-5.488,+8 -pnasnet5large,72.610,27.390,90.510,9.490,86.06,331,0.911,bicubic,-10.172,-5.530,+6 -resnest101e,72.570,27.430,90.820,9.180,48.28,256,0.875,bilinear,-10.320,-5.500,+4 -swsl_resnext50_32x4d,72.560,27.440,90.870,9.130,25.03,224,0.875,bilinear,-9.622,-5.360,+18 -tresnet_xl_448,72.550,27.450,90.310,9.690,78.44,448,0.875,bilinear,-10.500,-5.864,-1 +regnety_032,72.770,27.230,90.950,9.050,19.44,288,1.000,bicubic,-9.954,-5.474,+12 +pnasnet5large,72.610,27.390,90.510,9.490,86.06,331,0.911,bicubic,-10.172,-5.530,+8 +nfnet_l0,72.610,27.390,91.010,8.990,35.07,288,1.000,bicubic,-10.150,-5.488,+10 +twins_pcpvt_large,72.580,27.420,90.700,9.300,60.99,224,0.900,bicubic,-10.560,-5.898,-2 +resnest101e,72.570,27.430,90.820,9.180,48.28,256,0.875,bilinear,-10.320,-5.500,+5 +swsl_resnext50_32x4d,72.560,27.440,90.870,9.130,25.03,224,0.875,bilinear,-9.622,-5.360,+25 +tresnet_xl_448,72.550,27.450,90.310,9.690,78.44,448,0.875,bilinear,-10.500,-5.864,0 +twins_svt_base,72.550,27.450,90.460,9.540,56.07,224,0.900,bicubic,-10.586,-5.958,-5 vit_deit_base_patch16_384,72.530,27.470,90.250,9.750,86.86,384,1.000,bicubic,-10.576,-6.122,-5 resnet101d,72.410,27.590,90.650,9.350,44.57,320,1.000,bicubic,-10.612,-5.796,-2 tf_efficientnet_b4,72.290,27.710,90.590,9.410,19.34,380,0.922,bicubic,-10.732,-5.710,-2 -tf_efficientnet_b2_ns,72.280,27.720,91.090,8.910,9.11,260,0.890,bicubic,-10.100,-5.158,+6 +tf_efficientnet_b2_ns,72.280,27.720,91.090,8.910,9.11,260,0.890,bicubic,-10.100,-5.158,+8 tresnet_m,72.270,27.730,90.240,9.760,31.39,224,0.875,bilinear,-10.810,-5.878,-8 vit_large_patch16_224,72.250,27.750,90.990,9.010,304.33,224,0.900,bicubic,-10.812,-5.448,-8 -nasnetalarge,72.230,27.770,90.470,9.530,88.75,331,0.911,bicubic,-10.390,-5.576,0 -cait_xxs36_384,72.190,27.810,90.840,9.160,17.37,384,1.000,bicubic,-10.004,-5.308,+8 -resnetv2_50x3_bitm,72.180,27.820,91.790,8.210,217.32,480,1.000,bilinear,-11.604,-5.316,-25 +nasnetalarge,72.230,27.770,90.470,9.530,88.75,331,0.911,bicubic,-10.390,-5.576,+1 +cait_xxs36_384,72.190,27.810,90.840,9.160,17.37,384,1.000,bicubic,-10.004,-5.308,+14 +resnetv2_50x3_bitm,72.180,27.820,91.790,8.210,217.32,480,1.000,bilinear,-11.604,-5.316,-28 +twins_pcpvt_base,72.180,27.820,90.510,9.490,43.83,224,0.900,bicubic,-10.528,-5.836,-4 eca_nfnet_l0,71.850,28.150,91.130,8.870,24.14,288,1.000,bicubic,-10.738,-5.344,-2 -swin_small_patch4_window7_224,71.740,28.260,90.240,9.760,49.61,224,0.900,bicubic,-11.472,-6.082,-16 
-pit_b_224,71.700,28.300,89.250,10.750,73.76,224,0.900,bicubic,-10.746,-6.460,-3 -swsl_resnet50,71.700,28.300,90.500,9.500,25.56,224,0.875,bilinear,-9.466,-5.472,+30 -tresnet_xl,71.660,28.340,89.630,10.370,78.44,224,0.875,bilinear,-10.394,-6.306,+5 -tresnet_l_448,71.600,28.400,90.050,9.950,55.99,448,0.875,bilinear,-10.668,-5.926,-2 -ssl_resnext101_32x8d,71.500,28.500,90.460,9.540,88.79,224,0.875,bilinear,-10.116,-5.578,+13 -ecaresnet101d,71.490,28.510,90.330,9.670,44.57,224,0.875,bicubic,-10.682,-5.716,+1 -efficientnet_b3,71.480,28.520,90.060,9.940,12.23,320,1.000,bicubic,-10.762,-6.054,-4 -ssl_resnext101_32x16d,71.410,28.590,90.560,9.440,194.03,224,0.875,bilinear,-10.434,-5.536,+3 -pit_s_distilled_224,71.380,28.620,89.780,10.220,24.04,224,0.900,bicubic,-10.616,-6.018,+1 -vit_base_patch16_224,71.330,28.670,90.460,9.540,86.57,224,0.900,bicubic,-10.456,-5.662,+3 -ecaresnet50t,71.280,28.720,90.420,9.580,25.57,320,0.950,bicubic,-11.066,-5.718,-11 -vit_base_patch32_384,71.180,28.820,90.630,9.370,88.30,384,1.000,bicubic,-10.472,-5.498,+3 -vit_deit_base_patch16_224,71.170,28.830,89.200,10.800,86.57,224,0.900,bicubic,-10.828,-6.534,-4 -tresnet_m_448,70.990,29.010,88.680,11.320,31.39,448,0.875,bilinear,-10.724,-6.892,0 -resnest50d_4s2x40d,70.950,29.050,89.710,10.290,30.42,224,0.875,bicubic,-10.158,-5.848,+18 -wide_resnet50_2,70.950,29.050,89.230,10.770,68.88,224,0.875,bicubic,-10.506,-6.302,+7 -tnt_s_patch16_224,70.930,29.070,89.600,10.400,23.76,224,0.900,bicubic,-10.588,-6.148,+3 -tf_efficientnet_b3_ap,70.920,29.080,89.430,10.570,12.23,300,0.904,bicubic,-10.902,-6.194,-6 -tf_efficientnet_b1_ns,70.870,29.130,90.120,9.880,7.79,240,0.882,bicubic,-10.518,-5.618,+5 -vit_large_patch32_384,70.860,29.140,90.570,9.430,306.63,384,1.000,bicubic,-10.646,-5.522,+1 -tresnet_l,70.840,29.160,89.630,10.370,55.99,224,0.875,bilinear,-10.650,-5.994,-1 -rexnet_200,70.840,29.160,89.700,10.300,16.37,224,0.875,bicubic,-10.792,-5.968,-5 -resnetrs101,70.840,29.160,89.830,10.170,63.62,288,0.940,bicubic,-11.448,-6.178,-20 -resnetv2_101x1_bitm,70.710,29.290,90.800,9.200,44.54,480,1.000,bilinear,-11.502,-5.672,-20 -tf_efficientnet_b3,70.640,29.360,89.440,10.560,12.23,300,0.904,bicubic,-10.996,-6.278,-9 -cait_xxs24_384,70.600,29.400,89.720,10.280,12.03,384,1.000,bicubic,-10.366,-5.926,+12 -gluon_senet154,70.600,29.400,88.920,11.080,115.09,224,0.875,bicubic,-10.634,-6.428,+3 -ssl_resnext101_32x4d,70.530,29.470,89.760,10.240,44.18,224,0.875,bilinear,-10.394,-5.968,+11 -vit_deit_small_distilled_patch16_224,70.520,29.480,89.470,10.530,22.44,224,0.900,bicubic,-10.680,-5.908,+2 -legacy_senet154,70.500,29.500,89.010,10.990,115.09,224,0.875,bilinear,-10.810,-6.486,-2 -gluon_seresnext101_64x4d,70.430,29.570,89.350,10.650,88.23,224,0.875,bicubic,-10.464,-5.958,+10 -tf_efficientnet_lite4,70.430,29.570,89.110,10.890,13.01,380,0.920,bilinear,-11.106,-6.558,-13 -resnest50d,70.410,29.590,88.760,11.240,27.48,224,0.875,bilinear,-10.564,-6.618,+4 -resnest50d_1s4x24d,70.400,29.600,89.220,10.780,25.68,224,0.875,bicubic,-10.588,-6.102,+2 -seresnext50_32x4d,70.400,29.600,89.110,10.890,27.56,224,0.875,bicubic,-10.866,-6.510,-6 +swin_small_patch4_window7_224,71.740,28.260,90.240,9.760,49.61,224,0.900,bicubic,-11.472,-6.082,-19 +pit_b_224,71.700,28.300,89.250,10.750,73.76,224,0.900,bicubic,-10.746,-6.460,-2 +swsl_resnet50,71.700,28.300,90.500,9.500,25.56,224,0.875,bilinear,-9.466,-5.472,+41 +tresnet_xl,71.660,28.340,89.630,10.370,78.44,224,0.875,bilinear,-10.394,-6.306,+11 
+convit_base,71.600,28.400,90.150,9.850,86.54,224,0.875,bicubic,-10.690,-5.788,+1 +tresnet_l_448,71.600,28.400,90.050,9.950,55.99,448,0.875,bilinear,-10.668,-5.926,+2 +ssl_resnext101_32x8d,71.500,28.500,90.460,9.540,88.79,224,0.875,bilinear,-10.116,-5.578,+20 +ecaresnet101d,71.490,28.510,90.330,9.670,44.57,224,0.875,bicubic,-10.682,-5.716,+5 +efficientnet_b3,71.480,28.520,90.060,9.940,12.23,320,1.000,bicubic,-10.762,-6.054,0 +resnet51q,71.430,28.570,90.180,9.820,35.70,288,1.000,bilinear,-10.930,-6.000,-8 +ssl_resnext101_32x16d,71.410,28.590,90.560,9.440,194.03,224,0.875,bilinear,-10.434,-5.536,+8 +pit_s_distilled_224,71.380,28.620,89.780,10.220,24.04,224,0.900,bicubic,-10.616,-6.018,+5 +vit_base_patch16_224,71.330,28.670,90.460,9.540,86.57,224,0.900,bicubic,-10.456,-5.662,+8 +mixer_b16_224_miil,71.300,28.700,89.650,10.350,59.88,224,0.875,bilinear,-11.008,-6.066,-9 +ecaresnet50t,71.280,28.720,90.420,9.580,25.57,320,0.950,bicubic,-11.066,-5.718,-12 +vit_base_patch32_384,71.180,28.820,90.630,9.370,88.30,384,1.000,bicubic,-10.472,-5.498,+8 +vit_deit_base_patch16_224,71.170,28.830,89.200,10.800,86.57,224,0.900,bicubic,-10.828,-6.534,-1 +visformer_small,71.010,28.990,89.460,10.540,40.22,224,0.900,bicubic,-11.096,-6.412,-4 +tresnet_m_448,70.990,29.010,88.680,11.320,31.39,448,0.875,bilinear,-10.724,-6.892,+3 +wide_resnet50_2,70.950,29.050,89.230,10.770,68.88,224,0.875,bicubic,-10.506,-6.302,+12 +resnest50d_4s2x40d,70.950,29.050,89.710,10.290,30.42,224,0.875,bicubic,-10.158,-5.848,+25 +tnt_s_patch16_224,70.930,29.070,89.600,10.400,23.76,224,0.900,bicubic,-10.588,-6.148,+7 +tf_efficientnet_b3_ap,70.920,29.080,89.430,10.570,12.23,300,0.904,bicubic,-10.902,-6.194,-3 +tf_efficientnet_b1_ns,70.870,29.130,90.120,9.880,7.79,240,0.882,bicubic,-10.518,-5.618,+11 +vit_large_patch32_384,70.860,29.140,90.570,9.430,306.63,384,1.000,bicubic,-10.646,-5.522,+6 +resnetrs101,70.840,29.160,89.830,10.170,63.62,288,0.940,bicubic,-11.448,-6.178,-19 +rexnet_200,70.840,29.160,89.700,10.300,16.37,224,0.875,bicubic,-10.792,-5.968,-1 +tresnet_l,70.840,29.160,89.630,10.370,55.99,224,0.875,bilinear,-10.650,-5.994,+4 +tf_efficientnetv2_b3,70.830,29.170,89.500,10.500,14.36,300,0.904,bicubic,-11.140,-6.282,-11 +coat_lite_small,70.800,29.200,89.570,10.430,19.84,224,0.900,bicubic,-11.508,-6.280,-26 +levit_384,70.750,29.250,89.300,10.700,39.13,224,0.900,bicubic,-11.836,-6.716,-32 +resnetv2_101x1_bitm,70.710,29.290,90.800,9.200,44.54,480,1.000,bilinear,-11.502,-5.672,-22 +tf_efficientnet_b3,70.640,29.360,89.440,10.560,12.23,300,0.904,bicubic,-10.996,-6.278,-8 +gluon_senet154,70.600,29.400,88.920,11.080,115.09,224,0.875,bicubic,-10.634,-6.428,+7 +cait_xxs24_384,70.600,29.400,89.720,10.280,12.03,384,1.000,bicubic,-10.366,-5.926,+17 +convit_small,70.580,29.420,89.580,10.420,27.78,224,0.875,bicubic,-10.846,-6.164,-2 +twins_pcpvt_small,70.550,29.450,89.070,10.930,24.11,224,0.900,bicubic,-10.538,-6.572,+10 +ssl_resnext101_32x4d,70.530,29.470,89.760,10.240,44.18,224,0.875,bilinear,-10.394,-5.968,+14 +vit_deit_small_distilled_patch16_224,70.520,29.480,89.470,10.530,22.44,224,0.900,bicubic,-10.680,-5.908,+4 +legacy_senet154,70.500,29.500,89.010,10.990,115.09,224,0.875,bilinear,-10.810,-6.486,-1 +twins_svt_small,70.440,29.560,89.360,10.640,24.06,224,0.900,bicubic,-11.242,-6.310,-18 +gluon_seresnext101_64x4d,70.430,29.570,89.350,10.650,88.23,224,0.875,bicubic,-10.464,-5.958,+12 +tf_efficientnet_lite4,70.430,29.570,89.110,10.890,13.01,380,0.920,bilinear,-11.106,-6.558,-15 
+resnest50d,70.410,29.590,88.760,11.240,27.48,224,0.875,bilinear,-10.564,-6.618,+6 +seresnext50_32x4d,70.400,29.600,89.110,10.890,27.56,224,0.875,bicubic,-10.866,-6.510,-5 +resnest50d_1s4x24d,70.400,29.600,89.220,10.780,25.68,224,0.875,bicubic,-10.588,-6.102,+4 gernet_l,70.350,29.650,88.980,11.020,31.08,256,0.875,bilinear,-11.004,-6.556,-10 -gluon_resnet152_v1s,70.290,29.710,88.850,11.150,60.32,224,0.875,bicubic,-10.726,-6.562,-2 -repvgg_b3,70.250,29.750,88.730,11.270,123.09,224,0.875,bilinear,-10.242,-6.530,+13 -ecaresnet101d_pruned,70.130,29.870,89.590,10.410,24.88,224,0.875,bicubic,-10.688,-6.038,+4 -efficientnet_el,70.120,29.880,89.290,10.710,10.59,300,0.904,bicubic,-11.196,-6.236,-13 -inception_resnet_v2,70.120,29.880,88.700,11.300,55.84,299,0.897,bicubic,-10.338,-6.606,+14 -gluon_seresnext101_32x4d,70.010,29.990,88.900,11.100,48.96,224,0.875,bicubic,-10.894,-6.394,-2 -regnety_320,70.000,30.000,88.890,11.110,145.05,224,0.875,bicubic,-10.812,-6.354,+1 +gluon_resnet152_v1s,70.290,29.710,88.850,11.150,60.32,224,0.875,bicubic,-10.726,-6.562,0 +repvgg_b3,70.250,29.750,88.730,11.270,123.09,224,0.875,bilinear,-10.242,-6.530,+15 +coat_mini,70.220,29.780,89.440,10.560,10.34,224,0.900,bicubic,-11.048,-5.952,-10 +ecaresnet101d_pruned,70.130,29.870,89.590,10.410,24.88,224,0.875,bicubic,-10.688,-6.038,+5 +efficientnet_el,70.120,29.880,89.290,10.710,10.59,300,0.904,bicubic,-11.196,-6.236,-14 +inception_resnet_v2,70.120,29.880,88.700,11.300,55.84,299,0.897,bicubic,-10.338,-6.606,+15 +gluon_seresnext101_32x4d,70.010,29.990,88.900,11.100,48.96,224,0.875,bicubic,-10.894,-6.394,-1 +regnety_320,70.000,30.000,88.890,11.110,145.05,224,0.875,bicubic,-10.812,-6.354,+2 +levit_256,69.970,30.030,89.250,10.750,18.89,224,0.900,bicubic,-11.540,-6.240,-26 gluon_resnet152_v1d,69.960,30.040,88.490,11.510,60.21,224,0.875,bicubic,-10.514,-6.716,+9 -pit_s_224,69.890,30.110,88.930,11.070,23.46,224,0.900,bicubic,-11.204,-6.402,-11 +pit_s_224,69.890,30.110,88.930,11.070,23.46,224,0.900,bicubic,-11.204,-6.402,-12 ecaresnet50d,69.840,30.160,89.400,10.600,25.58,224,0.875,bicubic,-10.752,-5.920,+3 ssl_resnext50_32x4d,69.710,30.290,89.440,10.560,25.03,224,0.875,bilinear,-10.608,-5.966,+12 gluon_resnext101_64x4d,69.680,30.320,88.270,11.730,83.46,224,0.875,bicubic,-10.924,-6.718,0 @@ -133,62 +156,65 @@ efficientnet_b3_pruned,69.580,30.420,88.980,11.020,9.86,300,0.904,bicubic,-11.27 nf_resnet50,69.580,30.420,88.730,11.270,25.56,288,0.940,bicubic,-11.114,-6.626,-4 gernet_m,69.530,30.470,88.690,11.310,21.14,224,0.875,bilinear,-11.202,-6.494,-6 efficientnet_el_pruned,69.520,30.480,88.930,11.070,10.59,300,0.904,bicubic,-10.780,-6.288,+10 -ens_adv_inception_resnet_v2,69.520,30.480,88.510,11.490,55.84,299,0.897,bicubic,-10.462,-6.426,+24 -repvgg_b3g4,69.520,30.480,88.450,11.550,83.83,224,0.875,bilinear,-10.692,-6.660,+14 +ens_adv_inception_resnet_v2,69.520,30.480,88.510,11.490,55.84,299,0.897,bicubic,-10.462,-6.426,+25 +repvgg_b3g4,69.520,30.480,88.450,11.550,83.83,224,0.875,bilinear,-10.694,-6.660,+14 efficientnet_b2,69.500,30.500,88.680,11.320,9.11,288,1.000,bicubic,-11.112,-6.638,-8 rexnet_150,69.470,30.530,88.980,11.020,9.73,224,0.875,bicubic,-10.840,-6.186,+4 -swin_tiny_patch4_window7_224,69.450,30.550,89.020,10.980,28.29,224,0.900,bicubic,-11.928,-6.520,-32 +swin_tiny_patch4_window7_224,69.450,30.550,89.020,10.980,28.29,224,0.900,bicubic,-11.928,-6.520,-34 regnetx_320,69.440,30.560,88.270,11.730,107.81,224,0.875,bicubic,-10.806,-6.756,+8 -inception_v4,69.360,30.640,88.780,11.220,42.68,299,0.875,bicubic,-10.808,-6.188,+11 
+inception_v4,69.360,30.640,88.780,11.220,42.68,299,0.875,bicubic,-10.808,-6.188,+12 legacy_seresnext101_32x4d,69.360,30.640,88.070,11.930,48.96,224,0.875,bilinear,-10.868,-6.948,+7 ecaresnetlight,69.340,30.660,89.220,10.780,30.16,224,0.875,bicubic,-11.122,-6.030,-7 resnet50d,69.330,30.670,88.220,11.780,25.58,224,0.875,bicubic,-11.200,-6.940,-12 -xception71,69.320,30.680,88.260,11.740,42.34,299,0.903,bicubic,-10.554,-6.662,+20 -gluon_xception65,69.160,30.840,88.090,11.910,39.92,299,0.903,bicubic,-10.556,-6.770,+29 -gluon_resnet152_v1c,69.140,30.860,87.870,12.130,60.21,224,0.875,bicubic,-10.770,-6.970,+15 +xception71,69.320,30.680,88.260,11.740,42.34,299,0.903,bicubic,-10.554,-6.662,+21 +gluon_xception65,69.160,30.840,88.090,11.910,39.92,299,0.903,bicubic,-10.556,-6.770,+31 +gluon_resnet152_v1c,69.140,30.860,87.870,12.130,60.21,224,0.875,bicubic,-10.770,-6.970,+16 mixnet_xl,69.100,30.900,88.310,11.690,11.90,224,0.875,bicubic,-11.376,-6.626,-14 -gluon_resnet101_v1d,69.010,30.990,88.100,11.900,44.57,224,0.875,bicubic,-11.404,-6.914,-11 -repvgg_b2g4,69.000,31.000,88.360,11.640,61.76,224,0.875,bilinear,-10.366,-6.328,+36 -seresnet50,68.980,31.020,88.710,11.290,28.09,224,0.875,bicubic,-11.294,-6.360,-5 -xception65,68.980,31.020,88.480,11.520,39.92,299,0.903,bicubic,-10.572,-6.174,+29 -gluon_resnext101_32x4d,68.960,31.040,88.360,11.640,44.18,224,0.875,bicubic,-11.374,-6.566,-13 -tf_efficientnet_b2_ap,68.920,31.080,88.350,11.650,9.11,260,0.890,bicubic,-11.380,-6.678,-9 +tf_efficientnetv2_b2,69.090,30.910,88.220,11.780,10.10,260,0.890,bicubic,-11.118,-6.822,+2 +gluon_resnet101_v1d,69.010,30.990,88.100,11.900,44.57,224,0.875,bicubic,-11.404,-6.914,-12 +repvgg_b2g4,69.000,31.000,88.360,11.640,61.76,224,0.875,bilinear,-10.366,-6.328,+38 +seresnet50,68.980,31.020,88.710,11.290,28.09,224,0.875,bicubic,-11.294,-6.360,-6 +xception65,68.980,31.020,88.480,11.520,39.92,299,0.903,bicubic,-10.572,-6.174,+30 +gluon_resnext101_32x4d,68.960,31.040,88.360,11.640,44.18,224,0.875,bicubic,-11.374,-6.566,-14 +tf_efficientnet_b2_ap,68.920,31.080,88.350,11.650,9.11,260,0.890,bicubic,-11.380,-6.678,-10 cspdarknet53,68.890,31.110,88.600,11.400,27.64,256,0.887,bilinear,-11.168,-6.484,+1 -regnety_120,68.850,31.150,88.330,11.670,51.82,224,0.875,bicubic,-11.516,-6.796,-17 -gluon_resnet152_v1b,68.820,31.180,87.710,12.290,60.19,224,0.875,bicubic,-10.866,-7.026,+19 -dpn131,68.770,31.230,87.470,12.530,79.25,224,0.875,bicubic,-11.052,-7.240,+11 +regnety_120,68.850,31.150,88.330,11.670,51.82,224,0.875,bicubic,-11.516,-6.796,-18 +gluon_resnet152_v1b,68.820,31.180,87.710,12.290,60.19,224,0.875,bicubic,-10.866,-7.026,+20 +dpn131,68.770,31.230,87.470,12.530,79.25,224,0.875,bicubic,-11.052,-7.240,+12 cspresnext50,68.760,31.240,87.950,12.050,20.57,224,0.875,bilinear,-11.280,-6.994,-2 tf_efficientnet_b2,68.750,31.250,87.990,12.010,9.11,260,0.890,bicubic,-11.336,-6.918,-5 -resnext50d_32x4d,68.740,31.260,88.300,11.700,25.05,224,0.875,bicubic,-10.936,-6.566,+16 +resnext50d_32x4d,68.740,31.260,88.300,11.700,25.05,224,0.875,bicubic,-10.936,-6.566,+17 vit_deit_small_patch16_224,68.720,31.280,88.200,11.800,22.05,224,0.900,bicubic,-11.136,-6.852,+5 -gluon_resnet101_v1s,68.710,31.290,87.910,12.090,44.67,224,0.875,bicubic,-11.592,-7.250,-20 +gluon_resnet101_v1s,68.710,31.290,87.910,12.090,44.67,224,0.875,bicubic,-11.592,-7.250,-21 regnety_080,68.700,31.300,87.970,12.030,39.18,224,0.875,bicubic,-11.176,-6.860,0 dpn107,68.690,31.310,88.130,11.870,86.92,224,0.875,bicubic,-11.466,-6.780,-12 
gluon_seresnext50_32x4d,68.670,31.330,88.310,11.690,27.56,224,0.875,bicubic,-11.248,-6.512,-6 -hrnet_w64,68.640,31.360,88.050,11.950,128.06,224,0.875,bilinear,-10.834,-6.602,+17 -resnext50_32x4d,68.640,31.360,87.570,12.430,25.03,224,0.875,bicubic,-11.128,-7.028,+3 -dpn98,68.590,31.410,87.680,12.320,61.57,224,0.875,bicubic,-11.052,-6.918,+9 +hrnet_w64,68.640,31.360,88.050,11.950,128.06,224,0.875,bilinear,-10.834,-6.602,+18 +resnext50_32x4d,68.640,31.360,87.570,12.430,25.03,224,0.875,bicubic,-11.128,-7.028,+4 +dpn98,68.590,31.410,87.680,12.320,61.57,224,0.875,bicubic,-11.052,-6.918,+10 regnetx_160,68.530,31.470,88.450,11.550,54.28,224,0.875,bicubic,-11.326,-6.380,-4 -cspresnet50,68.460,31.540,88.010,11.990,21.62,256,0.887,bilinear,-11.114,-6.702,+9 -rexnet_130,68.450,31.550,88.040,11.960,7.56,224,0.875,bicubic,-11.050,-6.642,+11 -ecaresnet50d_pruned,68.420,31.580,88.370,11.630,19.94,224,0.875,bicubic,-11.296,-6.510,+1 -regnety_064,68.420,31.580,88.080,11.920,30.58,224,0.875,bicubic,-11.302,-6.688,-1 -tf_efficientnet_el,68.420,31.580,88.210,11.790,10.59,300,0.904,bicubic,-11.830,-6.918,-28 -cait_xxs36_224,68.410,31.590,88.630,11.370,17.30,224,1.000,bicubic,-11.340,-6.236,-4 -ssl_resnet50,68.410,31.590,88.560,11.440,25.56,224,0.875,bilinear,-10.812,-6.272,+21 +cspresnet50,68.460,31.540,88.010,11.990,21.62,256,0.887,bilinear,-11.114,-6.702,+10 +rexnet_130,68.450,31.550,88.040,11.960,7.56,224,0.875,bicubic,-11.050,-6.642,+12 +tf_efficientnet_el,68.420,31.580,88.210,11.790,10.59,300,0.904,bicubic,-11.830,-6.918,-29 +regnety_064,68.420,31.580,88.080,11.920,30.58,224,0.875,bicubic,-11.302,-6.688,0 +ecaresnet50d_pruned,68.420,31.580,88.370,11.630,19.94,224,0.875,bicubic,-11.296,-6.510,+2 +ssl_resnet50,68.410,31.590,88.560,11.440,25.56,224,0.875,bilinear,-10.812,-6.272,+23 +cait_xxs36_224,68.410,31.590,88.630,11.370,17.30,224,1.000,bicubic,-11.340,-6.236,-3 skresnext50_32x4d,68.350,31.650,87.570,12.430,27.48,224,0.875,bicubic,-11.806,-7.072,-24 -dla102x2,68.330,31.670,87.890,12.110,41.28,224,0.875,bilinear,-11.118,-6.750,+6 +dla102x2,68.330,31.670,87.890,12.110,41.28,224,0.875,bilinear,-11.118,-6.750,+8 efficientnet_b2_pruned,68.320,31.680,88.100,11.900,8.31,260,0.890,bicubic,-11.596,-6.756,-19 -gluon_resnext50_32x4d,68.310,31.690,87.300,12.700,25.03,224,0.875,bicubic,-11.044,-7.126,+6 -tf_efficientnet_lite3,68.230,31.770,87.740,12.260,8.20,300,0.904,bilinear,-11.590,-7.174,-13 +gluon_resnext50_32x4d,68.310,31.690,87.300,12.700,25.03,224,0.875,bicubic,-11.044,-7.126,+8 ecaresnet26t,68.230,31.770,88.790,11.210,16.01,320,0.950,bicubic,-11.624,-6.294,-14 -ese_vovnet39b,68.210,31.790,88.250,11.750,24.57,224,0.875,bicubic,-11.110,-6.462,+4 -regnetx_120,68.150,31.850,87.660,12.340,46.11,224,0.875,bicubic,-11.446,-7.078,-6 +tf_efficientnet_lite3,68.230,31.770,87.740,12.260,8.20,300,0.904,bilinear,-11.590,-7.174,-12 +ese_vovnet39b,68.210,31.790,88.250,11.750,24.57,224,0.875,bicubic,-11.110,-6.462,+6 +regnetx_120,68.150,31.850,87.660,12.340,46.11,224,0.875,bicubic,-11.446,-7.078,-5 resnetrs50,68.030,31.970,87.710,12.290,35.69,224,0.910,bicubic,-11.862,-7.258,-23 -pit_xs_distilled_224,68.020,31.980,87.720,12.280,11.00,224,0.900,bicubic,-11.286,-6.644,+6 +pit_xs_distilled_224,68.020,31.980,87.720,12.280,11.00,224,0.900,bicubic,-11.286,-6.644,+8 dpn92,67.990,32.010,87.580,12.420,37.67,224,0.875,bicubic,-12.018,-7.256,-30 -nf_regnet_b1,67.980,32.020,88.180,11.820,10.22,288,0.900,bicubic,-11.326,-6.568,+3 -gluon_resnet50_v1d,67.940,32.060,87.130,12.870,25.58,224,0.875,bicubic,-11.134,-7.340,+16 
+nf_regnet_b1,67.980,32.020,88.180,11.820,10.22,288,0.900,bicubic,-11.326,-6.568,+5 +gluon_resnet50_v1d,67.940,32.060,87.130,12.870,25.58,224,0.875,bicubic,-11.134,-7.340,+18 +levit_192,67.900,32.100,87.890,12.110,10.95,224,0.900,bicubic,-11.942,-6.896,-22 +tf_efficientnetv2_b1,67.890,32.110,87.800,12.200,8.14,240,0.882,bicubic,-11.572,-6.922,-6 regnetx_080,67.880,32.120,86.990,13.010,39.57,224,0.875,bicubic,-11.314,-7.570,+12 resnext101_32x8d,67.860,32.140,87.490,12.510,88.79,224,0.875,bilinear,-11.448,-7.028,-3 efficientnet_em,67.840,32.160,88.120,11.880,6.90,240,0.882,bicubic,-11.412,-6.674,+4 @@ -202,12 +228,12 @@ xception,67.650,32.350,87.570,12.430,22.86,299,0.897,bicubic,-11.402,-6.822,+8 dpn68b,67.630,32.370,87.660,12.340,12.61,224,0.875,bicubic,-11.586,-6.754,-1 dla169,67.610,32.390,87.590,12.410,53.39,224,0.875,bilinear,-11.078,-6.746,+18 gluon_inception_v3,67.590,32.410,87.470,12.530,23.83,299,0.875,bicubic,-11.216,-6.900,+12 -gluon_resnet101_v1c,67.580,32.420,87.180,12.820,44.57,224,0.875,bicubic,-11.954,-7.398,-22 +gluon_resnet101_v1c,67.580,32.420,87.180,12.820,44.57,224,0.875,bicubic,-11.954,-7.398,-23 regnety_040,67.580,32.420,87.510,12.490,20.65,224,0.875,bicubic,-11.640,-7.146,-6 res2net50_26w_8s,67.570,32.430,87.280,12.720,48.40,224,0.875,bilinear,-11.628,-7.088,-5 hrnet_w40,67.560,32.440,87.140,12.860,57.56,224,0.875,bilinear,-11.360,-7.330,+4 legacy_seresnet152,67.520,32.480,87.390,12.610,66.82,224,0.875,bilinear,-11.140,-6.980,+13 -resnetv2_50x1_bitm,67.520,32.480,89.250,10.750,25.55,480,1.000,bilinear,-12.652,-6.376,-58 +resnetv2_50x1_bitm,67.520,32.480,89.250,10.750,25.55,480,1.000,bilinear,-12.652,-6.376,-60 tf_efficientnet_b1_ap,67.520,32.480,87.760,12.240,7.79,240,0.882,bicubic,-11.760,-6.546,-14 efficientnet_b1,67.470,32.530,87.510,12.490,7.79,256,1.000,bicubic,-11.324,-6.832,+5 gluon_resnet101_v1b,67.460,32.540,87.240,12.760,44.55,224,0.875,bicubic,-11.846,-7.284,-21 @@ -215,37 +241,40 @@ tf_efficientnet_cc_b1_8e,67.450,32.550,87.310,12.690,39.72,240,0.882,bicubic,-11 res2net101_26w_4s,67.440,32.560,87.010,12.990,45.21,224,0.875,bilinear,-11.758,-7.422,-12 resnet50,67.440,32.560,87.420,12.580,25.56,224,0.875,bicubic,-11.598,-6.970,-6 resnetblur50,67.430,32.570,87.440,12.560,25.56,224,0.875,bicubic,-11.856,-7.198,-21 -cait_xxs24_224,67.330,32.670,87.510,12.490,11.96,224,1.000,bicubic,-11.056,-6.800,+15 -regnetx_032,67.290,32.710,87.000,13.000,15.30,224,0.875,bicubic,-10.882,-7.088,+23 -xception41,67.250,32.750,87.200,12.800,26.97,299,0.903,bicubic,-11.266,-7.078,+5 +cait_xxs24_224,67.330,32.670,87.510,12.490,11.96,224,1.000,bicubic,-11.056,-6.800,+17 +regnetx_032,67.290,32.710,87.000,13.000,15.30,224,0.875,bicubic,-10.882,-7.088,+26 +coat_tiny,67.250,32.750,87.340,12.660,5.50,224,0.900,bicubic,-11.184,-6.698,+13 +xception41,67.250,32.750,87.200,12.800,26.97,299,0.903,bicubic,-11.266,-7.078,+4 resnest26d,67.200,32.800,87.170,12.830,17.07,224,0.875,bilinear,-11.278,-7.128,+7 -legacy_seresnet101,67.160,32.840,87.060,12.940,49.33,224,0.875,bilinear,-11.222,-7.204,+12 -repvgg_b2,67.160,32.840,87.330,12.670,89.02,224,0.875,bilinear,-11.632,-7.084,-5 -dla60x,67.100,32.900,87.190,12.810,17.35,224,0.875,bilinear,-11.146,-6.828,+13 -gluon_resnet50_v1s,67.060,32.940,86.860,13.140,25.68,224,0.875,bicubic,-11.652,-7.378,-5 -tv_resnet152,67.050,32.950,87.550,12.450,60.19,224,0.875,bilinear,-11.262,-6.488,+10 +repvgg_b2,67.160,32.840,87.330,12.670,89.02,224,0.875,bilinear,-11.632,-7.084,-6 
+legacy_seresnet101,67.160,32.840,87.060,12.940,49.33,224,0.875,bilinear,-11.222,-7.204,+13 +dla60x,67.100,32.900,87.190,12.810,17.35,224,0.875,bilinear,-11.146,-6.828,+15 +gluon_resnet50_v1s,67.060,32.940,86.860,13.140,25.68,224,0.875,bicubic,-11.650,-7.378,-6 +tv_resnet152,67.050,32.950,87.550,12.450,60.19,224,0.875,bilinear,-11.262,-6.488,+12 dla60_res2net,67.020,32.980,87.160,12.840,20.85,224,0.875,bilinear,-11.444,-7.046,+2 -dla102x,67.010,32.990,86.770,13.230,26.31,224,0.875,bilinear,-11.500,-7.458,-2 -mixnet_l,66.940,33.060,86.910,13.090,7.33,224,0.875,bicubic,-12.036,-7.272,-18 -pit_xs_224,66.920,33.080,87.280,12.720,10.62,224,0.900,bicubic,-11.262,-6.888,+11 -res2net50_26w_6s,66.910,33.090,86.860,13.140,37.05,224,0.875,bilinear,-11.660,-7.264,-7 -repvgg_b1,66.900,33.100,86.780,13.220,57.42,224,0.875,bilinear,-11.466,-7.318,+3 -efficientnet_es,66.880,33.120,86.730,13.270,5.44,224,0.875,bicubic,-11.186,-7.196,+13 -tf_efficientnet_b1,66.880,33.120,87.010,12.990,7.79,240,0.882,bicubic,-11.946,-7.188,-19 +dla102x,67.010,32.990,86.770,13.230,26.31,224,0.875,bilinear,-11.500,-7.458,-3 +mixnet_l,66.940,33.060,86.910,13.090,7.33,224,0.875,bicubic,-12.036,-7.272,-19 +pit_xs_224,66.920,33.080,87.280,12.720,10.62,224,0.900,bicubic,-11.262,-6.888,+13 +res2net50_26w_6s,66.910,33.090,86.860,13.140,37.05,224,0.875,bilinear,-11.660,-7.264,-8 +repvgg_b1,66.900,33.100,86.780,13.220,57.42,224,0.875,bilinear,-11.466,-7.318,+4 +efficientnet_es,66.880,33.120,86.730,13.270,5.44,224,0.875,bicubic,-11.186,-7.196,+15 +tf_efficientnet_b1,66.880,33.120,87.010,12.990,7.79,240,0.882,bicubic,-11.946,-7.188,-20 regnetx_040,66.840,33.160,86.730,13.270,22.12,224,0.875,bicubic,-11.642,-7.514,-8 -hrnet_w30,66.780,33.220,86.800,13.200,37.71,224,0.875,bilinear,-11.426,-7.422,+4 -tf_mixnet_l,66.780,33.220,86.470,13.530,7.33,224,0.875,bicubic,-11.994,-7.528,-18 -selecsls60b,66.760,33.240,86.530,13.470,32.77,224,0.875,bicubic,-11.652,-7.644,-6 +hrnet_w30,66.780,33.220,86.800,13.200,37.71,224,0.875,bilinear,-11.426,-7.422,+6 +tf_mixnet_l,66.780,33.220,86.470,13.530,7.33,224,0.875,bicubic,-11.994,-7.528,-19 +selecsls60b,66.760,33.240,86.530,13.470,32.77,224,0.875,bicubic,-11.652,-7.644,-5 hrnet_w32,66.750,33.250,87.300,12.700,41.23,224,0.875,bilinear,-11.700,-6.886,-9 -wide_resnet101_2,66.730,33.270,87.030,12.970,126.89,224,0.875,bilinear,-12.126,-7.252,-26 -adv_inception_v3,66.650,33.350,86.540,13.460,23.83,299,0.875,bicubic,-10.932,-7.196,+23 -dla60_res2next,66.640,33.360,87.030,12.970,17.03,224,0.875,bilinear,-11.800,-7.122,-11 -gluon_resnet50_v1c,66.560,33.440,86.180,13.820,25.58,224,0.875,bicubic,-11.452,-7.808,+5 +wide_resnet101_2,66.730,33.270,87.030,12.970,126.89,224,0.875,bilinear,-12.126,-7.252,-27 +tf_efficientnetv2_b0,66.700,33.300,86.710,13.290,7.14,224,0.875,bicubic,-11.656,-7.314,-4 +adv_inception_v3,66.650,33.350,86.540,13.460,23.83,299,0.875,bicubic,-10.932,-7.196,+24 +dla60_res2next,66.640,33.360,87.030,12.970,17.03,224,0.875,bilinear,-11.800,-7.122,-12 +gluon_resnet50_v1c,66.560,33.440,86.180,13.820,25.58,224,0.875,bicubic,-11.452,-7.808,+6 +levit_128,66.550,33.450,86.750,13.250,9.21,224,0.900,bicubic,-11.936,-7.260,-19 dla102,66.540,33.460,86.910,13.090,33.27,224,0.875,bilinear,-11.492,-7.036,+3 -tf_inception_v3,66.410,33.590,86.660,13.340,23.83,299,0.875,bicubic,-11.446,-6.980,+12 +tf_inception_v3,66.410,33.590,86.660,13.340,23.83,299,0.875,bicubic,-11.448,-7.210,+10 hardcorenas_f,66.370,33.630,86.200,13.800,8.20,224,0.875,bilinear,-11.734,-7.602,-1 
-coat_lite_tiny,66.290,33.710,86.980,13.020,5.72,224,0.900,bicubic,-11.222,-6.936,+20 efficientnet_b0,66.290,33.710,85.960,14.040,5.29,224,0.875,bicubic,-11.408,-7.572,+11 +coat_lite_tiny,66.290,33.710,86.980,13.020,5.72,224,0.900,bicubic,-11.222,-6.936,+20 legacy_seresnet50,66.250,33.750,86.330,13.670,28.09,224,0.875,bilinear,-11.380,-7.418,+11 selecsls60,66.210,33.790,86.340,13.660,30.67,224,0.875,bicubic,-11.772,-7.488,0 tf_efficientnet_em,66.180,33.820,86.360,13.640,6.90,240,0.882,bicubic,-11.950,-7.684,-7 @@ -254,19 +283,19 @@ tf_efficientnet_cc_b0_8e,66.170,33.830,86.240,13.760,24.01,224,0.875,bicubic,-11 inception_v3,66.160,33.840,86.320,13.680,23.83,299,0.875,bicubic,-11.278,-7.156,+15 res2net50_26w_4s,66.140,33.860,86.600,13.400,25.70,224,0.875,bilinear,-11.824,-7.254,-4 efficientnet_b1_pruned,66.090,33.910,86.570,13.430,6.33,240,0.882,bicubic,-12.146,-7.264,-17 +rexnet_100,66.070,33.930,86.490,13.510,4.80,224,0.875,bicubic,-11.788,-6.926,-1 gluon_resnet50_v1b,66.070,33.930,86.260,13.740,25.56,224,0.875,bicubic,-11.510,-7.456,+8 -rexnet_100,66.070,33.930,86.490,13.510,4.80,224,0.875,bicubic,-11.788,-7.380,-3 regnety_016,66.060,33.940,86.380,13.620,11.20,224,0.875,bicubic,-11.802,-7.340,-5 res2net50_14w_8s,66.020,33.980,86.250,13.750,25.06,224,0.875,bilinear,-12.130,-7.598,-17 seresnext26t_32x4d,65.880,34.120,85.680,14.320,16.81,224,0.875,bicubic,-12.106,-8.066,-12 repvgg_b1g4,65.850,34.150,86.120,13.880,39.97,224,0.875,bilinear,-11.744,-7.706,+1 res2next50,65.850,34.150,85.840,14.160,24.67,224,0.875,bilinear,-12.396,-8.052,-25 -hardcorenas_e,65.840,34.160,85.980,14.020,8.07,224,0.875,bilinear,-11.954,-7.714,-7 densenet161,65.840,34.160,86.450,13.550,28.68,224,0.875,bicubic,-11.518,-7.188,+8 +hardcorenas_e,65.840,34.160,85.980,14.020,8.07,224,0.875,bilinear,-11.954,-7.714,-7 resnet34d,65.780,34.220,86.720,13.280,21.82,224,0.875,bicubic,-11.336,-6.662,+12 mobilenetv3_large_100_miil,65.760,34.240,85.200,14.800,5.48,224,0.875,bilinear,-12.156,-7.710,-15 skresnet34,65.750,34.250,85.960,14.040,22.28,224,0.875,bicubic,-11.162,-7.362,+18 -vit_small_patch16_224,65.740,34.260,86.120,13.880,48.75,224,0.900,bicubic,-12.118,-7.296,-13 +vit_small_patch16_224,65.740,34.260,86.120,13.880,48.75,224,0.900,bicubic,-12.118,-7.520,-13 tv_resnet101,65.690,34.310,85.980,14.020,44.55,224,0.875,bilinear,-11.684,-7.560,+1 hardcorenas_d,65.630,34.370,85.460,14.540,7.50,224,0.875,bilinear,-11.802,-8.024,-1 selecsls42b,65.610,34.390,85.810,14.190,32.46,224,0.875,bicubic,-11.564,-7.580,+5 @@ -284,17 +313,18 @@ legacy_seresnext26_32x4d,65.050,34.950,85.660,14.340,16.79,224,0.875,bicubic,-12 mobilenetv2_120d,65.030,34.970,85.960,14.040,5.83,224,0.875,bicubic,-12.254,-7.532,-9 hrnet_w18,64.920,35.080,85.740,14.260,21.30,224,0.875,bilinear,-11.838,-7.704,+4 hardcorenas_c,64.860,35.140,85.250,14.750,5.52,224,0.875,bilinear,-12.194,-7.908,-5 -densenet169,64.760,35.240,85.240,14.760,14.15,224,0.875,bicubic,-11.146,-7.786,+16 +densenet169,64.760,35.240,85.240,14.760,14.15,224,0.875,bicubic,-11.146,-7.786,+17 mixnet_m,64.700,35.300,85.450,14.550,5.01,224,0.875,bicubic,-12.560,-7.974,-12 resnet26d,64.680,35.320,85.120,14.880,16.01,224,0.875,bicubic,-12.016,-8.030,+1 +levit_128s,64.610,35.390,84.730,15.270,7.78,224,0.900,bicubic,-11.920,-8.136,+6 repvgg_a2,64.450,35.550,85.130,14.870,28.21,224,0.875,bilinear,-12.010,-7.874,+7 -hardcorenas_b,64.420,35.580,84.870,15.130,5.18,224,0.875,bilinear,-12.118,-7.884,+4 -regnetx_016,64.380,35.620,85.470,14.530,9.19,224,0.875,bicubic,-12.570,-7.950,-9 
-tf_efficientnet_lite1,64.380,35.620,85.470,14.530,5.42,240,0.882,bicubic,-12.262,-7.756,-2 -tf_efficientnet_b0,64.310,35.690,85.280,14.720,5.29,224,0.875,bicubic,-12.538,-7.948,-7 -tf_mixnet_m,64.270,35.730,85.090,14.910,5.01,224,0.875,bicubic,-12.672,-8.062,-11 +hardcorenas_b,64.420,35.580,84.870,15.130,5.18,224,0.875,bilinear,-12.118,-7.884,+3 +tf_efficientnet_lite1,64.380,35.620,85.470,14.530,5.42,240,0.882,bicubic,-12.262,-7.756,-3 +regnetx_016,64.380,35.620,85.470,14.530,9.19,224,0.875,bicubic,-12.570,-7.950,-10 +tf_efficientnet_b0,64.310,35.690,85.280,14.720,5.29,224,0.875,bicubic,-12.538,-7.948,-8 +tf_mixnet_m,64.270,35.730,85.090,14.910,5.01,224,0.875,bicubic,-12.672,-8.062,-12 dpn68,64.230,35.770,85.180,14.820,12.61,224,0.875,bicubic,-12.088,-7.798,+2 -tf_efficientnet_es,64.230,35.770,84.740,15.260,5.44,224,0.875,bicubic,-12.364,-8.462,-4 +tf_efficientnet_es,64.230,35.770,84.740,15.260,5.44,224,0.875,bicubic,-12.364,-8.462,-5 regnety_008,64.160,35.840,85.270,14.730,6.26,224,0.875,bicubic,-12.156,-7.796,+1 mobilenetv2_140,64.060,35.940,85.040,14.960,6.11,224,0.875,bicubic,-12.456,-7.956,-3 densenet121,63.750,36.250,84.590,15.410,7.98,224,0.875,bicubic,-11.828,-8.062,+7 @@ -306,7 +336,7 @@ mixnet_s,63.390,36.610,84.740,15.260,4.13,224,0.875,bicubic,-12.602,-8.056,-4 mobilenetv3_large_100,63.360,36.640,84.090,15.910,5.48,224,0.875,bicubic,-12.406,-8.452,-2 efficientnet_es_pruned,63.330,36.670,84.950,15.050,5.44,224,0.875,bicubic,-11.670,-7.498,+13 tv_resnet50,63.330,36.670,84.640,15.360,25.56,224,0.875,bilinear,-12.808,-8.224,-8 -mixer_b16_224,63.280,36.720,83.310,16.690,59.88,224,0.875,bicubic,-13.322,-8.918,-17 +mixer_b16_224,63.280,36.720,83.310,16.690,59.88,224,0.875,bicubic,-13.322,-8.918,-18 efficientnet_lite0,63.240,36.760,84.440,15.560,4.65,224,0.875,bicubic,-12.244,-8.070,0 mobilenetv3_rw,63.220,36.780,84.510,15.490,5.48,224,0.875,bicubic,-12.414,-8.198,-5 pit_ti_distilled_224,63.150,36.850,83.960,16.040,5.10,224,0.900,bicubic,-11.380,-8.136,+15 @@ -329,15 +359,16 @@ fbnetc_100,62.440,37.560,83.380,16.620,5.57,224,0.875,bilinear,-12.684,-9.006,-1 mnasnet_100,61.900,38.100,83.710,16.290,4.38,224,0.875,bicubic,-12.758,-8.404,-5 regnety_004,61.870,38.130,83.430,16.570,4.34,224,0.875,bicubic,-12.164,-8.322,+1 vgg19_bn,61.860,38.140,83.450,16.550,143.68,224,0.875,bilinear,-12.354,-8.392,-2 +convit_tiny,61.590,38.410,84.120,15.880,5.71,224,0.875,bicubic,-11.526,-7.594,+6 ssl_resnet18,61.480,38.520,83.300,16.700,11.69,224,0.875,bilinear,-11.130,-8.116,+9 -regnetx_006,61.350,38.650,83.450,16.550,6.20,224,0.875,bicubic,-12.502,-8.222,0 -spnasnet_100,61.220,38.780,82.790,17.210,4.42,224,0.875,bilinear,-12.864,-9.028,-4 -tv_resnet34,61.190,38.810,82.710,17.290,21.80,224,0.875,bilinear,-12.122,-8.716,+1 +regnetx_006,61.350,38.650,83.450,16.550,6.20,224,0.875,bicubic,-12.502,-8.222,-1 +spnasnet_100,61.220,38.780,82.790,17.210,4.42,224,0.875,bilinear,-12.864,-9.028,-5 +tv_resnet34,61.190,38.810,82.710,17.290,21.80,224,0.875,bilinear,-12.122,-8.716,0 pit_ti_224,60.980,39.020,83.860,16.140,4.85,224,0.900,bicubic,-11.932,-7.542,+4 skresnet18,60.860,39.140,82.880,17.120,11.96,224,0.875,bicubic,-12.178,-8.288,+1 -ghostnet_100,60.830,39.170,82.360,17.640,5.18,224,0.875,bilinear,-13.148,-9.096,-6 -vgg16_bn,60.760,39.240,82.950,17.050,138.37,224,0.875,bilinear,-12.590,-8.556,-4 -tf_mobilenetv3_large_075,60.400,39.600,81.950,18.050,3.99,224,0.875,bilinear,-13.038,-9.400,-6 +ghostnet_100,60.830,39.170,82.360,17.640,5.18,224,0.875,bilinear,-13.148,-9.096,-7 
+vgg16_bn,60.760,39.240,82.950,17.050,138.37,224,0.875,bilinear,-12.590,-8.556,-5 +tf_mobilenetv3_large_075,60.400,39.600,81.950,18.050,3.99,224,0.875,bilinear,-13.038,-9.400,-7 mobilenetv2_100,60.190,39.810,82.240,17.760,3.50,224,0.875,bicubic,-12.780,-8.776,-2 resnet18d,60.160,39.840,82.300,17.700,11.71,224,0.875,bicubic,-12.100,-8.396,+3 vit_deit_tiny_patch16_224,59.830,40.170,82.670,17.330,5.72,224,0.900,bicubic,-12.338,-8.448,+4 @@ -346,7 +377,7 @@ vgg19,59.710,40.290,81.450,18.550,143.67,224,0.875,bilinear,-12.658,-9.422,-2 regnetx_004,59.410,40.590,81.690,18.310,5.16,224,0.875,bicubic,-12.986,-9.140,-4 tf_mobilenetv3_large_minimal_100,59.070,40.930,81.150,18.850,3.92,224,0.875,bilinear,-13.178,-9.480,-1 vgg13_bn,59.000,41.000,81.070,18.930,133.05,224,0.875,bilinear,-12.594,-9.306,+2 -hrnet_w18_small,58.950,41.050,81.340,18.660,13.19,224,0.875,bilinear,-13.392,-9.338,-5 +hrnet_w18_small,58.950,41.050,81.340,18.660,13.19,224,0.875,bilinear,-13.394,-9.338,-5 vgg16,58.830,41.170,81.660,18.340,138.36,224,0.875,bilinear,-12.764,-8.722,+1 gluon_resnet18_v1b,58.340,41.660,80.970,19.030,11.69,224,0.875,bicubic,-12.496,-8.792,+1 vgg11_bn,57.410,42.590,80.020,19.980,132.87,224,0.875,bilinear,-12.950,-9.782,+1 @@ -360,5 +391,5 @@ vgg11,55.800,44.200,78.830,21.170,132.86,224,0.875,bilinear,-13.224,-9.798,-2 tf_mobilenetv3_small_100,54.530,45.470,77.060,22.940,2.54,224,0.875,bilinear,-13.392,-10.604,-1 dla46x_c,53.050,46.950,76.870,23.130,1.07,224,0.875,bilinear,-12.920,-10.110,0 tf_mobilenetv3_small_075,52.160,47.840,75.470,24.530,2.04,224,0.875,bilinear,-13.556,-10.660,0 -dla46_c,52.130,47.870,75.690,24.310,1.30,224,0.875,bilinear,-12.736,-10.602,0 +dla46_c,52.130,47.870,75.690,24.310,1.30,224,0.875,bilinear,-12.734,-10.602,0 tf_mobilenetv3_small_minimal_100,49.500,50.500,73.050,26.950,2.04,224,0.875,bilinear,-13.406,-11.180,0 diff --git a/results/results-sketch.csv b/results/results-sketch.csv index f66179a6..6d7c9c94 100644 --- a/results/results-sketch.csv +++ b/results/results-sketch.csv @@ -1,353 +1,384 @@ model,top1,top1_err,top5,top5_err,param_count,img_size,cropt_pct,interpolation,top1_diff,top5_diff,rank_diff -ig_resnext101_32x48d,58.810,41.190,81.076,18.924,828.41,224,0.875,bilinear,-26.618,-16.496,+15 -ig_resnext101_32x32d,58.386,41.614,80.381,19.619,468.53,224,0.875,bilinear,-26.708,-17.057,+22 -ig_resnext101_32x16d,57.690,42.310,79.905,20.095,194.03,224,0.875,bilinear,-26.480,-17.291,+41 -swsl_resnext101_32x16d,57.458,42.542,80.385,19.615,194.03,224,0.875,bilinear,-25.888,-16.461,+58 -swsl_resnext101_32x8d,56.438,43.562,78.944,21.056,88.79,224,0.875,bilinear,-27.846,-18.232,+35 -ig_resnext101_32x8d,54.918,45.082,77.534,22.466,88.79,224,0.875,bilinear,-27.770,-19.102,+71 -swsl_resnext101_32x4d,53.603,46.397,76.347,23.653,44.18,224,0.875,bilinear,-29.627,-20.413,+58 +ig_resnext101_32x48d,58.810,41.190,81.076,18.924,828.41,224,0.875,bilinear,-26.618,-16.496,+18 +ig_resnext101_32x32d,58.386,41.614,80.381,19.619,468.53,224,0.875,bilinear,-26.708,-17.057,+25 +ig_resnext101_32x16d,57.690,42.310,79.905,20.095,194.03,224,0.875,bilinear,-26.480,-17.291,+47 +swsl_resnext101_32x16d,57.458,42.542,80.385,19.615,194.03,224,0.875,bilinear,-25.888,-16.461,+66 +swsl_resnext101_32x8d,56.438,43.562,78.944,21.056,88.79,224,0.875,bilinear,-27.846,-18.232,+41 +ig_resnext101_32x8d,54.918,45.082,77.534,22.466,88.79,224,0.875,bilinear,-27.770,-19.102,+82 +swsl_resnext101_32x4d,53.603,46.397,76.347,23.653,44.18,224,0.875,bilinear,-29.627,-20.413,+66 
tf_efficientnet_l2_ns_475,51.494,48.506,73.928,26.072,480.31,475,0.936,bicubic,-36.740,-24.618,-6 -swsl_resnext50_32x4d,50.437,49.563,73.368,26.633,25.03,224,0.875,bilinear,-31.745,-22.862,+79 +swsl_resnext50_32x4d,50.437,49.563,73.368,26.633,25.03,224,0.875,bilinear,-31.745,-22.862,+95 swin_large_patch4_window12_384,50.404,49.596,72.564,27.436,196.74,384,1.000,bicubic,-36.744,-25.670,-7 -swsl_resnet50,49.541,50.459,72.334,27.666,25.56,224,0.875,bilinear,-31.625,-23.638,+103 +swsl_resnet50,49.541,50.459,72.334,27.666,25.56,224,0.875,bilinear,-31.625,-23.638,+125 swin_large_patch4_window7_224,48.991,51.009,71.391,28.609,196.53,224,0.900,bicubic,-37.329,-26.505,-4 swin_base_patch4_window12_384,48.553,51.447,71.813,28.187,87.90,384,1.000,bicubic,-37.879,-26.245,-6 tf_efficientnet_b7_ns,47.800,52.200,69.640,30.360,66.35,600,0.949,bicubic,-39.040,-28.454,-10 tf_efficientnet_b6_ns,47.761,52.239,69.968,30.032,43.04,528,0.942,bicubic,-38.691,-27.914,-9 tf_efficientnet_l2_ns,47.570,52.430,70.019,29.981,480.31,800,0.960,bicubic,-40.782,-28.631,-15 -tf_efficientnet_b8_ap,45.774,54.226,67.911,32.089,87.41,672,0.954,bicubic,-39.596,-29.383,+2 +tf_efficientnetv2_l_in21ft1k,46.939,53.061,70.310,29.690,118.52,480,1.000,bicubic,-39.365,-27.668,-8 +tf_efficientnet_b8_ap,45.774,54.226,67.911,32.089,87.41,672,0.954,bicubic,-39.596,-29.383,+4 tf_efficientnet_b5_ns,45.615,54.385,67.842,32.158,30.39,456,0.934,bicubic,-40.473,-29.910,-8 -swin_base_patch4_window7_224,45.560,54.440,68.512,31.488,87.77,224,0.900,bicubic,-39.692,-29.050,+1 -cait_m48_448,44.245,55.755,64.653,35.347,356.46,448,1.000,bicubic,-42.239,-33.102,-15 -vit_base_r50_s16_384,43.512,56.488,66.781,33.219,98.95,384,1.000,bicubic,-41.460,-30.507,+8 -tf_efficientnet_b4_ns,43.450,56.550,65.519,34.481,19.34,380,0.922,bicubic,-41.713,-31.951,-1 -vit_large_patch16_384,43.300,56.700,66.454,33.546,304.72,384,1.000,bicubic,-41.858,-30.902,-1 -tf_efficientnet_b8,42.508,57.492,64.857,35.143,87.41,672,0.954,bicubic,-42.862,-32.533,-6 -cait_m36_384,42.398,57.602,63.324,36.676,271.22,384,1.000,bicubic,-43.656,-34.406,-14 -dm_nfnet_f6,41.593,58.407,63.192,36.808,438.36,576,0.956,bicubic,-44.704,-34.552,-17 -tf_efficientnet_b7,41.431,58.569,63.017,36.983,66.35,600,0.949,bicubic,-43.505,-34.186,+3 -tf_efficientnet_b7_ap,41.429,58.571,62.874,37.126,66.35,600,0.949,bicubic,-43.691,-34.378,-5 -tf_efficientnet_b5_ap,41.418,58.582,62.084,37.916,30.39,456,0.934,bicubic,-42.834,-34.890,+13 -resnetv2_152x4_bitm,41.241,58.759,64.238,35.762,936.53,480,1.000,bilinear,-43.691,-33.198,+1 -tf_efficientnet_b6_ap,41.099,58.901,62.355,37.645,43.04,528,0.942,bicubic,-43.689,-34.783,+1 -dm_nfnet_f5,41.003,58.997,61.911,38.089,377.21,544,0.954,bicubic,-44.711,-35.531,-20 -dm_nfnet_f3,40.920,59.080,61.949,38.051,254.92,416,0.940,bicubic,-44.640,-35.457,-19 -vit_large_patch16_224,40.732,59.268,63.593,36.407,304.33,224,0.900,bicubic,-42.330,-32.845,+35 -tf_efficientnet_b4_ap,40.484,59.516,61.723,38.277,19.34,380,0.922,bicubic,-42.764,-34.669,+29 -vit_base_patch16_224_miil,40.168,59.832,60.887,39.113,86.54,224,0.875,bilinear,-44.100,-35.915,+5 -cait_s36_384,39.765,60.235,60.475,39.525,68.37,384,1.000,bicubic,-45.695,-37.005,-22 -ecaresnet269d,39.594,60.406,60.343,39.657,102.09,352,1.000,bicubic,-45.382,-36.883,-10 -tf_efficientnet_b3_ns,39.584,60.416,61.453,38.547,12.23,300,0.904,bicubic,-44.464,-35.457,+10 -dm_nfnet_f4,39.474,60.526,60.420,39.580,316.07,512,0.951,bicubic,-46.184,-37.090,-27 -efficientnet_b4,39.079,60.921,59.608,40.392,19.34,384,1.000,bicubic,-44.349,-36.988,+19 
-tf_efficientnet_b5,38.356,61.644,59.913,40.087,30.39,456,0.934,bicubic,-45.456,-36.835,+11 -vit_deit_base_distilled_patch16_384,38.260,61.740,57.783,42.217,87.63,384,1.000,bicubic,-47.162,-39.549,-26 -vit_base_patch16_384,38.099,61.901,60.428,39.572,86.86,384,1.000,bicubic,-46.111,-36.790,-1 -cait_s24_384,37.873,62.127,58.079,41.921,47.06,384,1.000,bicubic,-47.173,-39.267,-20 -resnet152d,37.857,62.143,58.356,41.644,60.21,320,1.000,bicubic,-45.823,-38.382,+12 -resnetrs420,37.747,62.253,58.215,41.785,191.89,416,1.000,bicubic,-47.261,-38.909,-21 -resnetrs350,37.676,62.324,58.083,41.917,163.96,384,1.000,bicubic,-47.044,-38.905,-15 +tf_efficientnetv2_m_in21ft1k,45.582,54.418,69.150,30.849,54.14,480,1.000,bicubic,-40.006,-28.602,-5 +swin_base_patch4_window7_224,45.560,54.440,68.512,31.488,87.77,224,0.900,bicubic,-39.692,-29.050,+2 +cait_m48_448,44.245,55.755,64.653,35.347,356.46,448,1.000,bicubic,-42.239,-33.102,-17 +vit_base_r50_s16_384,43.512,56.488,66.781,33.219,98.95,384,1.000,bicubic,-41.460,-30.507,+10 +tf_efficientnet_b4_ns,43.450,56.550,65.519,34.481,19.34,380,0.922,bicubic,-41.713,-31.951,0 +vit_large_patch16_384,43.300,56.700,66.454,33.546,304.72,384,1.000,bicubic,-41.858,-30.902,0 +tf_efficientnet_b8,42.508,57.492,64.857,35.143,87.41,672,0.954,bicubic,-42.862,-32.533,-5 +cait_m36_384,42.398,57.602,63.324,36.676,271.22,384,1.000,bicubic,-43.656,-34.406,-15 +dm_nfnet_f6,41.593,58.407,63.192,36.808,438.36,576,0.956,bicubic,-44.704,-34.552,-18 +tf_efficientnet_b7,41.431,58.569,63.017,36.983,66.35,600,0.949,bicubic,-43.505,-34.186,+5 +tf_efficientnet_b7_ap,41.429,58.571,62.874,37.126,66.35,600,0.949,bicubic,-43.691,-34.378,-4 +tf_efficientnet_b5_ap,41.418,58.582,62.084,37.916,30.39,456,0.934,bicubic,-42.834,-34.890,+17 +resnetv2_152x4_bitm,41.241,58.759,64.238,35.762,936.53,480,1.000,bilinear,-43.691,-33.198,+3 +tf_efficientnet_b6_ap,41.099,58.901,62.355,37.645,43.04,528,0.942,bicubic,-43.689,-34.783,+4 +dm_nfnet_f5,41.003,58.997,61.911,38.089,377.21,544,0.954,bicubic,-44.711,-35.531,-21 +tf_efficientnetv2_s_in21ft1k,40.950,59.050,63.849,36.151,21.46,384,1.000,bicubic,-43.352,-33.403,+10 +dm_nfnet_f3,40.920,59.080,61.949,38.051,254.92,416,0.940,bicubic,-44.640,-35.457,-20 +vit_large_patch16_224,40.732,59.268,63.593,36.407,304.33,224,0.900,bicubic,-42.330,-32.845,+42 +tf_efficientnet_b4_ap,40.484,59.516,61.723,38.277,19.34,380,0.922,bicubic,-42.764,-34.669,+34 +vit_base_patch16_224_miil,40.168,59.832,60.887,39.113,86.54,224,0.875,bilinear,-44.100,-35.915,+8 +tf_efficientnetv2_l,39.830,60.170,60.801,39.199,118.52,480,1.000,bicubic,-45.660,-36.571,-23 +cait_s36_384,39.765,60.235,60.475,39.525,68.37,384,1.000,bicubic,-45.695,-37.005,-23 +efficientnetv2_rw_m,39.667,60.333,59.687,40.313,53.24,416,1.000,bicubic,-45.141,-37.461,-6 +ecaresnet269d,39.594,60.406,60.343,39.657,102.09,352,1.000,bicubic,-45.382,-36.883,-11 +tf_efficientnet_b3_ns,39.584,60.416,61.453,38.547,12.23,300,0.904,bicubic,-44.464,-35.457,+11 +dm_nfnet_f4,39.474,60.526,60.420,39.580,316.07,512,0.951,bicubic,-46.184,-37.090,-31 +efficientnet_b4,39.079,60.921,59.608,40.392,19.34,384,1.000,bicubic,-44.349,-36.988,+22 +tf_efficientnet_b5,38.356,61.644,59.913,40.087,30.39,456,0.934,bicubic,-45.456,-36.835,+13 +vit_deit_base_distilled_patch16_384,38.260,61.740,57.783,42.217,87.63,384,1.000,bicubic,-47.162,-39.549,-28 +vit_base_patch16_384,38.099,61.901,60.428,39.572,86.86,384,1.000,bicubic,-46.111,-36.790,0 +cait_s24_384,37.873,62.127,58.079,41.921,47.06,384,1.000,bicubic,-47.173,-39.267,-22 
+resnet152d,37.857,62.143,58.356,41.644,60.21,320,1.000,bicubic,-45.823,-38.382,+14 +tf_efficientnetv2_m,37.824,62.176,58.710,41.290,54.14,480,1.000,bicubic,-47.220,-38.568,-23 +resnetrs420,37.747,62.253,58.215,41.785,191.89,416,1.000,bicubic,-47.261,-38.909,-23 +resnetrs350,37.676,62.324,58.083,41.917,163.96,384,1.000,bicubic,-47.044,-38.905,-16 pit_b_distilled_224,37.590,62.410,57.238,42.762,74.79,224,0.900,bicubic,-46.554,-39.618,-4 resnet200d,37.505,62.495,58.297,41.703,64.69,320,1.000,bicubic,-46.457,-38.526,+1 -resnest269e,37.315,62.685,57.468,42.532,110.93,416,0.928,bicubic,-47.203,-39.518,-16 -cait_s24_224,37.153,62.847,56.724,43.276,46.92,224,1.000,bicubic,-46.299,-39.840,+7 -tf_efficientnet_b3_ap,37.055,62.945,57.240,42.760,12.23,300,0.904,bicubic,-44.767,-38.384,+41 -efficientnet_v2s,37.049,62.951,56.814,43.186,23.94,384,1.000,bicubic,-46.759,-39.910,0 -resnetv2_152x2_bitm,36.847,63.153,59.899,40.101,236.34,480,1.000,bilinear,-47.593,-37.547,-19 -seresnet152d,36.790,63.210,56.718,43.282,66.84,320,1.000,bicubic,-47.572,-40.322,-17 +resnest269e,37.315,62.685,57.468,42.532,110.93,416,0.928,bicubic,-47.203,-39.518,-17 +cait_s24_224,37.153,62.847,56.724,43.276,46.92,224,1.000,bicubic,-46.299,-39.840,+9 +tf_efficientnet_b3_ap,37.055,62.945,57.240,42.760,12.23,300,0.904,bicubic,-44.767,-38.384,+53 +efficientnetv2_rw_s,37.049,62.951,56.814,43.186,23.94,384,1.000,bicubic,-46.759,-39.910,+1 +resnetv2_152x2_bitm,36.847,63.153,59.899,40.101,236.34,480,1.000,bilinear,-47.593,-37.547,-20 +seresnet152d,36.790,63.210,56.718,43.282,66.84,320,1.000,bicubic,-47.572,-40.322,-18 resnetrs200,36.639,63.361,56.828,43.172,93.21,320,1.000,bicubic,-47.427,-40.046,-10 -efficientnet_b3,36.420,63.580,56.845,43.155,12.23,320,1.000,bicubic,-45.822,-39.269,+27 +efficientnet_b3,36.420,63.580,56.845,43.155,12.23,320,1.000,bicubic,-45.822,-39.269,+37 cait_xs24_384,36.416,63.584,56.944,43.056,26.67,384,1.000,bicubic,-47.645,-39.945,-11 -vit_deit_base_distilled_patch16_224,36.397,63.603,56.617,43.383,87.34,224,0.900,bicubic,-46.991,-39.871,+1 -resnetrs270,36.320,63.680,56.562,43.438,129.86,352,1.000,bicubic,-48.114,-40.408,-24 -tresnet_m,36.285,63.715,55.796,44.204,31.39,224,0.875,bilinear,-46.795,-40.322,+6 -dm_nfnet_f2,36.257,63.743,55.847,44.153,193.78,352,0.920,bicubic,-48.733,-41.297,-36 -tf_efficientnet_b2_ns,36.183,63.817,57.551,42.449,9.11,260,0.890,bicubic,-46.197,-38.697,+17 -ecaresnet101d,36.004,63.996,56.165,43.835,44.57,224,0.875,bicubic,-46.168,-39.881,+24 +vit_deit_base_distilled_patch16_224,36.397,63.603,56.617,43.383,87.34,224,0.900,bicubic,-46.991,-39.871,+3 +resnetrs270,36.320,63.680,56.562,43.438,129.86,352,1.000,bicubic,-48.114,-40.408,-25 +tresnet_m,36.285,63.715,55.796,44.204,31.39,224,0.875,bilinear,-46.795,-40.322,+10 +mixer_b16_224_miil,36.269,63.731,55.965,44.035,59.88,224,0.875,bilinear,-46.039,-39.751,+28 +dm_nfnet_f2,36.257,63.743,55.847,44.153,193.78,352,0.920,bicubic,-48.733,-41.297,-39 +tf_efficientnet_b2_ns,36.183,63.817,57.551,42.449,9.11,260,0.890,bicubic,-46.197,-38.697,+22 +ecaresnet101d,36.004,63.996,56.165,43.835,44.57,224,0.875,bicubic,-46.168,-39.881,+33 resnest200e,35.931,64.069,55.849,44.151,70.20,320,0.909,bicubic,-47.901,-41.045,-14 -swsl_resnet18,35.858,64.142,58.455,41.545,11.69,224,0.875,bilinear,-37.418,-33.279,+269 -eca_nfnet_l1,35.856,64.144,55.955,44.045,41.41,320,1.000,bicubic,-48.151,-41.073,-18 -vit_base_patch16_224,35.768,64.232,57.390,42.610,86.57,224,0.900,bicubic,-46.018,-38.732,+26 
-resnest101e,35.373,64.627,55.780,44.220,48.28,256,0.875,bilinear,-47.517,-40.540,+3 -resnetv2_101x3_bitm,35.261,64.739,57.851,42.149,387.93,480,1.000,bilinear,-49.133,-39.511,-33 -dm_nfnet_f1,35.192,64.808,54.413,45.587,132.63,320,0.910,bicubic,-49.412,-42.655,-38 -repvgg_b3,35.043,64.957,54.542,45.458,123.09,224,0.875,bilinear,-45.449,-40.718,+60 -repvgg_b3g4,35.043,64.957,54.772,45.228,83.83,224,0.875,bilinear,-45.169,-40.338,+76 -resnet101d,34.872,65.128,54.202,45.798,44.57,320,1.000,bicubic,-48.150,-42.244,-4 -vit_large_patch32_384,34.673,65.326,55.729,44.271,306.63,384,1.000,bicubic,-46.833,-40.363,+27 -dm_nfnet_f0,34.642,65.358,54.762,45.238,71.49,256,0.900,bicubic,-48.700,-41.798,-14 -ssl_resnext101_32x16d,34.603,65.397,55.931,44.069,194.03,224,0.875,bilinear,-47.241,-40.165,+15 -repvgg_b2g4,34.587,65.413,54.782,45.218,61.76,224,0.875,bilinear,-44.779,-39.906,+107 -resnetrs152,34.355,65.645,53.562,46.438,86.62,320,1.000,bicubic,-49.357,-43.052,-25 -resnest50d_4s2x40d,34.355,65.645,54.725,45.275,30.42,224,0.875,bicubic,-46.753,-40.833,+35 -tf_efficientnet_b1_ns,34.157,65.843,55.489,44.511,7.79,240,0.882,bicubic,-47.231,-40.249,+24 +swsl_resnet18,35.858,64.142,58.455,41.545,11.69,224,0.875,bilinear,-37.418,-33.279,+292 +eca_nfnet_l1,35.856,64.144,55.955,44.045,41.41,320,1.000,bicubic,-48.151,-41.073,-19 +vit_base_patch16_224,35.768,64.232,57.390,42.610,86.57,224,0.900,bicubic,-46.018,-38.732,+37 +resnest101e,35.373,64.627,55.780,44.220,48.28,256,0.875,bilinear,-47.517,-40.540,+6 +convit_base,35.314,64.686,54.927,45.073,86.54,224,0.875,bicubic,-46.976,-41.011,+20 +resnetv2_101x3_bitm,35.261,64.739,57.851,42.149,387.93,480,1.000,bilinear,-49.133,-39.511,-36 +dm_nfnet_f1,35.192,64.808,54.413,45.587,132.63,320,0.910,bicubic,-49.412,-42.655,-41 +twins_svt_large,35.086,64.914,54.721,45.279,99.27,224,0.900,bicubic,-48.592,-41.873,-15 +repvgg_b3,35.043,64.957,54.542,45.458,123.09,224,0.875,bilinear,-45.449,-40.718,+74 +repvgg_b3g4,35.043,64.957,54.772,45.228,83.83,224,0.875,bilinear,-45.171,-40.338,+90 +resnet101d,34.872,65.128,54.202,45.798,44.57,320,1.000,bicubic,-48.150,-42.244,-3 +vit_large_patch32_384,34.673,65.326,55.729,44.271,306.63,384,1.000,bicubic,-46.833,-40.363,+38 +dm_nfnet_f0,34.642,65.358,54.762,45.238,71.49,256,0.900,bicubic,-48.700,-41.798,-15 +ssl_resnext101_32x16d,34.605,65.395,55.931,44.069,194.03,224,0.875,bilinear,-47.239,-40.165,+24 +repvgg_b2g4,34.587,65.413,54.782,45.218,61.76,224,0.875,bilinear,-44.779,-39.906,+124 +resnest50d_4s2x40d,34.355,65.645,54.725,45.275,30.42,224,0.875,bicubic,-46.753,-40.833,+48 +resnetrs152,34.355,65.645,53.562,46.438,86.62,320,1.000,bicubic,-49.357,-43.052,-27 +tf_efficientnet_b1_ns,34.157,65.843,55.489,44.511,7.79,240,0.882,bicubic,-47.231,-40.249,+36 +twins_pcpvt_large,34.111,65.888,54.128,45.872,60.99,224,0.900,bicubic,-49.029,-42.470,-17 tf_efficientnet_b4,34.064,65.936,54.198,45.802,19.34,380,0.922,bicubic,-48.958,-42.102,-11 nfnet_l0,34.029,65.971,54.418,45.582,35.07,288,1.000,bicubic,-48.731,-42.080,-9 -ssl_resnext101_32x8d,34.017,65.983,55.601,44.399,88.79,224,0.875,bilinear,-47.599,-40.437,+15 -tf_efficientnet_b6,33.998,66.002,54.544,45.456,43.04,528,0.942,bicubic,-50.112,-42.342,-40 -efficientnet_b3_pruned,33.996,66.004,54.108,45.892,9.86,300,0.904,bicubic,-46.862,-41.134,+37 -regnety_160,33.976,66.024,53.546,46.454,83.59,288,1.000,bicubic,-49.710,-43.230,-31 -pit_s_distilled_224,33.939,66.061,53.265,46.735,24.04,224,0.900,bicubic,-48.057,-42.533,+3 
-regnety_032,33.412,66.588,52.754,47.246,19.44,288,1.000,bicubic,-49.312,-43.670,-14 -gernet_l,33.357,66.643,51.901,48.099,31.08,256,0.875,bilinear,-47.997,-43.635,+17 -tresnet_xl,33.257,66.743,52.294,47.706,78.44,224,0.875,bilinear,-48.797,-43.642,-2 -resnest50d_1s4x24d,33.147,66.853,52.839,47.161,25.68,224,0.875,bicubic,-47.841,-42.483,+25 -rexnet_200,32.987,67.013,52.939,47.061,16.37,224,0.875,bicubic,-48.645,-42.729,+5 -resnest50d,32.972,67.028,52.713,47.287,27.48,224,0.875,bilinear,-48.002,-42.665,+24 -tf_efficientnet_b3,32.860,67.140,52.950,47.050,12.23,300,0.904,bicubic,-48.776,-42.768,+2 -pnasnet5large,32.848,67.152,50.500,49.500,86.06,331,0.911,bicubic,-49.934,-45.540,-23 -nasnetalarge,32.775,67.225,50.141,49.859,88.75,331,0.911,bicubic,-49.845,-45.906,-20 -gernet_m,32.740,67.260,51.913,48.087,21.14,224,0.875,bilinear,-47.992,-43.271,+28 -inception_resnet_v2,32.738,67.262,50.648,49.352,55.84,299,0.897,bicubic,-47.720,-44.658,+37 -gluon_resnet152_v1d,32.734,67.266,51.088,48.912,60.21,224,0.875,bicubic,-47.740,-44.118,+34 -pit_b_224,32.718,67.282,49.852,50.148,73.76,224,0.900,bicubic,-49.728,-45.858,-22 -tf_efficientnet_b2_ap,32.681,67.319,52.239,47.761,9.11,260,0.890,bicubic,-47.619,-42.789,+42 -tresnet_l,32.559,67.441,51.139,48.861,55.99,224,0.875,bilinear,-48.931,-44.485,0 -cait_xxs36_384,32.549,67.451,52.233,47.767,17.37,384,1.000,bicubic,-49.645,-43.915,-18 -vit_base_patch32_384,32.461,67.539,52.444,47.556,88.30,384,1.000,bicubic,-49.191,-43.684,-9 -wide_resnet50_2,32.439,67.561,51.459,48.541,68.88,224,0.875,bicubic,-49.017,-44.073,-2 -resnetv2_50x3_bitm,32.410,67.590,54.314,45.686,217.32,480,1.000,bilinear,-51.374,-42.792,-53 -ens_adv_inception_resnet_v2,32.370,67.629,50.427,49.573,55.84,299,0.897,bicubic,-47.611,-44.509,+50 -vit_deit_base_patch16_224,32.363,67.637,51.011,48.989,86.57,224,0.900,bicubic,-49.635,-44.723,-19 -swin_small_patch4_window7_224,32.341,67.659,50.905,49.095,49.61,224,0.900,bicubic,-50.871,-45.417,-45 -gluon_resnet152_v1s,32.331,67.669,50.526,49.474,60.32,224,0.875,bicubic,-48.685,-44.886,+5 -vit_deit_small_distilled_patch16_224,32.284,67.716,52.102,47.898,22.44,224,0.900,bicubic,-48.916,-43.276,0 -gluon_seresnext101_64x4d,32.205,67.795,50.319,49.681,88.23,224,0.875,bicubic,-48.689,-44.989,+9 -gluon_seresnext101_32x4d,32.107,67.893,51.237,48.763,48.96,224,0.875,bicubic,-48.797,-44.057,+7 -vit_deit_base_patch16_384,31.989,68.011,50.547,49.453,86.86,384,1.000,bicubic,-51.117,-45.825,-49 -seresnext50_32x4d,31.985,68.015,51.231,48.769,27.56,224,0.875,bicubic,-49.281,-44.389,-6 -resnetrs101,31.858,68.142,51.017,48.983,63.62,288,0.940,bicubic,-50.430,-44.991,-35 -cspresnext50,31.822,68.178,51.602,48.398,20.57,224,0.875,bilinear,-48.218,-43.342,+38 -eca_nfnet_l0,31.657,68.343,51.654,48.346,24.14,288,1.000,bicubic,-50.931,-44.820,-41 -tnt_s_patch16_224,31.643,68.357,51.143,48.857,23.76,224,0.900,bicubic,-49.875,-44.605,-19 -resnet50,31.547,68.453,50.170,49.830,25.56,224,0.875,bicubic,-47.491,-44.220,+87 -ssl_resnext101_32x4d,31.423,68.577,52.121,47.879,44.18,224,0.875,bilinear,-49.501,-43.607,-2 -inception_v4,31.378,68.622,49.244,50.756,42.68,299,0.875,bicubic,-48.790,-45.724,+28 -rexnet_150,31.366,68.634,51.288,48.712,9.73,224,0.875,bicubic,-48.944,-43.878,+17 -pit_s_224,31.333,68.667,49.661,50.339,23.46,224,0.900,bicubic,-49.761,-45.671,-10 -cait_xxs36_224,31.278,68.722,50.616,49.384,17.30,224,1.000,bicubic,-48.472,-44.250,+45 -cspresnet50,31.270,68.730,51.223,48.777,21.62,256,0.887,bilinear,-48.304,-43.489,+52 
-ecaresnetlight,31.121,68.879,50.243,49.757,30.16,224,0.875,bicubic,-49.341,-45.007,+7 -gluon_resnet101_v1s,31.115,68.885,49.793,50.207,44.67,224,0.875,bicubic,-49.187,-45.367,+13 -tf_efficientnet_cc_b0_8e,31.087,68.913,50.761,49.239,24.01,224,0.875,bicubic,-46.821,-42.892,+121 -ecaresnet50d,31.058,68.942,50.848,49.152,25.58,224,0.875,bicubic,-49.534,-44.472,-1 -ecaresnet50t,31.058,68.942,50.577,49.423,25.57,320,0.950,bicubic,-51.288,-45.561,-51 -resnet50d,31.020,68.980,49.808,50.192,25.58,224,0.875,bicubic,-49.510,-45.352,-2 -cspdarknet53,31.018,68.981,50.390,49.610,27.64,256,0.887,bilinear,-49.040,-44.694,+21 -gluon_resnet152_v1c,30.991,69.009,48.924,51.076,60.21,224,0.875,bicubic,-48.919,-45.916,+26 -gluon_resnext101_64x4d,30.987,69.013,48.549,51.451,83.46,224,0.875,bicubic,-49.617,-46.439,-7 -tf_efficientnet_cc_b1_8e,30.899,69.101,50.080,49.920,39.72,240,0.882,bicubic,-48.409,-44.290,+52 -ecaresnet101d_pruned,30.897,69.103,50.013,49.987,24.88,224,0.875,bicubic,-49.921,-45.615,-14 -gluon_resnext101_32x4d,30.877,69.123,48.537,51.463,44.18,224,0.875,bicubic,-49.457,-46.389,0 -tf_efficientnet_lite4,30.830,69.170,50.386,49.614,13.01,380,0.920,bilinear,-50.706,-45.282,-40 -nf_resnet50,30.775,69.225,50.074,49.926,25.56,288,0.940,bicubic,-49.919,-45.282,-14 -dpn107,30.678,69.322,48.810,51.190,86.92,224,0.875,bicubic,-49.478,-46.100,+10 -ese_vovnet39b,30.657,69.343,49.875,50.125,24.57,224,0.875,bicubic,-48.663,-44.837,+44 -gluon_resnet152_v1b,30.623,69.376,48.521,51.479,60.19,224,0.875,bicubic,-49.063,-46.215,+31 -tresnet_xl_448,30.614,69.386,49.069,50.931,78.44,448,0.875,bilinear,-52.436,-47.105,-76 -ssl_resnext50_32x4d,30.594,69.406,50.657,49.343,25.03,224,0.875,bilinear,-49.724,-44.749,-6 -gluon_resnet101_v1d,30.523,69.477,47.950,52.050,44.57,224,0.875,bicubic,-49.891,-47.064,-10 -dpn68b,30.517,69.483,49.158,50.842,12.61,224,0.875,bicubic,-48.699,-45.256,+51 -resnest26d,30.490,69.510,50.677,49.323,17.07,224,0.875,bilinear,-47.988,-43.621,+77 -efficientnet_b2,30.435,69.565,49.698,50.302,9.11,288,1.000,bicubic,-50.177,-45.620,-22 -tf_efficientnet_b1_ap,30.421,69.579,49.553,50.447,7.79,240,0.882,bicubic,-48.859,-44.753,+44 -pit_xs_distilled_224,30.278,69.722,49.836,50.164,11.00,224,0.900,bicubic,-49.028,-44.528,+40 -seresnet50,30.077,69.923,49.292,50.708,28.09,224,0.875,bicubic,-50.197,-45.778,-8 -dpn98,30.067,69.933,48.244,51.756,61.57,224,0.875,bicubic,-49.575,-46.354,+23 -tf_efficientnet_b2,30.026,69.974,49.581,50.419,9.11,260,0.890,bicubic,-50.060,-45.328,-1 -dpn131,30.024,69.976,48.146,51.854,79.25,224,0.875,bicubic,-49.798,-46.564,+12 -efficientnet_el,30.018,69.982,48.834,51.166,10.59,300,0.904,bicubic,-51.298,-46.692,-49 -legacy_senet154,30.001,69.999,48.034,51.966,115.09,224,0.875,bilinear,-51.309,-47.462,-49 -dpn92,29.953,70.047,49.162,50.838,37.67,224,0.875,bicubic,-50.055,-45.674,-2 +ssl_resnext101_32x8d,34.017,65.983,55.601,44.399,88.79,224,0.875,bilinear,-47.599,-40.437,+24 +tf_efficientnet_b6,33.998,66.002,54.544,45.456,43.04,528,0.942,bicubic,-50.112,-42.342,-44 +efficientnet_b3_pruned,33.996,66.004,54.108,45.892,9.86,300,0.904,bicubic,-46.862,-41.134,+50 +regnety_160,33.976,66.024,53.546,46.454,83.59,288,1.000,bicubic,-49.710,-43.230,-34 +pit_s_distilled_224,33.939,66.061,53.265,46.735,24.04,224,0.900,bicubic,-48.057,-42.533,+10 +resnet51q,33.563,66.437,53.021,46.979,35.70,288,1.000,bilinear,-48.797,-43.159,-6 +regnety_032,33.412,66.588,52.754,47.246,19.44,288,1.000,bicubic,-49.312,-43.670,-15 +gernet_l,33.357,66.643,51.901,48.099,31.08,256,0.875,bilinear,-47.997,-43.635,+27 
+tresnet_xl,33.257,66.743,52.294,47.706,78.44,224,0.875,bilinear,-48.797,-43.642,+4 +resnest50d_1s4x24d,33.147,66.853,52.839,47.161,25.68,224,0.875,bicubic,-47.841,-42.483,+37 +twins_pcpvt_base,33.021,66.979,52.485,47.515,43.83,224,0.900,bicubic,-49.687,-43.861,-18 +rexnet_200,32.987,67.013,52.939,47.061,16.37,224,0.875,bicubic,-48.645,-42.729,+12 +resnest50d,32.972,67.028,52.713,47.287,27.48,224,0.875,bilinear,-48.002,-42.665,+35 +tf_efficientnetv2_s,32.915,67.085,51.726,48.274,21.46,384,1.000,bicubic,-50.979,-44.972,-50 +convit_small,32.913,67.087,52.123,47.877,27.78,224,0.875,bicubic,-48.513,-43.621,+17 +tf_efficientnet_b3,32.860,67.140,52.950,47.050,12.23,300,0.904,bicubic,-48.776,-42.768,+7 +pnasnet5large,32.848,67.152,50.500,49.500,86.06,331,0.911,bicubic,-49.934,-45.540,-27 +twins_svt_base,32.836,67.164,51.559,48.441,56.07,224,0.900,bicubic,-50.300,-44.859,-36 +nasnetalarge,32.775,67.225,50.141,49.859,88.75,331,0.911,bicubic,-49.845,-45.906,-24 +gernet_m,32.740,67.260,51.913,48.087,21.14,224,0.875,bilinear,-47.992,-43.271,+36 +inception_resnet_v2,32.738,67.262,50.648,49.352,55.84,299,0.897,bicubic,-47.720,-44.658,+45 +gluon_resnet152_v1d,32.734,67.266,51.088,48.912,60.21,224,0.875,bicubic,-47.740,-44.118,+42 +pit_b_224,32.718,67.282,49.852,50.148,73.76,224,0.900,bicubic,-49.728,-45.858,-25 +tf_efficientnet_b2_ap,32.681,67.319,52.239,47.761,9.11,260,0.890,bicubic,-47.619,-42.789,+50 +tresnet_l,32.559,67.441,51.139,48.861,55.99,224,0.875,bilinear,-48.931,-44.485,+5 +cait_xxs36_384,32.549,67.451,52.233,47.767,17.37,384,1.000,bicubic,-49.645,-43.915,-17 +vit_base_patch32_384,32.461,67.539,52.444,47.556,88.30,384,1.000,bicubic,-49.191,-43.684,-5 +wide_resnet50_2,32.439,67.561,51.459,48.541,68.88,224,0.875,bicubic,-49.017,-44.073,+3 +resnetv2_50x3_bitm,32.410,67.590,54.314,45.686,217.32,480,1.000,bilinear,-51.374,-42.792,-61 +ens_adv_inception_resnet_v2,32.370,67.629,50.427,49.573,55.84,299,0.897,bicubic,-47.611,-44.509,+59 +vit_deit_base_patch16_224,32.363,67.637,51.011,48.989,86.57,224,0.900,bicubic,-49.635,-44.723,-17 +swin_small_patch4_window7_224,32.341,67.659,50.905,49.095,49.61,224,0.900,bicubic,-50.871,-45.417,-52 +gluon_resnet152_v1s,32.331,67.669,50.526,49.474,60.32,224,0.875,bicubic,-48.685,-44.886,+13 +vit_deit_small_distilled_patch16_224,32.284,67.716,52.102,47.898,22.44,224,0.900,bicubic,-48.916,-43.276,+7 +gluon_seresnext101_64x4d,32.205,67.795,50.319,49.681,88.23,224,0.875,bicubic,-48.689,-44.989,+17 +coat_lite_small,32.127,67.873,49.934,50.066,19.84,224,0.900,bicubic,-50.181,-45.916,-34 +gluon_seresnext101_32x4d,32.107,67.893,51.237,48.763,48.96,224,0.875,bicubic,-48.797,-44.057,+14 +vit_deit_base_patch16_384,31.989,68.011,50.547,49.453,86.86,384,1.000,bicubic,-51.117,-45.825,-55 +seresnext50_32x4d,31.985,68.015,51.231,48.769,27.56,224,0.875,bicubic,-49.281,-44.389,0 +levit_384,31.877,68.123,50.598,49.402,39.13,224,0.900,bicubic,-50.709,-45.418,-43 +resnetrs101,31.858,68.142,51.017,48.983,63.62,288,0.940,bicubic,-50.430,-44.991,-36 +cspresnext50,31.822,68.178,51.602,48.398,20.57,224,0.875,bilinear,-48.218,-43.342,+45 +eca_nfnet_l0,31.657,68.343,51.654,48.346,24.14,288,1.000,bicubic,-50.931,-44.820,-47 +tnt_s_patch16_224,31.643,68.357,51.143,48.857,23.76,224,0.900,bicubic,-49.875,-44.605,-17 +resnet50,31.547,68.453,50.170,49.830,25.56,224,0.875,bicubic,-47.491,-44.220,+96 +ssl_resnext101_32x4d,31.423,68.577,52.121,47.879,44.18,224,0.875,bilinear,-49.501,-43.607,+4 +inception_v4,31.378,68.622,49.244,50.756,42.68,299,0.875,bicubic,-48.790,-45.724,+35 
+rexnet_150,31.366,68.634,51.288,48.712,9.73,224,0.875,bicubic,-48.944,-43.878,+23 +pit_s_224,31.333,68.667,49.661,50.339,23.46,224,0.900,bicubic,-49.761,-45.671,-5 +cait_xxs36_224,31.278,68.722,50.616,49.384,17.30,224,1.000,bicubic,-48.472,-44.250,+53 +cspresnet50,31.270,68.730,51.223,48.777,21.62,256,0.887,bilinear,-48.304,-43.489,+60 +coat_mini,31.203,68.797,49.773,50.227,10.34,224,0.900,bicubic,-50.065,-45.619,-14 +ecaresnetlight,31.121,68.879,50.243,49.757,30.16,224,0.875,bicubic,-49.341,-45.007,+12 +gluon_resnet101_v1s,31.115,68.885,49.793,50.207,44.67,224,0.875,bicubic,-49.187,-45.367,+18 +tf_efficientnet_cc_b0_8e,31.087,68.913,50.761,49.239,24.01,224,0.875,bicubic,-46.821,-42.892,+132 +ecaresnet50d,31.058,68.942,50.848,49.152,25.58,224,0.875,bicubic,-49.534,-44.472,+4 +ecaresnet50t,31.058,68.942,50.577,49.423,25.57,320,0.950,bicubic,-51.288,-45.561,-56 +resnet50d,31.020,68.980,49.808,50.192,25.58,224,0.875,bicubic,-49.510,-45.352,+3 +cspdarknet53,31.018,68.981,50.390,49.610,27.64,256,0.887,bilinear,-49.040,-44.694,+27 +gluon_resnet152_v1c,30.991,69.009,48.924,51.076,60.21,224,0.875,bicubic,-48.919,-45.916,+32 +gluon_resnext101_64x4d,30.987,69.013,48.549,51.451,83.46,224,0.875,bicubic,-49.617,-46.439,-2 +twins_svt_small,30.985,69.015,49.223,50.777,24.06,224,0.900,bicubic,-50.697,-46.447,-41 +tf_efficientnet_cc_b1_8e,30.899,69.101,50.080,49.920,39.72,240,0.882,bicubic,-48.409,-44.290,+59 +ecaresnet101d_pruned,30.897,69.103,50.013,49.987,24.88,224,0.875,bicubic,-49.921,-45.615,-10 +gluon_resnext101_32x4d,30.877,69.123,48.537,51.463,44.18,224,0.875,bicubic,-49.457,-46.389,+4 +tf_efficientnetv2_b3,30.861,69.139,49.814,50.186,14.36,300,0.904,bicubic,-51.109,-45.968,-50 +tf_efficientnet_lite4,30.830,69.170,50.386,49.614,13.01,380,0.920,bilinear,-50.706,-45.282,-41 +nf_resnet50,30.775,69.225,50.074,49.926,25.56,288,0.940,bicubic,-49.919,-45.282,-11 +dpn107,30.678,69.322,48.810,51.190,86.92,224,0.875,bicubic,-49.478,-46.100,+14 +ese_vovnet39b,30.657,69.343,49.875,50.125,24.57,224,0.875,bicubic,-48.663,-44.837,+50 +gluon_resnet152_v1b,30.623,69.376,48.521,51.479,60.19,224,0.875,bicubic,-49.063,-46.215,+36 +tresnet_xl_448,30.614,69.386,49.069,50.931,78.44,448,0.875,bilinear,-52.436,-47.105,-86 +ssl_resnext50_32x4d,30.594,69.406,50.657,49.343,25.03,224,0.875,bilinear,-49.724,-44.749,-3 +gluon_resnet101_v1d,30.523,69.477,47.950,52.050,44.57,224,0.875,bicubic,-49.891,-47.064,-7 +dpn68b,30.517,69.483,49.158,50.842,12.61,224,0.875,bicubic,-48.699,-45.256,+57 +resnest26d,30.490,69.510,50.677,49.323,17.07,224,0.875,bilinear,-47.988,-43.621,+84 +efficientnet_b2,30.435,69.565,49.698,50.302,9.11,288,1.000,bicubic,-50.177,-45.620,-19 +tf_efficientnet_b1_ap,30.421,69.579,49.553,50.447,7.79,240,0.882,bicubic,-48.859,-44.753,+50 +twins_pcpvt_small,30.382,69.618,49.386,50.614,24.11,224,0.900,bicubic,-50.706,-46.256,-34 +visformer_small,30.329,69.671,48.285,51.715,40.22,224,0.900,bicubic,-51.777,-47.587,-68 +pit_xs_distilled_224,30.278,69.722,49.836,50.164,11.00,224,0.900,bicubic,-49.028,-44.528,+44 +seresnet50,30.077,69.923,49.292,50.708,28.09,224,0.875,bicubic,-50.197,-45.778,-7 +dpn98,30.067,69.933,48.244,51.756,61.57,224,0.875,bicubic,-49.575,-46.354,+26 +tf_efficientnet_b2,30.026,69.974,49.581,50.419,9.11,260,0.890,bicubic,-50.060,-45.328,+1 +dpn131,30.024,69.976,48.146,51.854,79.25,224,0.875,bicubic,-49.798,-46.564,+15 +efficientnet_el,30.018,69.982,48.834,51.166,10.59,300,0.904,bicubic,-51.298,-46.692,-50 +legacy_senet154,30.001,69.999,48.034,51.966,115.09,224,0.875,bilinear,-51.309,-47.462,-50 
+dpn92,29.953,70.047,49.162,50.838,37.67,224,0.875,bicubic,-50.055,-45.674,0 gluon_senet154,29.877,70.123,47.894,52.106,115.09,224,0.875,bicubic,-51.357,-47.454,-49 -xception,29.865,70.135,48.686,51.314,22.86,299,0.897,bicubic,-49.187,-45.706,+46 -adv_inception_v3,29.816,70.184,47.847,52.153,23.83,299,0.875,bicubic,-47.766,-45.889,+100 -gluon_xception65,29.784,70.216,47.755,52.245,39.92,299,0.903,bicubic,-49.932,-47.105,+11 -resnetblur50,29.625,70.375,48.248,51.752,25.56,224,0.875,bicubic,-49.661,-46.390,+30 -efficientnet_em,29.486,70.514,48.946,51.054,6.90,240,0.882,bicubic,-49.766,-45.848,+31 -resnext101_32x8d,29.439,70.561,48.486,51.514,88.79,224,0.875,bilinear,-49.869,-46.032,+22 -coat_lite_mini,29.433,70.567,47.724,52.276,11.01,224,0.900,bicubic,-49.655,-46.880,+36 -ssl_resnet50,29.423,70.577,49.781,50.219,25.56,224,0.875,bilinear,-49.799,-45.051,+29 -vit_deit_small_patch16_224,29.421,70.579,48.256,51.744,22.05,224,0.900,bicubic,-50.435,-46.796,-3 -nf_regnet_b1,29.397,70.603,49.445,50.555,10.22,288,0.900,bicubic,-49.909,-45.303,+21 -cait_xxs24_384,29.387,70.612,48.753,51.247,12.03,384,1.000,bicubic,-51.578,-46.893,-52 -swin_tiny_patch4_window7_224,29.334,70.666,47.602,52.398,28.29,224,0.900,bicubic,-52.044,-47.938,-66 -resnext50_32x4d,29.331,70.669,47.397,52.603,25.03,224,0.875,bicubic,-50.438,-47.201,-3 -resnet34d,29.328,70.671,48.409,51.591,21.82,224,0.875,bicubic,-47.788,-44.973,+102 -cait_xxs24_224,29.303,70.697,48.535,51.465,11.96,224,1.000,bicubic,-49.083,-45.775,+56 -ecaresnet50d_pruned,29.215,70.785,48.453,51.547,19.94,224,0.875,bicubic,-50.501,-46.427,-3 -tresnet_l_448,29.165,70.835,47.232,52.768,55.99,448,0.875,bilinear,-53.103,-48.744,-94 -gluon_inception_v3,29.124,70.876,46.957,53.043,23.83,299,0.875,bicubic,-49.682,-47.413,+36 -xception71,29.047,70.953,47.405,52.595,42.34,299,0.903,bicubic,-50.826,-47.517,-15 -hrnet_w64,28.989,71.011,47.142,52.858,128.06,224,0.875,bilinear,-50.485,-47.510,+3 -resnetv2_101x1_bitm,28.910,71.090,49.502,50.498,44.54,480,1.000,bilinear,-53.302,-46.970,-96 -tf_efficientnet_b0_ns,28.902,71.098,49.011,50.989,5.29,224,0.875,bicubic,-49.756,-45.365,+39 -xception65,28.896,71.104,47.167,52.833,39.92,299,0.903,bicubic,-50.656,-47.487,-3 -tf_efficientnet_b1,28.886,71.114,47.503,52.497,7.79,240,0.882,bicubic,-49.940,-46.695,+29 -gluon_resnet101_v1b,28.878,71.121,46.389,53.611,44.55,224,0.875,bicubic,-50.427,-48.135,+5 -skresnext50_32x4d,28.818,71.182,46.497,53.503,27.48,224,0.875,bicubic,-51.338,-48.145,-33 -tf_efficientnet_lite3,28.660,71.340,47.354,52.646,8.20,300,0.904,bilinear,-51.160,-47.560,-18 -gluon_seresnext50_32x4d,28.651,71.349,46.436,53.564,27.56,224,0.875,bicubic,-51.267,-48.386,-29 -skresnet34,28.645,71.355,47.953,52.047,22.28,224,0.875,bicubic,-48.267,-45.369,+95 -hrnet_w40,28.641,71.359,47.454,52.546,57.56,224,0.875,bilinear,-50.279,-47.016,+20 -tv_resnet152,28.533,71.467,47.118,52.882,60.19,224,0.875,bilinear,-49.779,-46.920,+43 -repvgg_b2,28.427,71.573,47.038,52.962,89.02,224,0.875,bilinear,-50.365,-47.376,+24 -hrnet_w48,28.413,71.587,47.586,52.414,77.47,224,0.875,bilinear,-50.887,-46.926,0 -gluon_resnext50_32x4d,28.375,71.624,45.328,54.672,25.03,224,0.875,bicubic,-50.978,-49.098,-8 +xception,29.865,70.135,48.686,51.314,22.86,299,0.897,bicubic,-49.187,-45.706,+50 +adv_inception_v3,29.816,70.184,47.847,52.153,23.83,299,0.875,bicubic,-47.766,-45.889,+107 +gluon_xception65,29.784,70.216,47.755,52.245,39.92,299,0.903,bicubic,-49.932,-47.105,+14 +resnetblur50,29.625,70.375,48.248,51.752,25.56,224,0.875,bicubic,-49.661,-46.390,+34 
+efficientnet_em,29.486,70.514,48.946,51.054,6.90,240,0.882,bicubic,-49.766,-45.848,+35 +resnext101_32x8d,29.439,70.561,48.486,51.514,88.79,224,0.875,bilinear,-49.869,-46.032,+26 +coat_lite_mini,29.433,70.567,47.724,52.276,11.01,224,0.900,bicubic,-49.655,-46.880,+40 +ssl_resnet50,29.423,70.577,49.781,50.219,25.56,224,0.875,bilinear,-49.799,-45.051,+33 +vit_deit_small_patch16_224,29.421,70.579,48.256,51.744,22.05,224,0.900,bicubic,-50.435,-46.796,-1 +nf_regnet_b1,29.397,70.603,49.445,50.555,10.22,288,0.900,bicubic,-49.909,-45.303,+25 +cait_xxs24_384,29.387,70.612,48.753,51.247,12.03,384,1.000,bicubic,-51.578,-46.893,-51 +swin_tiny_patch4_window7_224,29.334,70.666,47.602,52.398,28.29,224,0.900,bicubic,-52.044,-47.938,-67 +resnext50_32x4d,29.331,70.669,47.397,52.603,25.03,224,0.875,bicubic,-50.438,-47.201,0 +resnet34d,29.328,70.671,48.409,51.591,21.82,224,0.875,bicubic,-47.788,-44.973,+109 +cait_xxs24_224,29.303,70.697,48.535,51.465,11.96,224,1.000,bicubic,-49.083,-45.775,+62 +ecaresnet50d_pruned,29.215,70.785,48.453,51.547,19.94,224,0.875,bicubic,-50.501,-46.427,0 +tresnet_l_448,29.165,70.835,47.232,52.768,55.99,448,0.875,bilinear,-53.103,-48.744,-100 +gluon_inception_v3,29.122,70.878,46.957,53.043,23.83,299,0.875,bicubic,-49.684,-47.413,+40 +xception71,29.047,70.953,47.405,52.595,42.34,299,0.903,bicubic,-50.826,-47.517,-13 +hrnet_w64,28.989,71.011,47.142,52.858,128.06,224,0.875,bilinear,-50.485,-47.510,+6 +resnetv2_101x1_bitm,28.910,71.090,49.502,50.498,44.54,480,1.000,bilinear,-53.302,-46.970,-102 +tf_efficientnet_b0_ns,28.902,71.098,49.011,50.989,5.29,224,0.875,bicubic,-49.756,-45.365,+43 +xception65,28.896,71.104,47.167,52.833,39.92,299,0.903,bicubic,-50.656,-47.487,0 +tf_efficientnet_b1,28.886,71.114,47.503,52.497,7.79,240,0.882,bicubic,-49.940,-46.695,+33 +gluon_resnet101_v1b,28.878,71.121,46.389,53.611,44.55,224,0.875,bicubic,-50.427,-48.135,+9 +skresnext50_32x4d,28.818,71.182,46.497,53.503,27.48,224,0.875,bicubic,-51.338,-48.145,-31 +levit_256,28.745,71.255,46.723,53.277,18.89,224,0.900,bicubic,-52.765,-48.767,-88 +tf_efficientnet_lite3,28.660,71.340,47.354,52.646,8.20,300,0.904,bilinear,-51.160,-47.560,-16 +gluon_seresnext50_32x4d,28.651,71.349,46.436,53.564,27.56,224,0.875,bicubic,-51.267,-48.386,-28 +skresnet34,28.645,71.355,47.953,52.047,22.28,224,0.875,bicubic,-48.267,-45.369,+101 +hrnet_w40,28.641,71.359,47.454,52.546,57.56,224,0.875,bilinear,-50.279,-47.016,+23 +tf_efficientnetv2_b0,28.566,71.434,47.079,52.921,7.14,224,0.875,bicubic,-49.790,-46.945,+48 +tv_resnet152,28.533,71.467,47.118,52.882,60.19,224,0.875,bilinear,-49.779,-46.920,+48 +repvgg_b2,28.427,71.573,47.038,52.962,89.02,224,0.875,bilinear,-50.365,-47.376,+26 +hrnet_w48,28.413,71.587,47.586,52.414,77.47,224,0.875,bilinear,-50.887,-46.926,+2 +gluon_resnext50_32x4d,28.375,71.624,45.328,54.672,25.03,224,0.875,bicubic,-50.978,-49.098,-6 efficientnet_b2_pruned,28.362,71.638,47.051,52.949,8.31,260,0.890,bicubic,-51.554,-47.805,-35 -tf_efficientnet_b0_ap,28.346,71.654,47.531,52.469,5.29,224,0.875,bicubic,-48.740,-45.725,+82 -tf_efficientnet_cc_b0_4e,28.315,71.685,47.364,52.636,13.31,224,0.875,bicubic,-48.991,-45.970,+74 -dla102x2,28.313,71.687,46.761,53.239,41.28,224,0.875,bilinear,-51.135,-47.879,-14 -dla169,28.313,71.687,47.391,52.609,53.39,224,0.875,bilinear,-50.375,-46.945,+20 -mixnet_xl,28.287,71.713,46.702,53.298,11.90,224,0.875,bicubic,-52.189,-48.234,-67 -gluon_resnet50_v1d,28.246,71.754,45.878,54.122,25.58,224,0.875,bicubic,-50.828,-48.592,+4 
-wide_resnet101_2,28.108,71.892,46.401,53.599,126.89,224,0.875,bilinear,-50.748,-47.881,+10 -gluon_resnet101_v1c,28.104,71.896,45.961,54.039,44.57,224,0.875,bicubic,-51.430,-48.617,-22 -regnetx_320,28.093,71.907,45.126,54.874,107.81,224,0.875,bicubic,-52.153,-49.900,-57 -densenet161,28.081,71.919,46.641,53.359,28.68,224,0.875,bicubic,-49.277,-46.997,+65 -regnety_320,28.059,71.941,45.444,54.556,145.05,224,0.875,bicubic,-52.753,-49.800,-81 -gernet_s,28.022,71.978,46.723,53.277,8.17,224,0.875,bilinear,-48.894,-46.409,+76 -efficientnet_el_pruned,28.016,71.984,46.790,53.210,10.59,300,0.904,bicubic,-52.284,-48.428,-65 -xception41,27.888,72.112,45.890,54.110,26.97,299,0.903,bicubic,-50.628,-48.388,+14 -regnetx_160,27.817,72.183,45.617,54.383,54.28,224,0.875,bicubic,-52.039,-49.213,-45 -tf_inception_v3,27.782,72.218,45.719,54.281,23.83,299,0.875,bicubic,-50.074,-47.921,+44 -res2net101_26w_4s,27.768,72.232,45.179,54.821,45.21,224,0.875,bilinear,-51.430,-49.253,-11 -repvgg_b1,27.656,72.344,46.531,53.469,57.42,224,0.875,bilinear,-50.710,-47.567,+20 +tf_efficientnet_b0_ap,28.346,71.654,47.531,52.469,5.29,224,0.875,bicubic,-48.740,-45.725,+87 +tf_efficientnet_cc_b0_4e,28.315,71.685,47.364,52.636,13.31,224,0.875,bicubic,-48.991,-45.970,+79 +dla169,28.313,71.687,47.391,52.609,53.39,224,0.875,bilinear,-50.375,-46.945,+22 +dla102x2,28.313,71.687,46.761,53.239,41.28,224,0.875,bilinear,-51.135,-47.879,-12 +mixnet_xl,28.287,71.713,46.702,53.298,11.90,224,0.875,bicubic,-52.189,-48.234,-68 +gluon_resnet50_v1d,28.246,71.754,45.878,54.122,25.58,224,0.875,bicubic,-50.828,-48.592,+6 +wide_resnet101_2,28.108,71.892,46.401,53.599,126.89,224,0.875,bilinear,-50.748,-47.881,+12 +gluon_resnet101_v1c,28.104,71.896,45.961,54.039,44.57,224,0.875,bicubic,-51.430,-48.617,-21 +regnetx_320,28.093,71.907,45.126,54.874,107.81,224,0.875,bicubic,-52.153,-49.900,-58 +densenet161,28.081,71.919,46.641,53.359,28.68,224,0.875,bicubic,-49.277,-46.997,+70 +regnety_320,28.059,71.941,45.444,54.556,145.05,224,0.875,bicubic,-52.753,-49.800,-82 +gernet_s,28.022,71.978,46.723,53.277,8.17,224,0.875,bilinear,-48.894,-46.409,+81 +efficientnet_el_pruned,28.016,71.984,46.790,53.210,10.59,300,0.904,bicubic,-52.284,-48.428,-66 +levit_192,28.016,71.984,45.880,54.120,10.95,224,0.900,bicubic,-51.826,-48.906,-41 +xception41,27.888,72.112,45.890,54.110,26.97,299,0.903,bicubic,-50.628,-48.388,+15 +regnetx_160,27.817,72.183,45.617,54.383,54.28,224,0.875,bicubic,-52.039,-49.213,-46 +tf_inception_v3,27.780,72.220,45.721,54.279,23.83,299,0.875,bicubic,-50.078,-48.149,+46 +res2net101_26w_4s,27.768,72.232,45.179,54.821,45.21,224,0.875,bilinear,-51.430,-49.253,-10 +tf_efficientnetv2_b1,27.760,72.240,46.578,53.422,8.14,240,0.882,bicubic,-51.702,-48.144,-29 +repvgg_b1,27.656,72.344,46.531,53.469,57.42,224,0.875,bilinear,-50.710,-47.567,+22 hrnet_w44,27.621,72.379,45.837,54.163,67.06,224,0.875,bilinear,-51.275,-48.531,-3 -inception_v3,27.556,72.444,45.263,54.737,23.83,299,0.875,bicubic,-49.882,-48.213,+52 -pit_xs_224,27.491,72.509,45.900,54.100,10.62,224,0.900,bicubic,-50.691,-48.268,+23 +inception_v3,27.556,72.444,45.263,54.737,23.83,299,0.875,bicubic,-49.882,-48.213,+55 +pit_xs_224,27.491,72.509,45.900,54.100,10.62,224,0.900,bicubic,-50.691,-48.268,+26 regnetx_080,27.405,72.595,45.002,54.998,39.57,224,0.875,bicubic,-51.789,-49.558,-15 -hrnet_w30,27.381,72.619,46.554,53.446,37.71,224,0.875,bilinear,-50.825,-47.668,+20 -hrnet_w32,27.369,72.631,45.994,54.006,41.23,224,0.875,bilinear,-51.081,-48.192,+9 
-gluon_resnet50_v1s,27.326,72.674,45.222,54.778,25.68,224,0.875,bicubic,-51.386,-49.016,-2 -densenet201,27.265,72.735,46.222,53.778,20.01,224,0.875,bicubic,-50.021,-47.256,+51 -densenetblur121d,27.228,72.772,46.299,53.701,8.00,224,0.875,bicubic,-49.360,-46.893,+70 -regnety_064,27.220,72.780,44.847,55.153,30.58,224,0.875,bicubic,-52.502,-49.921,-51 -efficientnet_b1_pruned,27.181,72.819,45.872,54.128,6.33,240,0.882,bicubic,-51.055,-47.962,+13 -resnetrs50,27.110,72.890,45.029,54.971,35.69,224,0.910,bicubic,-52.782,-49.939,-63 -rexnet_130,27.094,72.906,45.933,54.067,7.56,224,0.875,bicubic,-52.406,-48.749,-44 -vit_small_patch16_224,27.086,72.914,45.701,54.299,48.75,224,0.900,bicubic,-50.772,-47.715,+27 -res2net50_26w_8s,27.078,72.921,44.428,55.572,48.40,224,0.875,bilinear,-52.119,-49.940,-28 -dla102x,27.061,72.939,45.475,54.525,26.31,224,0.875,bilinear,-51.449,-48.753,-5 -tv_resnet101,26.963,73.037,45.234,54.766,44.55,224,0.875,bilinear,-50.411,-48.306,+39 -resnext50d_32x4d,26.876,73.124,44.436,55.564,25.05,224,0.875,bicubic,-52.800,-50.430,-55 -regnetx_120,26.868,73.132,44.682,55.318,46.11,224,0.875,bicubic,-52.728,-50.056,-54 -rexnet_100,26.831,73.169,45.369,54.631,4.80,224,0.875,bicubic,-51.027,-48.501,+20 -densenet169,26.829,73.171,45.373,54.627,14.15,224,0.875,bicubic,-49.077,-47.653,+67 -legacy_seresnext101_32x4d,26.811,73.189,43.497,56.503,48.96,224,0.875,bilinear,-53.417,-51.521,-87 -regnety_120,26.788,73.212,44.454,55.546,51.82,224,0.875,bicubic,-53.578,-50.672,-98 -regnetx_064,26.784,73.216,44.927,55.073,26.21,224,0.875,bicubic,-52.288,-49.531,-31 -regnetx_032,26.703,73.297,45.236,54.764,15.30,224,0.875,bicubic,-51.469,-48.852,+2 -legacy_seresnet152,26.676,73.324,43.947,56.053,66.82,224,0.875,bilinear,-51.984,-50.423,-19 -densenet121,26.664,73.336,45.900,54.100,7.98,224,0.875,bicubic,-48.914,-46.752,+65 -efficientnet_es,26.621,73.379,45.112,54.888,5.44,224,0.875,bicubic,-51.445,-48.814,+3 -res2net50_26w_6s,26.595,73.405,43.990,56.010,37.05,224,0.875,bilinear,-51.975,-50.134,-20 -repvgg_b1g4,26.579,73.421,45.084,54.916,39.97,224,0.875,bilinear,-51.015,-48.742,+18 -dla60x,26.552,73.448,45.023,54.977,17.35,224,0.875,bilinear,-51.694,-48.995,-9 -regnety_080,26.524,73.476,44.359,55.641,39.18,224,0.875,bicubic,-53.352,-50.471,-82 -coat_lite_tiny,26.507,73.493,44.644,55.356,5.72,224,0.900,bicubic,-51.005,-49.272,+19 -tf_efficientnet_b0,26.485,73.515,45.646,54.354,5.29,224,0.875,bicubic,-50.363,-47.582,+38 -res2net50_14w_8s,26.483,73.517,44.371,55.629,25.06,224,0.875,bilinear,-51.667,-49.477,-7 -mobilenetv3_large_100_miil,26.481,73.519,44.473,55.527,5.48,224,0.875,bilinear,-51.435,-48.437,+1 -gluon_resnet50_v1b,26.436,73.564,44.035,55.965,25.56,224,0.875,bicubic,-51.144,-49.681,+13 -tf_efficientnet_el,26.357,73.643,44.175,55.825,10.59,300,0.904,bicubic,-53.893,-50.953,-105 -regnetx_040,26.243,73.757,44.438,55.562,22.12,224,0.875,bicubic,-52.239,-49.806,-27 -dpn68,26.129,73.871,44.228,55.772,12.61,224,0.875,bicubic,-50.189,-48.750,+43 -efficientnet_b1,26.061,73.939,44.080,55.920,7.79,256,1.000,bicubic,-52.733,-50.262,-39 -hrnet_w18,25.986,74.014,44.813,55.187,21.30,224,0.875,bilinear,-50.772,-48.631,+32 -hardcorenas_f,25.951,74.049,44.220,55.780,8.20,224,0.875,bilinear,-52.153,-49.582,-13 -regnety_040,25.923,74.077,43.848,56.152,20.65,224,0.875,bicubic,-53.297,-50.808,-59 -resnet34,25.888,74.112,43.982,56.018,21.80,224,0.875,bilinear,-49.222,-48.302,+57 -res2net50_26w_4s,25.866,74.134,43.155,56.845,25.70,224,0.875,bilinear,-52.098,-50.699,-10 
-tresnet_m_448,25.852,74.148,42.874,57.126,31.39,448,0.875,bilinear,-55.862,-52.698,-165 +hrnet_w30,27.381,72.619,46.554,53.446,37.71,224,0.875,bilinear,-50.825,-47.668,+23 +hrnet_w32,27.369,72.631,45.994,54.006,41.23,224,0.875,bilinear,-51.081,-48.192,+10 +gluon_resnet50_v1s,27.326,72.674,45.222,54.778,25.68,224,0.875,bicubic,-51.384,-49.016,-2 +densenet201,27.265,72.735,46.222,53.778,20.01,224,0.875,bicubic,-50.021,-47.256,+54 +densenetblur121d,27.228,72.772,46.299,53.701,8.00,224,0.875,bicubic,-49.360,-46.893,+73 +regnety_064,27.220,72.780,44.847,55.153,30.58,224,0.875,bicubic,-52.502,-49.921,-52 +efficientnet_b1_pruned,27.181,72.819,45.872,54.128,6.33,240,0.882,bicubic,-51.055,-47.962,+16 +tf_efficientnetv2_b2,27.163,72.837,44.570,55.430,10.10,260,0.890,bicubic,-53.045,-50.472,-78 +resnetrs50,27.110,72.890,45.029,54.971,35.69,224,0.910,bicubic,-52.782,-49.939,-66 +rexnet_130,27.094,72.906,45.933,54.067,7.56,224,0.875,bicubic,-52.406,-48.749,-46 +vit_small_patch16_224,27.086,72.914,45.701,54.299,48.75,224,0.900,bicubic,-50.772,-47.939,+29 +res2net50_26w_8s,27.078,72.921,44.428,55.572,48.40,224,0.875,bilinear,-52.119,-49.940,-29 +dla102x,27.061,72.939,45.475,54.525,26.31,224,0.875,bilinear,-51.449,-48.753,-6 +tv_resnet101,26.963,73.037,45.234,54.766,44.55,224,0.875,bilinear,-50.411,-48.306,+41 +resnext50d_32x4d,26.876,73.124,44.436,55.564,25.05,224,0.875,bicubic,-52.800,-50.430,-57 +regnetx_120,26.868,73.132,44.682,55.318,46.11,224,0.875,bicubic,-52.728,-50.056,-56 +rexnet_100,26.831,73.169,45.369,54.631,4.80,224,0.875,bicubic,-51.027,-48.047,+24 +densenet169,26.829,73.171,45.373,54.627,14.15,224,0.875,bicubic,-49.077,-47.653,+70 +legacy_seresnext101_32x4d,26.811,73.189,43.497,56.503,48.96,224,0.875,bilinear,-53.417,-51.521,-91 +regnety_120,26.788,73.212,44.454,55.546,51.82,224,0.875,bicubic,-53.578,-50.672,-102 +regnetx_064,26.784,73.216,44.927,55.073,26.21,224,0.875,bicubic,-52.288,-49.531,-32 +regnetx_032,26.703,73.297,45.236,54.764,15.30,224,0.875,bicubic,-51.469,-48.852,+4 +legacy_seresnet152,26.676,73.324,43.947,56.053,66.82,224,0.875,bilinear,-51.984,-50.423,-20 +densenet121,26.664,73.336,45.900,54.100,7.98,224,0.875,bicubic,-48.914,-46.752,+68 +efficientnet_es,26.621,73.379,45.112,54.888,5.44,224,0.875,bicubic,-51.445,-48.814,+5 +res2net50_26w_6s,26.595,73.405,43.990,56.010,37.05,224,0.875,bilinear,-51.975,-50.134,-21 +repvgg_b1g4,26.579,73.421,45.084,54.916,39.97,224,0.875,bilinear,-51.015,-48.742,+20 +dla60x,26.552,73.448,45.023,54.977,17.35,224,0.875,bilinear,-51.694,-48.995,-7 +regnety_080,26.524,73.476,44.359,55.641,39.18,224,0.875,bicubic,-53.352,-50.471,-85 +coat_lite_tiny,26.507,73.493,44.644,55.356,5.72,224,0.900,bicubic,-51.005,-49.272,+21 +tf_efficientnet_b0,26.485,73.515,45.646,54.354,5.29,224,0.875,bicubic,-50.363,-47.582,+40 +res2net50_14w_8s,26.483,73.517,44.371,55.629,25.06,224,0.875,bilinear,-51.667,-49.477,-5 +mobilenetv3_large_100_miil,26.481,73.519,44.473,55.527,5.48,224,0.875,bilinear,-51.435,-48.437,+3 +gluon_resnet50_v1b,26.436,73.564,44.035,55.965,25.56,224,0.875,bicubic,-51.144,-49.681,+15 +tf_efficientnet_el,26.357,73.643,44.175,55.825,10.59,300,0.904,bicubic,-53.893,-50.953,-109 +levit_128,26.332,73.668,44.096,55.904,9.21,224,0.900,bicubic,-52.154,-49.914,-28 +regnetx_040,26.243,73.757,44.438,55.562,22.12,224,0.875,bicubic,-52.239,-49.806,-28 +dpn68,26.129,73.871,44.228,55.772,12.61,224,0.875,bicubic,-50.189,-48.750,+45 +efficientnet_b1,26.061,73.939,44.080,55.920,7.79,256,1.000,bicubic,-52.733,-50.262,-41 
+hrnet_w18,25.986,74.014,44.813,55.187,21.30,224,0.875,bilinear,-50.772,-48.631,+33 +hardcorenas_f,25.951,74.049,44.220,55.780,8.20,224,0.875,bilinear,-52.153,-49.582,-12 +regnety_040,25.923,74.077,43.848,56.152,20.65,224,0.875,bicubic,-53.297,-50.808,-61 +resnet34,25.888,74.112,43.982,56.018,21.80,224,0.875,bilinear,-49.222,-48.302,+59 +res2net50_26w_4s,25.866,74.134,43.155,56.845,25.70,224,0.875,bilinear,-52.098,-50.699,-9 +tresnet_m_448,25.852,74.148,42.874,57.126,31.39,448,0.875,bilinear,-55.862,-52.698,-175 +coat_tiny,25.843,74.157,43.276,56.724,5.50,224,0.900,bicubic,-52.591,-50.761,-32 hardcorenas_c,25.815,74.185,44.772,55.228,5.52,224,0.875,bilinear,-51.239,-48.386,+18 gluon_resnet50_v1c,25.784,74.216,43.031,56.969,25.58,224,0.875,bicubic,-52.228,-50.957,-16 selecsls60,25.729,74.272,44.065,55.935,30.67,224,0.875,bicubic,-52.254,-49.764,-15 hardcorenas_e,25.662,74.338,43.412,56.588,8.07,224,0.875,bilinear,-52.132,-50.282,-8 -dla60_res2net,25.652,74.348,43.599,56.401,20.85,224,0.875,bilinear,-52.812,-50.607,-38 -dla60_res2next,25.640,74.360,43.670,56.330,17.03,224,0.875,bilinear,-52.800,-50.482,-37 -ecaresnet26t,25.538,74.462,43.660,56.340,16.01,320,0.950,bicubic,-54.316,-51.424,-100 -mixnet_l,25.512,74.488,43.455,56.545,7.33,224,0.875,bicubic,-53.464,-50.727,-59 +dla60_res2net,25.652,74.348,43.599,56.401,20.85,224,0.875,bilinear,-52.812,-50.607,-40 +dla60_res2next,25.640,74.360,43.670,56.330,17.03,224,0.875,bilinear,-52.800,-50.482,-39 +ecaresnet26t,25.538,74.462,43.660,56.340,16.01,320,0.950,bicubic,-54.316,-51.424,-105 +mixnet_l,25.512,74.488,43.455,56.545,7.33,224,0.875,bicubic,-53.464,-50.727,-62 tf_efficientnet_lite1,25.499,74.501,43.585,56.415,5.42,240,0.882,bicubic,-51.143,-49.641,+20 tv_resnext50_32x4d,25.455,74.545,42.787,57.213,25.03,224,0.875,bilinear,-52.165,-50.909,-11 -repvgg_a2,25.436,74.564,43.939,56.061,28.21,224,0.875,bilinear,-51.024,-49.065,+24 -tf_mixnet_l,25.422,74.578,42.534,57.466,7.33,224,0.875,bicubic,-53.352,-51.464,-55 +repvgg_a2,25.436,74.564,43.939,56.061,28.21,224,0.875,bilinear,-51.024,-49.065,+25 +tf_mixnet_l,25.422,74.578,42.534,57.466,7.33,224,0.875,bicubic,-53.352,-51.464,-58 hardcorenas_b,25.402,74.598,44.190,55.810,5.18,224,0.875,bilinear,-51.136,-48.564,+20 res2next50,25.389,74.611,42.508,57.492,24.67,224,0.875,bilinear,-52.857,-51.384,-38 -legacy_seresnet101,25.334,74.666,42.825,57.175,49.33,224,0.875,bilinear,-53.048,-51.439,-43 -selecsls60b,25.332,74.668,43.559,56.441,32.77,224,0.875,bicubic,-53.080,-50.615,-46 +legacy_seresnet101,25.334,74.666,42.825,57.175,49.33,224,0.875,bilinear,-53.048,-51.439,-44 +selecsls60b,25.332,74.668,43.559,56.441,32.77,224,0.875,bicubic,-53.080,-50.615,-47 dla102,25.316,74.684,43.827,56.173,33.27,224,0.875,bilinear,-52.716,-50.119,-32 hardcorenas_d,25.300,74.700,43.121,56.879,7.50,224,0.875,bilinear,-52.132,-50.363,-10 -resnest14d,25.284,74.716,44.114,55.886,10.61,224,0.875,bilinear,-50.222,-48.404,+28 -legacy_seresnext50_32x4d,25.210,74.790,41.936,58.064,27.56,224,0.875,bilinear,-53.868,-52.500,-76 +resnest14d,25.284,74.716,44.114,55.886,10.61,224,0.875,bilinear,-50.222,-48.404,+29 +legacy_seresnext50_32x4d,25.210,74.790,41.936,58.064,27.56,224,0.875,bilinear,-53.868,-52.500,-79 mixer_b16_224,25.121,74.879,41.227,58.773,59.88,224,0.875,bicubic,-51.481,-51.001,+9 res2net50_48w_2s,25.027,74.973,42.208,57.792,25.29,224,0.875,bilinear,-52.495,-51.346,-18 efficientnet_b0,25.015,74.985,42.787,57.213,5.29,224,0.875,bicubic,-52.683,-50.745,-26 
-gluon_resnet34_v1b,24.939,75.061,42.243,57.757,21.80,224,0.875,bicubic,-49.649,-49.747,+40 +gluon_resnet34_v1b,24.939,75.061,42.243,57.757,21.80,224,0.875,bicubic,-49.649,-49.747,+41 mobilenetv2_120d,24.937,75.063,43.058,56.942,5.83,224,0.875,bicubic,-52.347,-50.434,-12 dla60,24.933,75.067,43.296,56.704,22.04,224,0.875,bilinear,-52.099,-50.022,-6 regnety_016,24.811,75.189,42.616,57.384,11.20,224,0.875,bicubic,-53.051,-51.104,-35 tf_efficientnet_lite2,24.530,75.470,42.280,57.720,6.09,260,0.890,bicubic,-52.938,-51.474,-22 -skresnet18,24.483,75.517,42.536,57.464,11.96,224,0.875,bicubic,-48.555,-48.632,+47 +skresnet18,24.483,75.517,42.536,57.464,11.96,224,0.875,bicubic,-48.555,-48.632,+49 regnetx_016,24.473,75.527,42.514,57.486,9.19,224,0.875,bicubic,-52.477,-50.906,-9 -pit_ti_distilled_224,24.406,75.594,42.730,57.270,5.10,224,0.900,bicubic,-50.124,-49.366,+34 -tf_efficientnet_lite0,24.373,75.627,42.487,57.513,4.65,224,0.875,bicubic,-50.457,-49.689,+27 -hardcorenas_a,24.369,75.631,43.284,56.716,5.26,224,0.875,bilinear,-51.547,-49.230,+7 -resnetv2_50x1_bitm,24.231,75.769,43.477,56.523,25.55,480,1.000,bilinear,-55.941,-52.149,-144 -tv_resnet50,24.070,75.930,41.313,58.687,25.56,224,0.875,bilinear,-52.068,-51.551,+3 +pit_ti_distilled_224,24.406,75.594,42.730,57.270,5.10,224,0.900,bicubic,-50.124,-49.366,+35 +tf_efficientnet_lite0,24.373,75.627,42.487,57.513,4.65,224,0.875,bicubic,-50.457,-49.689,+28 +hardcorenas_a,24.369,75.631,43.284,56.716,5.26,224,0.875,bilinear,-51.547,-49.230,+8 +resnetv2_50x1_bitm,24.233,75.767,43.477,56.523,25.55,480,1.000,bilinear,-55.939,-52.149,-149 +tv_resnet50,24.070,75.930,41.313,58.687,25.56,224,0.875,bilinear,-52.068,-51.551,+4 +levit_128s,24.058,75.942,41.007,58.993,7.78,224,0.900,bicubic,-52.472,-51.859,-2 legacy_seresnet34,24.027,75.973,41.909,58.091,21.96,224,0.875,bilinear,-50.781,-50.215,+24 -resnet18d,23.929,76.071,42.300,57.700,11.71,224,0.875,bicubic,-48.331,-48.396,+46 +resnet18d,23.929,76.071,42.300,57.700,11.71,224,0.875,bicubic,-48.331,-48.396,+47 efficientnet_lite0,23.909,76.091,42.088,57.912,4.65,224,0.875,bicubic,-51.575,-50.422,+10 tv_densenet121,23.844,76.156,41.925,58.075,7.98,224,0.875,bicubic,-50.894,-50.225,+22 efficientnet_es_pruned,23.828,76.172,41.995,58.005,5.44,224,0.875,bicubic,-51.172,-50.453,+18 mobilenetv2_140,23.712,76.288,41.477,58.523,6.11,224,0.875,bicubic,-52.804,-51.519,-7 -mixnet_m,23.710,76.290,41.141,58.859,5.01,224,0.875,bicubic,-53.550,-52.284,-28 +mixnet_m,23.710,76.290,41.141,58.859,5.01,224,0.875,bicubic,-53.550,-52.284,-29 dla34,23.669,76.331,41.551,58.449,15.74,224,0.875,bilinear,-50.961,-50.527,+20 -legacy_seresnet50,23.651,76.349,40.091,59.909,28.09,224,0.875,bilinear,-53.978,-53.657,-46 -ese_vovnet19b_dw,23.535,76.465,41.288,58.712,6.54,224,0.875,bicubic,-53.263,-51.980,-19 -tf_mixnet_m,23.484,76.516,40.989,59.011,5.01,224,0.875,bicubic,-53.458,-52.163,-24 +legacy_seresnet50,23.651,76.349,40.091,59.909,28.09,224,0.875,bilinear,-53.978,-53.657,-47 +ese_vovnet19b_dw,23.535,76.465,41.288,58.712,6.54,224,0.875,bicubic,-53.263,-51.980,-20 +tf_mixnet_m,23.484,76.516,40.989,59.011,5.01,224,0.875,bicubic,-53.458,-52.163,-25 tv_resnet34,23.473,76.527,41.367,58.633,21.80,224,0.875,bilinear,-49.839,-50.059,+27 -tf_efficientnet_em,23.359,76.641,40.404,59.596,6.90,240,0.882,bicubic,-54.771,-53.640,-66 -selecsls42b,23.357,76.643,40.677,59.323,32.46,224,0.875,bicubic,-53.817,-52.713,-34 +tf_efficientnet_em,23.359,76.641,40.404,59.596,6.90,240,0.882,bicubic,-54.771,-53.640,-67 
+selecsls42b,23.357,76.643,40.677,59.323,32.46,224,0.875,bicubic,-53.817,-52.713,-35 repvgg_b0,23.316,76.684,41.182,58.818,15.82,224,0.875,bilinear,-51.837,-51.236,+2 mobilenetv2_110d,23.066,76.934,40.716,59.284,4.52,224,0.875,bicubic,-51.970,-51.470,+6 vit_deit_tiny_distilled_patch16_224,22.718,77.282,40.771,59.229,5.91,224,0.900,bicubic,-51.792,-51.119,+14 mobilenetv3_large_100,22.655,77.345,40.781,59.219,5.48,224,0.875,bicubic,-53.111,-51.761,-11 mobilenetv3_rw,22.630,77.370,40.374,59.626,5.48,224,0.875,bicubic,-53.004,-52.334,-10 tf_mobilenetv3_large_100,22.569,77.431,39.767,60.233,5.48,224,0.875,bilinear,-52.949,-52.839,-9 -tf_efficientnet_es,22.413,77.587,39.095,60.905,5.44,224,0.875,bicubic,-54.180,-54.107,-25 +tf_efficientnet_es,22.413,77.587,39.095,60.905,5.44,224,0.875,bicubic,-54.180,-54.107,-26 hrnet_w18_small_v2,22.337,77.663,39.861,60.139,15.60,224,0.875,bilinear,-52.777,-52.555,-3 -regnety_008,22.119,77.881,38.900,61.100,6.26,224,0.875,bicubic,-54.197,-54.166,-21 -seresnext26t_32x4d,21.991,78.009,38.482,61.518,16.81,224,0.875,bicubic,-55.995,-55.264,-72 -regnety_006,21.971,78.029,38.955,61.045,6.06,224,0.875,bicubic,-53.275,-53.577,-9 -regnetx_008,21.940,78.060,38.928,61.072,7.26,224,0.875,bicubic,-53.098,-53.408,-5 -resnet26d,21.907,78.094,38.619,61.381,16.01,224,0.875,bicubic,-54.789,-54.531,-34 -semnasnet_100,21.903,78.097,38.600,61.400,3.89,224,0.875,bicubic,-53.545,-54.004,-14 +convit_tiny,22.282,77.718,39.669,60.331,5.71,224,0.875,bicubic,-50.834,-52.045,+18 +regnety_008,22.119,77.881,38.900,61.100,6.26,224,0.875,bicubic,-54.197,-54.166,-22 +seresnext26t_32x4d,21.991,78.009,38.482,61.518,16.81,224,0.875,bicubic,-55.995,-55.264,-74 +regnety_006,21.971,78.029,38.955,61.045,6.06,224,0.875,bicubic,-53.275,-53.577,-10 +regnetx_008,21.940,78.060,38.928,61.072,7.26,224,0.875,bicubic,-53.098,-53.408,-6 +resnet26d,21.907,78.094,38.619,61.381,16.01,224,0.875,bicubic,-54.789,-54.531,-36 +semnasnet_100,21.903,78.097,38.600,61.400,3.89,224,0.875,bicubic,-53.545,-54.004,-15 pit_ti_224,21.875,78.125,39.541,60.459,4.85,224,0.900,bicubic,-51.037,-51.861,+14 -regnetx_006,21.738,78.263,38.904,61.096,6.20,224,0.875,bicubic,-52.115,-52.768,+6 -vgg19_bn,21.628,78.373,39.283,60.717,143.68,224,0.875,bilinear,-52.587,-52.559,+1 -ghostnet_100,21.620,78.380,38.692,61.308,5.18,224,0.875,bilinear,-52.358,-52.764,+3 +regnetx_006,21.738,78.263,38.904,61.096,6.20,224,0.875,bicubic,-52.115,-52.768,+5 +vgg19_bn,21.628,78.373,39.283,60.717,143.68,224,0.875,bilinear,-52.587,-52.559,0 +ghostnet_100,21.620,78.380,38.692,61.308,5.18,224,0.875,bilinear,-52.358,-52.764,+2 gluon_resnet18_v1b,21.549,78.451,38.869,61.131,11.69,224,0.875,bicubic,-49.287,-50.893,+22 -fbnetc_100,21.484,78.516,38.161,61.839,5.57,224,0.875,bilinear,-53.640,-54.224,-16 -mnasnet_100,21.350,78.650,37.719,62.281,4.38,224,0.875,bicubic,-53.308,-54.395,-8 -resnet26,21.295,78.705,38.018,61.982,16.00,224,0.875,bicubic,-53.997,-54.552,-21 +fbnetc_100,21.484,78.516,38.161,61.839,5.57,224,0.875,bilinear,-53.640,-54.224,-17 +mnasnet_100,21.350,78.650,37.719,62.281,4.38,224,0.875,bicubic,-53.308,-54.395,-9 +resnet26,21.295,78.705,38.018,61.982,16.00,224,0.875,bicubic,-53.997,-54.552,-22 ssl_resnet18,21.278,78.722,39.113,60.887,11.69,224,0.875,bilinear,-51.332,-52.303,+7 -mixnet_s,21.254,78.746,38.187,61.813,4.13,224,0.875,bicubic,-54.738,-54.609,-34 -seresnext26d_32x4d,21.252,78.748,37.311,62.689,16.81,224,0.875,bicubic,-56.350,-56.297,-74 -legacy_seresnext26_32x4d,21.093,78.907,37.633,62.367,16.79,224,0.875,bicubic,-56.011,-55.683,-58 
+mixnet_s,21.254,78.746,38.187,61.813,4.13,224,0.875,bicubic,-54.738,-54.609,-35 +seresnext26d_32x4d,21.252,78.748,37.311,62.689,16.81,224,0.875,bicubic,-56.350,-56.297,-76 +legacy_seresnext26_32x4d,21.093,78.907,37.633,62.367,16.79,224,0.875,bicubic,-56.011,-55.683,-60 regnetx_004,20.898,79.102,37.566,62.434,5.16,224,0.875,bicubic,-51.498,-53.264,+4 -spnasnet_100,20.863,79.137,37.896,62.104,4.42,224,0.875,bilinear,-53.221,-53.922,-9 +spnasnet_100,20.863,79.137,37.896,62.104,4.42,224,0.875,bilinear,-53.221,-53.922,-10 legacy_seresnet18,20.837,79.162,37.619,62.381,11.78,224,0.875,bicubic,-50.905,-52.715,+9 mobilenetv2_100,20.773,79.227,37.759,62.241,3.50,224,0.875,bicubic,-52.197,-53.257,-2 -tf_mixnet_s,20.470,79.530,36.607,63.393,4.13,224,0.875,bicubic,-55.180,-56.021,-37 -regnety_004,20.415,79.585,37.002,62.998,4.34,224,0.875,bicubic,-53.619,-54.750,-12 -hrnet_w18_small,20.368,79.632,37.093,62.907,13.19,224,0.875,bilinear,-51.974,-53.585,0 -tf_mobilenetv3_large_075,20.366,79.634,36.764,63.236,3.99,224,0.875,bilinear,-53.072,-54.586,-11 +tf_mixnet_s,20.470,79.530,36.607,63.393,4.13,224,0.875,bicubic,-55.180,-56.021,-38 +regnety_004,20.415,79.585,37.002,62.998,4.34,224,0.875,bicubic,-53.619,-54.750,-13 +hrnet_w18_small,20.368,79.632,37.093,62.907,13.19,224,0.875,bilinear,-51.976,-53.585,0 +tf_mobilenetv3_large_075,20.366,79.634,36.764,63.236,3.99,224,0.875,bilinear,-53.072,-54.586,-12 resnet18,20.228,79.772,37.261,62.739,11.69,224,0.875,bilinear,-49.520,-51.817,+10 mixer_l16_224,20.171,79.829,32.956,67.044,208.20,224,0.875,bicubic,-51.887,-54.712,+1 vit_deit_tiny_patch16_224,20.162,79.838,37.546,62.454,5.72,224,0.900,bicubic,-52.007,-53.572,-1 tf_mobilenetv3_large_minimal_100,20.122,79.878,36.908,63.092,3.92,224,0.875,bilinear,-52.126,-53.722,-3 -vgg16_bn,19.959,80.041,36.301,63.699,138.37,224,0.875,bilinear,-53.391,-55.205,-15 +vgg16_bn,19.959,80.041,36.301,63.699,138.37,224,0.875,bilinear,-53.391,-55.205,-16 vgg19,17.929,82.071,33.054,66.946,143.67,224,0.875,bilinear,-54.439,-57.818,-8 vgg13_bn,17.802,82.198,34.039,65.961,133.05,224,0.875,bilinear,-53.792,-56.337,-2 vgg16,17.540,82.460,32.773,67.227,138.36,224,0.875,bilinear,-54.054,-57.609,-2 @@ -359,6 +390,6 @@ tf_mobilenetv3_small_100,16.227,83.772,31.223,68.777,2.54,224,0.875,bilinear,-51 vgg13,16.100,83.900,30.985,69.015,133.05,224,0.875,bilinear,-53.826,-58.261,-4 vgg11,15.728,84.272,30.453,69.547,132.86,224,0.875,bilinear,-53.296,-58.175,-3 tf_mobilenetv3_small_075,14.944,85.056,29.572,70.428,2.04,224,0.875,bilinear,-50.772,-56.558,+1 -dla46_c,14.657,85.343,29.380,70.620,1.30,224,0.875,bilinear,-50.209,-56.912,+1 +dla46_c,14.657,85.343,29.380,70.620,1.30,224,0.875,bilinear,-50.207,-56.912,+1 dla46x_c,14.382,85.618,29.191,70.809,1.07,224,0.875,bilinear,-51.588,-57.789,-2 tf_mobilenetv3_small_minimal_100,13.964,86.036,27.988,72.012,2.04,224,0.875,bilinear,-48.942,-56.242,0 From ba2ca4b46440c9fcf579fc66ca6df3082db44475 Mon Sep 17 00:00:00 2001 From: Ross Wightman Date: Sat, 12 Jun 2021 12:27:43 -0700 Subject: [PATCH 14/31] One codepath for stdconv, switch layernorm to batchnorm so gain included. Tweak epsilon values for nfnet, resnetv2, vit hybrid. 
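For reference, the one-codepath trick is that F.batch_norm over a reshaped weight
computes the same per-output-channel standardization as the explicit std/mean path,
with the gain folded into the batch-norm affine weight so everything happens in one
kernel. A minimal numerical sketch (shapes and the random gain are illustrative
only, not from this patch; gamma is omitted, i.e. gamma=1.0):

    import torch
    import torch.nn.functional as F

    out_ch, in_ch, k, eps = 64, 32, 3, 1e-6
    w = torch.randn(out_ch, in_ch, k, k)
    gain = torch.rand(out_ch, 1, 1, 1)
    scale = w[0].numel() ** -0.5  # 1 / sqrt(fan-in)

    # fused path: batch stats over each filter's fan-in, gain applied as BN weight
    w_bn = F.batch_norm(
        w.view(1, out_ch, -1), None, None,   # no running stats -> use batch stats
        weight=(gain * scale).view(-1),      # gain folded into the BN affine weight
        training=True, momentum=0., eps=eps).reshape_as(w)

    # explicit reference: gain * scale * (w - mean) / sqrt(var + eps) per out channel
    var, mean = torch.var_mean(w, dim=[1, 2, 3], keepdim=True, unbiased=False)
    w_ref = gain * scale * (w - mean) / torch.sqrt(var + eps)

    assert torch.allclose(w_bn, w_ref, atol=1e-6)

Note the removed non-layernorm fallback divided by (std + eps) rather than
sqrt(var + eps); the eps defaults also move from 1e-5 to 1e-6 here (and to 1e-8
for the resnetv2 / vit hybrid weights).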
--- timm/models/layers/std_conv.py | 78 ++++++++---------------- timm/models/nfnet.py | 6 +- timm/models/resnetv2.py | 6 +- timm/models/vision_transformer_hybrid.py | 8 +-- 4 files changed, 33 insertions(+), 65 deletions(-) diff --git a/timm/models/layers/std_conv.py b/timm/models/layers/std_conv.py index a1afc653..49b35875 100644 --- a/timm/models/layers/std_conv.py +++ b/timm/models/layers/std_conv.py @@ -18,27 +18,20 @@ class StdConv2d(nn.Conv2d): https://arxiv.org/abs/1903.10520v2 """ def __init__( - self, in_channel, out_channels, kernel_size, stride=1, padding=None, dilation=1, - groups=1, bias=False, eps=1e-5, use_layernorm=True): + self, in_channel, out_channels, kernel_size, stride=1, padding=None, + dilation=1, groups=1, bias=False, eps=1e-6): if padding is None: padding = get_padding(kernel_size, stride, dilation) super().__init__( in_channel, out_channels, kernel_size, stride=stride, padding=padding, dilation=dilation, groups=groups, bias=bias) self.eps = eps - self.use_layernorm = use_layernorm - - def get_weight(self): - if self.use_layernorm: - # NOTE F.layer_norm is being used to compute (self.weight - mean) / (sqrt(var) + self.eps) in one op - weight = F.layer_norm(self.weight, self.weight.shape[1:], eps=self.eps) - else: - std, mean = torch.std_mean(self.weight, dim=[1, 2, 3], keepdim=True, unbiased=False) - weight = (self.weight - mean) / (std + self.eps) - return weight def forward(self, x): - x = F.conv2d(x, self.get_weight(), self.bias, self.stride, self.padding, self.dilation, self.groups) + weight = F.batch_norm( + self.weight.view(1, self.out_channels, -1), None, None, + eps=self.eps, training=True, momentum=0.).reshape_as(self.weight) + x = F.conv2d(x, weight, self.bias, self.stride, self.padding, self.dilation, self.groups) return x @@ -49,29 +42,22 @@ class StdConv2dSame(nn.Conv2d): https://arxiv.org/abs/1903.10520v2 """ def __init__( - self, in_channel, out_channels, kernel_size, stride=1, padding='SAME', dilation=1, - groups=1, bias=False, eps=1e-5, use_layernorm=True): + self, in_channel, out_channels, kernel_size, stride=1, padding='SAME', + dilation=1, groups=1, bias=False, eps=1e-6): padding, is_dynamic = get_padding_value(padding, kernel_size, stride=stride, dilation=dilation) super().__init__( in_channel, out_channels, kernel_size, stride=stride, padding=padding, dilation=dilation, groups=groups, bias=bias) self.same_pad = is_dynamic self.eps = eps - self.use_layernorm = use_layernorm - - def get_weight(self): - if self.use_layernorm: - # NOTE F.layer_norm is being used to compute (self.weight - mean) / (sqrt(var) + self.eps) in one op - weight = F.layer_norm(self.weight, self.weight.shape[1:], eps=self.eps) - else: - std, mean = torch.std_mean(self.weight, dim=[1, 2, 3], keepdim=True, unbiased=False) - weight = (self.weight - mean) / (std + self.eps) - return weight def forward(self, x): if self.same_pad: x = pad_same(x, self.kernel_size, self.stride, self.dilation) - x = F.conv2d(x, self.get_weight(), self.bias, self.stride, self.padding, self.dilation, self.groups) + weight = F.batch_norm( + self.weight.view(1, self.out_channels, -1), None, None, + eps=self.eps, training=True, momentum=0.).reshape_as(self.weight) + x = F.conv2d(x, weight, self.bias, self.stride, self.padding, self.dilation, self.groups) return x @@ -85,8 +71,8 @@ class ScaledStdConv2d(nn.Conv2d): """ def __init__( - self, in_channels, out_channels, kernel_size, stride=1, padding=None, dilation=1, groups=1, - bias=True, gamma=1.0, eps=1e-5, gain_init=1.0, use_layernorm=True): + self, 
in_channels, out_channels, kernel_size, stride=1, padding=None, + dilation=1, groups=1, bias=True, gamma=1.0, eps=1e-6, gain_init=1.0): if padding is None: padding = get_padding(kernel_size, stride, dilation) super().__init__( @@ -95,19 +81,13 @@ class ScaledStdConv2d(nn.Conv2d): self.gain = nn.Parameter(torch.full((self.out_channels, 1, 1, 1), gain_init)) self.scale = gamma * self.weight[0].numel() ** -0.5 # gamma * 1 / sqrt(fan-in) self.eps = eps - self.use_layernorm = use_layernorm # experimental, slightly faster/less GPU memory to hijack LN kernel - - def get_weight(self): - if self.use_layernorm: - # NOTE F.layer_norm is being used to compute (self.weight - mean) / (sqrt(var) + self.eps) in one op - weight = F.layer_norm(self.weight, self.weight.shape[1:], eps=self.eps) - else: - std, mean = torch.std_mean(self.weight, dim=[1, 2, 3], keepdim=True, unbiased=False) - weight = (self.weight - mean) / (std + self.eps) - return weight.mul_(self.gain * self.scale) def forward(self, x): - return F.conv2d(x, self.get_weight(), self.bias, self.stride, self.padding, self.dilation, self.groups) + weight = F.batch_norm( + self.weight.view(1, self.out_channels, -1), None, None, + weight=(self.gain * self.scale).view(-1), + eps=self.eps, training=True, momentum=0.).reshape_as(self.weight) + return F.conv2d(x, weight, self.bias, self.stride, self.padding, self.dilation, self.groups) class ScaledStdConv2dSame(nn.Conv2d): @@ -120,8 +100,8 @@ class ScaledStdConv2dSame(nn.Conv2d): """ def __init__( - self, in_channels, out_channels, kernel_size, stride=1, padding='SAME', dilation=1, groups=1, - bias=True, gamma=1.0, eps=1e-5, gain_init=1.0, use_layernorm=True): + self, in_channels, out_channels, kernel_size, stride=1, padding='SAME', + dilation=1, groups=1, bias=True, gamma=1.0, eps=1e-6, gain_init=1.0): padding, is_dynamic = get_padding_value(padding, kernel_size, stride=stride, dilation=dilation) super().__init__( in_channels, out_channels, kernel_size, stride=stride, padding=padding, dilation=dilation, @@ -130,18 +110,12 @@ class ScaledStdConv2dSame(nn.Conv2d): self.scale = gamma * self.weight[0].numel() ** -0.5 self.same_pad = is_dynamic self.eps = eps - self.use_layernorm = use_layernorm # experimental, slightly faster/less GPU memory to hijack LN kernel - - def get_weight(self): - if self.use_layernorm: - # NOTE F.layer_norm is being used to compute (self.weight - mean) / (sqrt(var) + self.eps) in one op - weight = F.layer_norm(self.weight, self.weight.shape[1:], eps=self.eps) - else: - std, mean = torch.std_mean(self.weight, dim=[1, 2, 3], keepdim=True, unbiased=False) - weight = (self.weight - mean) / (std + self.eps) - return weight.mul_(self.gain * self.scale) def forward(self, x): if self.same_pad: x = pad_same(x, self.kernel_size, self.stride, self.dilation) - return F.conv2d(x, self.get_weight(), self.bias, self.stride, self.padding, self.dilation, self.groups) + weight = F.batch_norm( + self.weight.view(1, self.out_channels, -1), None, None, + weight=(self.gain * self.scale).view(-1), + eps=self.eps, training=True, momentum=0.).reshape_as(self.weight) + return F.conv2d(x, weight, self.bias, self.stride, self.padding, self.dilation, self.groups) diff --git a/timm/models/nfnet.py b/timm/models/nfnet.py index 584495c3..fc0a20c2 100644 --- a/timm/models/nfnet.py +++ b/timm/models/nfnet.py @@ -167,7 +167,6 @@ class NfCfg: gamma_in_act: bool = False same_padding: bool = False std_conv_eps: float = 1e-5 - std_conv_ln: bool = True # use layer-norm impl to normalize in std-conv, works in PyTorch 
XLA, slightly faster skipinit: bool = False # disabled by default, non-trivial performance impact zero_init_fc: bool = False act_layer: str = 'silu' @@ -484,11 +483,10 @@ class NormFreeNet(nn.Module): conv_layer = ScaledStdConv2dSame if cfg.same_padding else ScaledStdConv2d if cfg.gamma_in_act: act_layer = act_with_gamma(cfg.act_layer, gamma=_nonlin_gamma[cfg.act_layer]) - conv_layer = partial(conv_layer, eps=cfg.std_conv_eps, use_layernorm=cfg.std_conv_ln) + conv_layer = partial(conv_layer, eps=cfg.std_conv_eps) else: act_layer = get_act_layer(cfg.act_layer) - conv_layer = partial( - conv_layer, gamma=_nonlin_gamma[cfg.act_layer], eps=cfg.std_conv_eps, use_layernorm=cfg.std_conv_ln) + conv_layer = partial(conv_layer, gamma=_nonlin_gamma[cfg.act_layer], eps=cfg.std_conv_eps) attn_layer = partial(get_attn(cfg.attn_layer), **cfg.attn_kwargs) if cfg.attn_layer else None stem_chs = make_divisible((cfg.stem_chs or cfg.channels[0]) * cfg.width_factor, cfg.ch_div) diff --git a/timm/models/resnetv2.py b/timm/models/resnetv2.py index 0ca6fba9..250695a8 100644 --- a/timm/models/resnetv2.py +++ b/timm/models/resnetv2.py @@ -276,7 +276,7 @@ class ResNetStage(nn.Module): def create_resnetv2_stem( in_chs, out_chs=64, stem_type='', preact=True, - conv_layer=StdConv2d, norm_layer=partial(GroupNormAct, num_groups=32)): + conv_layer=partial(StdConv2d, eps=1e-8), norm_layer=partial(GroupNormAct, num_groups=32)): stem = OrderedDict() assert stem_type in ('', 'fixed', 'same', 'deep', 'deep_fixed', 'deep_same') @@ -315,8 +315,8 @@ class ResNetV2(nn.Module): def __init__(self, layers, channels=(256, 512, 1024, 2048), num_classes=1000, in_chans=3, global_pool='avg', output_stride=32, width_factor=1, stem_chs=64, stem_type='', avg_down=False, preact=True, - act_layer=nn.ReLU, conv_layer=StdConv2d, norm_layer=partial(GroupNormAct, num_groups=32), - drop_rate=0., drop_path_rate=0.): + act_layer=nn.ReLU, conv_layer=partial(StdConv2d, eps=1e-8), + norm_layer=partial(GroupNormAct, num_groups=32), drop_rate=0., drop_path_rate=0.): super().__init__() self.num_classes = num_classes self.drop_rate = drop_rate diff --git a/timm/models/vision_transformer_hybrid.py b/timm/models/vision_transformer_hybrid.py index a32ce019..7fc0cc88 100644 --- a/timm/models/vision_transformer_hybrid.py +++ b/timm/models/vision_transformer_hybrid.py @@ -116,12 +116,8 @@ def _create_vision_transformer_hybrid(variant, backbone, pretrained=False, **kwa def _resnetv2(layers=(3, 4, 9), **kwargs): """ ResNet-V2 backbone helper""" padding_same = kwargs.get('padding_same', True) - if padding_same: - stem_type = 'same' - conv_layer = partial(StdConv2dSame, eps=1e-5) - else: - stem_type = '' - conv_layer = StdConv2d + stem_type = 'same' if padding_same else '' + conv_layer = partial(StdConv2dSame, eps=1e-8) if padding_same else partial(StdConv2d, eps=1e-8) if len(layers): backbone = ResNetV2( layers=layers, num_classes=0, global_pool='', in_chans=kwargs.get('in_chans', 3), From 8880f696b6b8368a76296126476ea020fc7c814c Mon Sep 17 00:00:00 2001 From: Ross Wightman Date: Sat, 12 Jun 2021 16:40:02 -0700 Subject: [PATCH 15/31] Refactoring, cleanup, improved test coverage. 
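The consistency work standardizes one contract across classification models; a
short sketch of what the updated tests now expect, using the public API
(resnet18 stands in for any registered model name):

    import torch
    from timm import create_model

    model = create_model('resnet18', pretrained=False).eval()
    x = torch.randn(1, 3, 224, 224)

    # forward_features always returns unpooled features,
    # with channel dim == model.num_features
    feats = model.forward_features(x)
    assert feats.shape[1] == model.num_features

    # reset_classifier(0) (or num_classes=0 at create time) removes the head;
    # forward then returns pooled features of width model.num_features
    model.reset_classifier(0)
    pooled = model(x)
    assert pooled.ndim == 2 and pooled.shape[1] == model.num_features

Summary of changes: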
* Add eca_nfnet_l2 weights, 84.7 @ 384x384
* All 'non-std' (i.e. transformer / mlp) models have classifier / default_cfg test added
* Fix #694 reset_classifier / num_features / forward_features / num_classes=0 consistency for transformer / mlp models
* Add direct loading of npz to vision transformer (pure transformer so far, hybrid to come)
* Rename vit_deit* to deit_*
* Remove some deprecated vit hybrid model defs
* Clean up classifier flatten for conv classifiers and unusual cases (mobilenetv3/ghostnet)
* Remove explicit model fns for levit conv, just pass in arg
---
 tests/test_models.py                       |  55 ++++-
 timm/models/cait.py                        |  30 ++-
 timm/models/coat.py                        |   8 +-
 timm/models/convit.py                      |  23 +-
 timm/models/dla.py                         |   6 +-
 timm/models/dpn.py                         |   5 +-
 timm/models/ghostnet.py                    |   9 +-
 timm/models/helpers.py                     |  29 +++
 timm/models/layers/adaptive_avgmax_pool.py |  13 +-
 timm/models/layers/classifier.py           |   5 +-
 timm/models/layers/mlp.py                  |   6 +
 timm/models/levit.py                       |  87 ++++---
 timm/models/mlp_mixer.py                   | 116 +++++----
 timm/models/mobilenetv3.py                 |   5 +-
 timm/models/nfnet.py                       |   7 +-
 timm/models/pit.py                         |  32 ++-
 timm/models/registry.py                    |  13 +-
 timm/models/resnet.py                      |   7 +-
 timm/models/resnetv2.py                    | 266 ++++++++++-----------
 timm/models/swin_transformer.py            |  43 ++--
 timm/models/twins.py                       |   8 +-
 timm/models/visformer.py                   |  63 ++---
 timm/models/vision_transformer.py          | 228 ++++++++++++------
 timm/models/vision_transformer_hybrid.py   |  28 +--
 24 files changed, 637 insertions(+), 455 deletions(-)

diff --git a/tests/test_models.py b/tests/test_models.py
index 5a31935e..ac156806 100644
--- a/tests/test_models.py
+++ b/tests/test_models.py
@@ -17,7 +17,7 @@ if hasattr(torch._C, '_jit_set_profiling_executor'):
 # transformer models don't support many of the spatial / feature based model functionalities
 NON_STD_FILTERS = [
     'vit_*', 'tnt_*', 'pit_*', 'swin_*', 'coat_*', 'cait_*', '*mixer_*', 'gmlp_*', 'resmlp_*', 'twins_*',
-    'convit_*', 'levit*', 'visformer*']
+    'convit_*', 'levit*', 'visformer*', 'deit*']
 NUM_NON_STD = len(NON_STD_FILTERS)
 
 # exclude models that cause specific test failures
@@ -120,7 +120,6 @@ def test_model_default_cfgs(model_name, batch_size):
     state_dict = model.state_dict()
     cfg = model.default_cfg
 
-    classifier = cfg['classifier']
     pool_size = cfg['pool_size']
     input_size = model.default_cfg['input_size']
 
@@ -149,7 +148,57 @@ def test_model_default_cfgs(model_name, batch_size):
         assert outputs.shape[-1] == pool_size[-1] and outputs.shape[-2] == pool_size[-2]
 
     # check classifier name matches default_cfg
-    assert classifier + ".weight" in state_dict.keys(), f'{classifier} not in model params'
+    classifier = cfg['classifier']
+    if not isinstance(classifier, (tuple, list)):
+        classifier = classifier,
+    for c in classifier:
+        assert c + ".weight" in state_dict.keys(), f'{c} not in model params'
+
+    # check first conv(s) names match default_cfg
+    first_conv = cfg['first_conv']
+    if isinstance(first_conv, str):
+        first_conv = (first_conv,)
+    assert isinstance(first_conv, (tuple, list))
+    for fc in first_conv:
+        assert fc + ".weight" in state_dict.keys(), f'{fc} not in model params'
+
+
+@pytest.mark.timeout(300)
+@pytest.mark.parametrize('model_name', list_models(filter=NON_STD_FILTERS))
+@pytest.mark.parametrize('batch_size', [1])
+def test_model_default_cfgs_non_std(model_name, batch_size):
+    """Run a single forward pass with each model"""
+    model = create_model(model_name, pretrained=False)
+    model.eval()
+    state_dict = model.state_dict()
+    cfg = model.default_cfg
+
+    input_size = _get_input_size(model_name=model_name, target=TARGET_FWD_SIZE)
+    if max(input_size) > MAX_FWD_SIZE:
+        pytest.skip("Fixed input size model > limit.")
+
+    input_tensor = torch.randn((batch_size, *input_size))
+
+    # test forward_features (always unpooled)
+    outputs = model.forward_features(input_tensor)
+    if isinstance(outputs, tuple):
+        outputs = outputs[0]
+    assert outputs.shape[1] == model.num_features
+
+    # test forward after deleting the classifier, output should be pooled, size(-1) == model.num_features
+    model.reset_classifier(0)
+    outputs = model.forward(input_tensor)
+    if isinstance(outputs, tuple):
+        outputs = outputs[0]
+    assert len(outputs.shape) == 2
+    assert outputs.shape[1] == model.num_features
+
+    # check classifier name matches default_cfg
+    classifier = cfg['classifier']
+    if not isinstance(classifier, (tuple, list)):
+        classifier = classifier,
+    for c in classifier:
+        assert c + ".weight" in state_dict.keys(), f'{c} not in model params'
 
     # check first conv(s) names match default_cfg
     first_conv = cfg['first_conv']
diff --git a/timm/models/cait.py b/timm/models/cait.py
index aa2e5f07..69b4ba06 100644
--- a/timm/models/cait.py
+++ b/timm/models/cait.py
@@ -74,11 +74,11 @@ default_cfgs = dict(
 class ClassAttn(nn.Module):
     # taken from https://github.com/rwightman/pytorch-image-models/blob/master/timm/models/vision_transformer.py
     # with slight modifications to do CA
-    def __init__(self, dim, num_heads=8, qkv_bias=False, qk_scale=None, attn_drop=0., proj_drop=0.):
+    def __init__(self, dim, num_heads=8, qkv_bias=False, attn_drop=0., proj_drop=0.):
         super().__init__()
         self.num_heads = num_heads
         head_dim = dim // num_heads
-        self.scale = qk_scale or head_dim ** -0.5
+        self.scale = head_dim ** -0.5
 
         self.q = nn.Linear(dim, dim, bias=qkv_bias)
         self.k = nn.Linear(dim, dim, bias=qkv_bias)
@@ -110,13 +110,13 @@ class LayerScaleBlockClassAttn(nn.Module):
     # taken from https://github.com/rwightman/pytorch-image-models/blob/master/timm/models/vision_transformer.py
     # with slight modifications to add CA and LayerScale
     def __init__(
-            self, dim, num_heads, mlp_ratio=4., qkv_bias=False, qk_scale=None, drop=0., attn_drop=0.,
+            self, dim, num_heads, mlp_ratio=4., qkv_bias=False, drop=0., attn_drop=0.,
             drop_path=0., act_layer=nn.GELU, norm_layer=nn.LayerNorm, attn_block=ClassAttn,
             mlp_block=Mlp, init_values=1e-4):
         super().__init__()
         self.norm1 = norm_layer(dim)
         self.attn = attn_block(
-            dim, num_heads=num_heads, qkv_bias=qkv_bias, qk_scale=qk_scale, attn_drop=attn_drop, proj_drop=drop)
+            dim, num_heads=num_heads, qkv_bias=qkv_bias, attn_drop=attn_drop, proj_drop=drop)
         self.drop_path = DropPath(drop_path) if drop_path > 0.
else nn.Identity() self.norm2 = norm_layer(dim) mlp_hidden_dim = int(dim * mlp_ratio) @@ -134,14 +134,14 @@ class LayerScaleBlockClassAttn(nn.Module): class TalkingHeadAttn(nn.Module): # taken from https://github.com/rwightman/pytorch-image-models/blob/master/timm/models/vision_transformer.py # with slight modifications to add Talking Heads Attention (https://arxiv.org/pdf/2003.02436v1.pdf) - def __init__(self, dim, num_heads=8, qkv_bias=False, qk_scale=None, attn_drop=0., proj_drop=0.): + def __init__(self, dim, num_heads=8, qkv_bias=False, attn_drop=0., proj_drop=0.): super().__init__() self.num_heads = num_heads head_dim = dim // num_heads - self.scale = qk_scale or head_dim ** -0.5 + self.scale = head_dim ** -0.5 self.qkv = nn.Linear(dim, dim * 3, bias=qkv_bias) self.attn_drop = nn.Dropout(attn_drop) @@ -177,13 +177,13 @@ class LayerScaleBlock(nn.Module): # taken from https://github.com/rwightman/pytorch-image-models/blob/master/timm/models/vision_transformer.py # with slight modifications to add layerScale def __init__( - self, dim, num_heads, mlp_ratio=4., qkv_bias=False, qk_scale=None, drop=0., attn_drop=0., + self, dim, num_heads, mlp_ratio=4., qkv_bias=False, drop=0., attn_drop=0., drop_path=0., act_layer=nn.GELU, norm_layer=nn.LayerNorm, attn_block=TalkingHeadAttn, mlp_block=Mlp, init_values=1e-4): super().__init__() self.norm1 = norm_layer(dim) self.attn = attn_block( - dim, num_heads=num_heads, qkv_bias=qkv_bias, qk_scale=qk_scale, attn_drop=attn_drop, proj_drop=drop) + dim, num_heads=num_heads, qkv_bias=qkv_bias, attn_drop=attn_drop, proj_drop=drop) self.drop_path = DropPath(drop_path) if drop_path > 0. else nn.Identity() self.norm2 = norm_layer(dim) mlp_hidden_dim = int(dim * mlp_ratio) @@ -202,7 +202,7 @@ class Cait(nn.Module): # with slight modifications to adapt to our cait models def __init__( self, img_size=224, patch_size=16, in_chans=3, num_classes=1000, embed_dim=768, depth=12, - num_heads=12, mlp_ratio=4., qkv_bias=True, qk_scale=None, drop_rate=0., attn_drop_rate=0., + num_heads=12, mlp_ratio=4., qkv_bias=True, drop_rate=0., attn_drop_rate=0., drop_path_rate=0., norm_layer=partial(nn.LayerNorm, eps=1e-6), global_pool=None, @@ -235,14 +235,14 @@ class Cait(nn.Module): dpr = [drop_path_rate for i in range(depth)] self.blocks = nn.ModuleList([ block_layers( - dim=embed_dim, num_heads=num_heads, mlp_ratio=mlp_ratio, qkv_bias=qkv_bias, qk_scale=qk_scale, + dim=embed_dim, num_heads=num_heads, mlp_ratio=mlp_ratio, qkv_bias=qkv_bias, drop=drop_rate, attn_drop=attn_drop_rate, drop_path=dpr[i], norm_layer=norm_layer, act_layer=act_layer, attn_block=attn_block, mlp_block=mlp_block, init_values=init_scale) for i in range(depth)]) self.blocks_token_only = nn.ModuleList([ block_layers_token( - dim=embed_dim, num_heads=num_heads, mlp_ratio=mlp_ratio_clstk, qkv_bias=qkv_bias, qk_scale=qk_scale, + dim=embed_dim, num_heads=num_heads, mlp_ratio=mlp_ratio_clstk, qkv_bias=qkv_bias, drop=0.0, attn_drop=0.0, drop_path=0.0, norm_layer=norm_layer, act_layer=act_layer, attn_block=attn_block_token_only, mlp_block=mlp_block_token_only, init_values=init_scale) @@ -270,6 +270,13 @@ class Cait(nn.Module): def no_weight_decay(self): return {'pos_embed', 'cls_token'} + def get_classifier(self): + return self.head + + def reset_classifier(self, num_classes, global_pool=''): + self.num_classes = num_classes + self.head = nn.Linear(self.num_features, num_classes) if num_classes > 0 else nn.Identity() + def forward_features(self, x): B = x.shape[0] x = self.patch_embed(x) @@ -293,7 +300,6 @@ class 
Cait(nn.Module): def forward(self, x): x = self.forward_features(x) x = self.head(x) - return x diff --git a/timm/models/coat.py b/timm/models/coat.py index 9eb384d8..f071715a 100644 --- a/timm/models/coat.py +++ b/timm/models/coat.py @@ -335,6 +335,8 @@ class CoaT(nn.Module): crpe_window = crpe_window or {3: 2, 5: 3, 7: 3} self.return_interm_layers = return_interm_layers self.out_features = out_features + self.embed_dims = embed_dims + self.num_features = embed_dims[-1] self.num_classes = num_classes # Patch embeddings. @@ -441,10 +443,10 @@ class CoaT(nn.Module): # CoaT series: Aggregate features of last three scales for classification. assert embed_dims[1] == embed_dims[2] == embed_dims[3] self.aggregate = torch.nn.Conv1d(in_channels=3, out_channels=1, kernel_size=1) - self.head = nn.Linear(embed_dims[3], num_classes) + self.head = nn.Linear(self.num_features, num_classes) if num_classes > 0 else nn.Identity() else: # CoaT-Lite series: Use feature of last scale for classification. - self.head = nn.Linear(embed_dims[3], num_classes) + self.head = nn.Linear(self.num_features, num_classes) if num_classes > 0 else nn.Identity() # Initialize weights. trunc_normal_(self.cls_token1, std=.02) @@ -471,7 +473,7 @@ class CoaT(nn.Module): def reset_classifier(self, num_classes, global_pool=''): self.num_classes = num_classes - self.head = nn.Linear(self.embed_dim, num_classes) if num_classes > 0 else nn.Identity() + self.head = nn.Linear(self.num_features, num_classes) if num_classes > 0 else nn.Identity() def insert_cls(self, x, cls_token): """ Insert CLS token. """ diff --git a/timm/models/convit.py b/timm/models/convit.py index b15b46d8..0593ec1c 100644 --- a/timm/models/convit.py +++ b/timm/models/convit.py @@ -57,13 +57,13 @@ default_cfgs = { class GPSA(nn.Module): - def __init__(self, dim, num_heads=8, qkv_bias=False, qk_scale=None, attn_drop=0., proj_drop=0., + def __init__(self, dim, num_heads=8, qkv_bias=False, attn_drop=0., proj_drop=0., locality_strength=1.): super().__init__() self.num_heads = num_heads self.dim = dim head_dim = dim // num_heads - self.scale = qk_scale or head_dim ** -0.5 + self.scale = head_dim ** -0.5 self.locality_strength = locality_strength self.qk = nn.Linear(dim, dim * 2, bias=qkv_bias) @@ -142,11 +142,11 @@ class GPSA(nn.Module): class MHSA(nn.Module): - def __init__(self, dim, num_heads=8, qkv_bias=False, qk_scale=None, attn_drop=0., proj_drop=0.): + def __init__(self, dim, num_heads=8, qkv_bias=False, attn_drop=0., proj_drop=0.): super().__init__() self.num_heads = num_heads head_dim = dim // num_heads - self.scale = qk_scale or head_dim ** -0.5 + self.scale = head_dim ** -0.5 self.qkv = nn.Linear(dim, dim * 3, bias=qkv_bias) self.attn_drop = nn.Dropout(attn_drop) @@ -191,19 +191,16 @@ class MHSA(nn.Module): class Block(nn.Module): - def __init__(self, dim, num_heads, mlp_ratio=4., qkv_bias=False, qk_scale=None, drop=0., attn_drop=0., + def __init__(self, dim, num_heads, mlp_ratio=4., qkv_bias=False, drop=0., attn_drop=0., drop_path=0., act_layer=nn.GELU, norm_layer=nn.LayerNorm, use_gpsa=True, **kwargs): super().__init__() self.norm1 = norm_layer(dim) self.use_gpsa = use_gpsa if self.use_gpsa: self.attn = GPSA( - dim, num_heads=num_heads, qkv_bias=qkv_bias, qk_scale=qk_scale, attn_drop=attn_drop, - proj_drop=drop, **kwargs) + dim, num_heads=num_heads, qkv_bias=qkv_bias, attn_drop=attn_drop, proj_drop=drop, **kwargs) else: - self.attn = MHSA( - dim, num_heads=num_heads, qkv_bias=qkv_bias, qk_scale=qk_scale, attn_drop=attn_drop, - proj_drop=drop, **kwargs) + 
self.attn = MHSA(dim, num_heads=num_heads, qkv_bias=qkv_bias, attn_drop=attn_drop, proj_drop=drop) self.drop_path = DropPath(drop_path) if drop_path > 0. else nn.Identity() self.norm2 = norm_layer(dim) mlp_hidden_dim = int(dim * mlp_ratio) @@ -220,7 +217,7 @@ class ConViT(nn.Module): """ def __init__(self, img_size=224, patch_size=16, in_chans=3, num_classes=1000, embed_dim=768, depth=12, - num_heads=12, mlp_ratio=4., qkv_bias=False, qk_scale=None, drop_rate=0., attn_drop_rate=0., + num_heads=12, mlp_ratio=4., qkv_bias=False, drop_rate=0., attn_drop_rate=0., drop_path_rate=0., hybrid_backbone=None, norm_layer=nn.LayerNorm, global_pool=None, local_up_to_layer=3, locality_strength=1., use_pos_embed=True): super().__init__() @@ -250,13 +247,13 @@ class ConViT(nn.Module): dpr = [x.item() for x in torch.linspace(0, drop_path_rate, depth)] # stochastic depth decay rule self.blocks = nn.ModuleList([ Block( - dim=embed_dim, num_heads=num_heads, mlp_ratio=mlp_ratio, qkv_bias=qkv_bias, qk_scale=qk_scale, + dim=embed_dim, num_heads=num_heads, mlp_ratio=mlp_ratio, qkv_bias=qkv_bias, drop=drop_rate, attn_drop=attn_drop_rate, drop_path=dpr[i], norm_layer=norm_layer, use_gpsa=True, locality_strength=locality_strength) if i < local_up_to_layer else Block( - dim=embed_dim, num_heads=num_heads, mlp_ratio=mlp_ratio, qkv_bias=qkv_bias, qk_scale=qk_scale, + dim=embed_dim, num_heads=num_heads, mlp_ratio=mlp_ratio, qkv_bias=qkv_bias, drop=drop_rate, attn_drop=attn_drop_rate, drop_path=dpr[i], norm_layer=norm_layer, use_gpsa=False) for i in range(depth)]) diff --git a/timm/models/dla.py b/timm/models/dla.py index f0f25b0b..f6e4dd28 100644 --- a/timm/models/dla.py +++ b/timm/models/dla.py @@ -288,6 +288,8 @@ class DLA(nn.Module): self.num_features = channels[-1] self.global_pool, self.fc = create_classifier( self.num_features, self.num_classes, pool_type=global_pool, use_conv=True) + self.flatten = nn.Flatten(1) if global_pool else nn.Identity() + for m in self.modules(): if isinstance(m, nn.Conv2d): n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels @@ -314,6 +316,7 @@ class DLA(nn.Module): self.num_classes = num_classes self.global_pool, self.fc = create_classifier( self.num_features, self.num_classes, pool_type=global_pool, use_conv=True) + self.flatten = nn.Flatten(1) if global_pool else nn.Identity() def forward_features(self, x): x = self.base_layer(x) @@ -331,8 +334,7 @@ class DLA(nn.Module): if self.drop_rate > 0.: x = F.dropout(x, p=self.drop_rate, training=self.training) x = self.fc(x) - if not self.global_pool.is_identity(): - x = x.flatten(1) # conv classifier, flatten if pooling isn't pass-through (disabled) + x = self.flatten(x) return x diff --git a/timm/models/dpn.py b/timm/models/dpn.py index 90ef11cc..c4e380b1 100644 --- a/timm/models/dpn.py +++ b/timm/models/dpn.py @@ -237,6 +237,7 @@ class DPN(nn.Module): # Using 1x1 conv for the FC layer to allow the extra pooling scheme self.global_pool, self.classifier = create_classifier( self.num_features, self.num_classes, pool_type=global_pool, use_conv=True) + self.flatten = nn.Flatten(1) if global_pool else nn.Identity() def get_classifier(self): return self.classifier @@ -245,6 +246,7 @@ class DPN(nn.Module): self.num_classes = num_classes self.global_pool, self.classifier = create_classifier( self.num_features, self.num_classes, pool_type=global_pool, use_conv=True) + self.flatten = nn.Flatten(1) if global_pool else nn.Identity() def forward_features(self, x): return self.features(x) @@ -255,8 +257,7 @@ class DPN(nn.Module): if self.drop_rate > 
0.: x = F.dropout(x, p=self.drop_rate, training=self.training) x = self.classifier(x) - if not self.global_pool.is_identity(): - x = x.flatten(1) # conv classifier, flatten if pooling isn't pass-through (disabled) + x = self.flatten(x) return x diff --git a/timm/models/ghostnet.py b/timm/models/ghostnet.py index 48dee6ec..a73047c5 100644 --- a/timm/models/ghostnet.py +++ b/timm/models/ghostnet.py @@ -133,7 +133,7 @@ class GhostBottleneck(nn.Module): class GhostNet(nn.Module): - def __init__(self, cfgs, num_classes=1000, width=1.0, dropout=0.2, in_chans=3, output_stride=32): + def __init__(self, cfgs, num_classes=1000, width=1.0, dropout=0.2, in_chans=3, output_stride=32, global_pool='avg'): super(GhostNet, self).__init__() # setting of inverted residual blocks assert output_stride == 32, 'only output_stride==32 is valid, dilation not supported' @@ -178,9 +178,10 @@ class GhostNet(nn.Module): # building last several layers self.num_features = out_chs = 1280 - self.global_pool = SelectAdaptivePool2d(pool_type='avg') + self.global_pool = SelectAdaptivePool2d(pool_type=global_pool) self.conv_head = nn.Conv2d(prev_chs, out_chs, 1, 1, 0, bias=True) self.act2 = nn.ReLU(inplace=True) + self.flatten = nn.Flatten(1) if global_pool else nn.Identity() # don't flatten if pooling disabled self.classifier = Linear(out_chs, num_classes) def get_classifier(self): @@ -190,6 +191,7 @@ class GhostNet(nn.Module): self.num_classes = num_classes # cannot meaningfully change pooling of efficient head after creation self.global_pool = SelectAdaptivePool2d(pool_type=global_pool) + self.flatten = nn.Flatten(1) if global_pool else nn.Identity() # don't flatten if pooling disabled self.classifier = Linear(self.pool_dim, num_classes) if num_classes > 0 else nn.Identity() def forward_features(self, x): @@ -204,8 +206,7 @@ class GhostNet(nn.Module): def forward(self, x): x = self.forward_features(x) - if not self.global_pool.is_identity(): - x = x.view(x.size(0), -1) + x = self.flatten(x) if self.dropout > 0.: x = F.dropout(x, p=self.dropout, training=self.training) x = self.classifier(x) diff --git a/timm/models/helpers.py b/timm/models/helpers.py index adfef550..662a7a48 100644 --- a/timm/models/helpers.py +++ b/timm/models/helpers.py @@ -45,6 +45,13 @@ def load_state_dict(checkpoint_path, use_ema=False): def load_checkpoint(model, checkpoint_path, use_ema=False, strict=True): + if os.path.splitext(checkpoint_path)[-1].lower() in ('.npz', '.npy'): + # numpy checkpoint, try to load via model specific load_pretrained fn + if hasattr(model, 'load_pretrained'): + model.load_pretrained(checkpoint_path) + else: + raise NotImplementedError('Model cannot load numpy checkpoint') + return state_dict = load_state_dict(checkpoint_path, use_ema) model.load_state_dict(state_dict, strict=strict) @@ -477,3 +484,25 @@ def model_parameters(model, exclude_head=False): return [p for p in model.parameters()][:-2] else: return model.parameters() + + +def named_apply(fn: Callable, module: nn.Module, name='', depth_first=True, include_root=False) -> nn.Module: + if not depth_first and include_root: + fn(module=module, name=name) + for child_name, child_module in module.named_children(): + child_name = '.'.join((name, child_name)) if name else child_name + named_apply(fn=fn, module=child_module, name=child_name, depth_first=depth_first, include_root=True) + if depth_first and include_root: + fn(module=module, name=name) + return module + + +def named_modules(module: nn.Module, name='', depth_first=True, include_root=False): + if not 
depth_first and include_root: + yield name, module + for child_name, child_module in module.named_children(): + child_name = '.'.join((name, child_name)) if name else child_name + yield from named_modules( + module=child_module, name=child_name, depth_first=depth_first, include_root=True) + if depth_first and include_root: + yield name, module diff --git a/timm/models/layers/adaptive_avgmax_pool.py b/timm/models/layers/adaptive_avgmax_pool.py index d2bb9f72..ebc6ada8 100644 --- a/timm/models/layers/adaptive_avgmax_pool.py +++ b/timm/models/layers/adaptive_avgmax_pool.py @@ -55,7 +55,7 @@ class FastAdaptiveAvgPool2d(nn.Module): self.flatten = flatten def forward(self, x): - return x.mean((2, 3)) if self.flatten else x.mean((2, 3), keepdim=True) + return x.mean((2, 3), keepdim=not self.flatten) class AdaptiveAvgMaxPool2d(nn.Module): @@ -82,13 +82,13 @@ class SelectAdaptivePool2d(nn.Module): def __init__(self, output_size=1, pool_type='fast', flatten=False): super(SelectAdaptivePool2d, self).__init__() self.pool_type = pool_type or '' # convert other falsy values to empty string for consistent TS typing - self.flatten = flatten + self.flatten = nn.Flatten(1) if flatten else nn.Identity() if pool_type == '': self.pool = nn.Identity() # pass through elif pool_type == 'fast': assert output_size == 1 - self.pool = FastAdaptiveAvgPool2d(self.flatten) - self.flatten = False + self.pool = FastAdaptiveAvgPool2d(flatten) + self.flatten = nn.Identity() elif pool_type == 'avg': self.pool = nn.AdaptiveAvgPool2d(output_size) elif pool_type == 'avgmax': @@ -101,12 +101,11 @@ class SelectAdaptivePool2d(nn.Module): assert False, 'Invalid pool type: %s' % pool_type def is_identity(self): - return self.pool_type == '' + return not self.pool_type def forward(self, x): x = self.pool(x) - if self.flatten: - x = x.flatten(1) + x = self.flatten(x) return x def feat_mult(self): diff --git a/timm/models/layers/classifier.py b/timm/models/layers/classifier.py index 516cc6c9..2b745413 100644 --- a/timm/models/layers/classifier.py +++ b/timm/models/layers/classifier.py @@ -20,7 +20,7 @@ def _create_pool(num_features, num_classes, pool_type='avg', use_conv=False): return global_pool, num_pooled_features -def _create_fc(num_features, num_classes, pool_type='avg', use_conv=False): +def _create_fc(num_features, num_classes, use_conv=False): if num_classes <= 0: fc = nn.Identity() # pass-through (no classifier) elif use_conv: @@ -45,11 +45,12 @@ class ClassifierHead(nn.Module): self.drop_rate = drop_rate self.global_pool, num_pooled_features = _create_pool(in_chs, num_classes, pool_type, use_conv=use_conv) self.fc = _create_fc(num_pooled_features, num_classes, use_conv=use_conv) - self.flatten_after_fc = use_conv and pool_type + self.flatten = nn.Flatten(1) if use_conv and pool_type else nn.Identity() def forward(self, x): x = self.global_pool(x) if self.drop_rate: x = F.dropout(x, p=float(self.drop_rate), training=self.training) x = self.fc(x) + x = self.flatten(x) return x diff --git a/timm/models/layers/mlp.py b/timm/models/layers/mlp.py index 4739ba74..05d07652 100644 --- a/timm/models/layers/mlp.py +++ b/timm/models/layers/mlp.py @@ -40,6 +40,12 @@ class GluMlp(nn.Module): self.fc2 = nn.Linear(hidden_features // 2, out_features) self.drop = nn.Dropout(drop) + def init_weights(self): + # override init of fc1 w/ gate portion set to weight near zero, bias=1 + fc1_mid = self.fc1.bias.shape[0] // 2 + nn.init.ones_(self.fc1.bias[fc1_mid:]) + nn.init.normal_(self.fc1.weight[fc1_mid:], std=1e-6) + def forward(self, x): x = 
self.fc1(x) x, gates = x.chunk(2, dim=-1) diff --git a/timm/models/levit.py b/timm/models/levit.py index 2180254a..fa35f41f 100644 --- a/timm/models/levit.py +++ b/timm/models/levit.py @@ -84,63 +84,33 @@ __all__ = ['Levit'] @register_model -def levit_128s(pretrained=False, fuse=False,distillation=True, use_conv=False, **kwargs): +def levit_128s(pretrained=False, use_conv=False, **kwargs): return create_levit( - 'levit_128s', pretrained=pretrained, fuse=fuse, distillation=distillation, use_conv=use_conv, **kwargs) + 'levit_128s', pretrained=pretrained, use_conv=use_conv, **kwargs) @register_model -def levit_128(pretrained=False, fuse=False, distillation=True, use_conv=False, **kwargs): +def levit_128(pretrained=False, use_conv=False, **kwargs): return create_levit( - 'levit_128', pretrained=pretrained, fuse=fuse, distillation=distillation, use_conv=use_conv, **kwargs) + 'levit_128', pretrained=pretrained, use_conv=use_conv, **kwargs) @register_model -def levit_192(pretrained=False, fuse=False, distillation=True, use_conv=False, **kwargs): +def levit_192(pretrained=False, use_conv=False, **kwargs): return create_levit( - 'levit_192', pretrained=pretrained, fuse=fuse, distillation=distillation, use_conv=use_conv, **kwargs) + 'levit_192', pretrained=pretrained, use_conv=use_conv, **kwargs) @register_model -def levit_256(pretrained=False, fuse=False, distillation=True, use_conv=False, **kwargs): +def levit_256(pretrained=False, use_conv=False, **kwargs): return create_levit( - 'levit_256', pretrained=pretrained, fuse=fuse, distillation=distillation, use_conv=use_conv, **kwargs) + 'levit_256', pretrained=pretrained, use_conv=use_conv, **kwargs) @register_model -def levit_384(pretrained=False, fuse=False, distillation=True, use_conv=False, **kwargs): +def levit_384(pretrained=False, use_conv=False, **kwargs): return create_levit( - 'levit_384', pretrained=pretrained, fuse=fuse, distillation=distillation, use_conv=use_conv, **kwargs) - - -@register_model -def levit_c_128s(pretrained=False, fuse=False, distillation=True, use_conv=True,**kwargs): - return create_levit( - 'levit_128s', pretrained=pretrained, fuse=fuse, distillation=distillation, use_conv=use_conv, **kwargs) - - -@register_model -def levit_c_128(pretrained=False, fuse=False,distillation=True, use_conv=True, **kwargs): - return create_levit( - 'levit_128', pretrained=pretrained, fuse=fuse, distillation=distillation, use_conv=use_conv, **kwargs) - - -@register_model -def levit_c_192(pretrained=False, fuse=False, distillation=True, use_conv=True, **kwargs): - return create_levit( - 'levit_192', pretrained=pretrained, fuse=fuse, distillation=distillation, use_conv=use_conv, **kwargs) - - -@register_model -def levit_c_256(pretrained=False, fuse=False, distillation=True, use_conv=True, **kwargs): - return create_levit( - 'levit_256', pretrained=pretrained, fuse=fuse, distillation=distillation, use_conv=use_conv, **kwargs) - - -@register_model -def levit_c_384(pretrained=False, fuse=False, distillation=True, use_conv=True, **kwargs): - return create_levit( - 'levit_384', pretrained=pretrained, fuse=fuse, distillation=distillation, use_conv=use_conv, **kwargs) + 'levit_384', pretrained=pretrained, use_conv=use_conv, **kwargs) class ConvNorm(nn.Sequential): @@ -427,6 +397,9 @@ class AttentionSubsample(nn.Module): class Levit(nn.Module): """ Vision Transformer with support for patch or hybrid CNN input stage + + NOTE: distillation is defaulted to True since pretrained weights use it, will cause problems + w/ train scripts that don't take tuple 
outputs, """ def __init__( @@ -447,7 +420,8 @@ class Levit(nn.Module): attn_act_layer='hard_swish', distillation=True, use_conv=False, - drop_path=0): + drop_rate=0., + drop_path_rate=0.): super().__init__() act_layer = get_act_layer(act_layer) attn_act_layer = get_act_layer(attn_act_layer) @@ -486,7 +460,7 @@ class Levit(nn.Module): Attention( ed, kd, nh, attn_ratio=ar, act_layer=attn_act_layer, resolution=resolution, use_conv=use_conv), - drop_path)) + drop_path_rate)) if mr > 0: h = int(ed * mr) self.blocks.append( @@ -494,7 +468,7 @@ class Levit(nn.Module): ln_layer(ed, h, resolution=resolution), act_layer(), ln_layer(h, ed, bn_weight_init=0, resolution=resolution), - ), drop_path)) + ), drop_path_rate)) if do[0] == 'Subsample': # ('Subsample',key_dim, num_heads, attn_ratio, mlp_ratio, stride) resolution_ = (resolution - 1) // do[5] + 1 @@ -511,26 +485,45 @@ class Levit(nn.Module): ln_layer(embed_dim[i + 1], h, resolution=resolution), act_layer(), ln_layer(h, embed_dim[i + 1], bn_weight_init=0, resolution=resolution), - ), drop_path)) + ), drop_path_rate)) self.blocks = nn.Sequential(*self.blocks) # Classifier head self.head = NormLinear(embed_dim[-1], num_classes) if num_classes > 0 else nn.Identity() + self.head_dist = None if distillation: self.head_dist = NormLinear(embed_dim[-1], num_classes) if num_classes > 0 else nn.Identity() - else: - self.head_dist = None @torch.jit.ignore def no_weight_decay(self): return {x for x in self.state_dict().keys() if 'attention_biases' in x} - def forward(self, x): + def get_classifier(self): + if self.head_dist is None: + return self.head + else: + return self.head, self.head_dist + + def reset_classifier(self, num_classes, global_pool='', distillation=None): + self.num_classes = num_classes + self.head = NormLinear(self.embed_dim[-1], num_classes) if num_classes > 0 else nn.Identity() + if distillation is not None: + self.distillation = distillation + if self.distillation: + self.head_dist = NormLinear(self.embed_dim[-1], num_classes) if num_classes > 0 else nn.Identity() + else: + self.head_dist = None + + def forward_features(self, x): x = self.patch_embed(x) if not self.use_conv: x = x.flatten(2).transpose(1, 2) x = self.blocks(x) x = x.mean((-2, -1)) if self.use_conv else x.mean(1) + return x + + def forward(self, x): + x = self.forward_features(x) if self.head_dist is not None: x, x_dist = self.head(x), self.head_dist(x) if self.training and not torch.jit.is_scripting(): diff --git a/timm/models/mlp_mixer.py b/timm/models/mlp_mixer.py index 6f53264a..ea6de824 100644 --- a/timm/models/mlp_mixer.py +++ b/timm/models/mlp_mixer.py @@ -45,7 +45,7 @@ import torch import torch.nn as nn from timm.data import IMAGENET_DEFAULT_MEAN, IMAGENET_DEFAULT_STD -from .helpers import build_model_with_cfg, overlay_external_default_cfg +from .helpers import build_model_with_cfg, overlay_external_default_cfg, named_apply from .layers import PatchEmbed, Mlp, GluMlp, GatedMlp, DropPath, lecun_normal_, to_2tuple from .registry import register_model @@ -169,6 +169,11 @@ class SpatialGatingUnit(nn.Module): self.norm = norm_layer(gate_dim) self.proj = nn.Linear(seq_len, seq_len) + def init_weights(self): + # special init for the projection gate, called as override by base model init + nn.init.normal_(self.proj.weight, std=1e-6) + nn.init.ones_(self.proj.bias) + def forward(self, x): u, v = x.chunk(2, dim=-1) v = self.norm(v) @@ -205,7 +210,7 @@ class MlpMixer(nn.Module): in_chans=3, patch_size=16, num_blocks=8, - hidden_dim=512, + embed_dim=512, mlp_ratio=(0.5, 4.0), 
block_layer=MixerBlock, mlp_layer=Mlp, @@ -218,59 +223,71 @@ class MlpMixer(nn.Module): ): super().__init__() self.num_classes = num_classes + self.num_features = self.embed_dim = embed_dim # num_features for consistency with other models self.stem = PatchEmbed( - img_size=img_size, patch_size=patch_size, in_chans=in_chans, embed_dim=hidden_dim, - norm_layer=norm_layer if stem_norm else None) + img_size=img_size, patch_size=patch_size, in_chans=in_chans, + embed_dim=embed_dim, norm_layer=norm_layer if stem_norm else None) # FIXME drop_path (stochastic depth scaling rule or all the same?) self.blocks = nn.Sequential(*[ block_layer( - hidden_dim, self.stem.num_patches, mlp_ratio, mlp_layer=mlp_layer, norm_layer=norm_layer, + embed_dim, self.stem.num_patches, mlp_ratio, mlp_layer=mlp_layer, norm_layer=norm_layer, act_layer=act_layer, drop=drop_rate, drop_path=drop_path_rate) for _ in range(num_blocks)]) - self.norm = norm_layer(hidden_dim) - self.head = nn.Linear(hidden_dim, self.num_classes) # zero init + self.norm = norm_layer(embed_dim) + self.head = nn.Linear(embed_dim, self.num_classes) # zero init self.init_weights(nlhb=nlhb) def init_weights(self, nlhb=False): head_bias = -math.log(self.num_classes) if nlhb else 0. - for n, m in self.named_modules(): - _init_weights(m, n, head_bias=head_bias) + named_apply(partial(_init_weights, head_bias=head_bias), module=self) # depth-first - def forward(self, x): + def get_classifier(self): + return self.head + + def reset_classifier(self, num_classes, global_pool=''): + self.num_classes = num_classes + self.head = nn.Linear(self.embed_dim, num_classes) if num_classes > 0 else nn.Identity() + + def forward_features(self, x): x = self.stem(x) x = self.blocks(x) x = self.norm(x) x = x.mean(dim=1) + return x + + def forward(self, x): + x = self.forward_features(x) x = self.head(x) return x -def _init_weights(m, n: str, head_bias: float = 0.): +def _init_weights(module: nn.Module, name: str, head_bias: float = 0.): """ Mixer weight initialization (trying to match Flax defaults) """ - if isinstance(m, nn.Linear): - if n.startswith('head'): - nn.init.zeros_(m.weight) - nn.init.constant_(m.bias, head_bias) - elif n.endswith('gate.proj'): - nn.init.normal_(m.weight, std=1e-4) - nn.init.ones_(m.bias) + if isinstance(module, nn.Linear): + if name.startswith('head'): + nn.init.zeros_(module.weight) + nn.init.constant_(module.bias, head_bias) else: - nn.init.xavier_uniform_(m.weight) - if m.bias is not None: - if 'mlp' in n: - nn.init.normal_(m.bias, std=1e-6) + nn.init.xavier_uniform_(module.weight) + if module.bias is not None: + if 'mlp' in name: + nn.init.normal_(module.bias, std=1e-6) else: - nn.init.zeros_(m.bias) - elif isinstance(m, nn.Conv2d): - lecun_normal_(m.weight) - if m.bias is not None: - nn.init.zeros_(m.bias) - elif isinstance(m, nn.LayerNorm): - nn.init.zeros_(m.bias) - nn.init.ones_(m.weight) + nn.init.zeros_(module.bias) + elif isinstance(module, nn.Conv2d): + lecun_normal_(module.weight) + if module.bias is not None: + nn.init.zeros_(module.bias) + elif isinstance(module, (nn.LayerNorm, nn.BatchNorm2d, nn.GroupNorm)): + nn.init.ones_(module.weight) + nn.init.zeros_(module.bias) + elif hasattr(module, 'init_weights'): + # NOTE if a parent module contains init_weights method, it can override the init of the + # child modules as this will be called in depth-first order. 
+ module.init_weights() def _create_mixer(variant, pretrained=False, **kwargs): @@ -289,7 +306,7 @@ def mixer_s32_224(pretrained=False, **kwargs): """ Mixer-S/32 224x224 Paper: 'MLP-Mixer: An all-MLP Architecture for Vision' - https://arxiv.org/abs/2105.01601 """ - model_args = dict(patch_size=32, num_blocks=8, hidden_dim=512, **kwargs) + model_args = dict(patch_size=32, num_blocks=8, embed_dim=512, **kwargs) model = _create_mixer('mixer_s32_224', pretrained=pretrained, **model_args) return model @@ -299,7 +316,7 @@ def mixer_s16_224(pretrained=False, **kwargs): """ Mixer-S/16 224x224 Paper: 'MLP-Mixer: An all-MLP Architecture for Vision' - https://arxiv.org/abs/2105.01601 """ - model_args = dict(patch_size=16, num_blocks=8, hidden_dim=512, **kwargs) + model_args = dict(patch_size=16, num_blocks=8, embed_dim=512, **kwargs) model = _create_mixer('mixer_s16_224', pretrained=pretrained, **model_args) return model @@ -309,7 +326,7 @@ def mixer_b32_224(pretrained=False, **kwargs): """ Mixer-B/32 224x224 Paper: 'MLP-Mixer: An all-MLP Architecture for Vision' - https://arxiv.org/abs/2105.01601 """ - model_args = dict(patch_size=32, num_blocks=12, hidden_dim=768, **kwargs) + model_args = dict(patch_size=32, num_blocks=12, embed_dim=768, **kwargs) model = _create_mixer('mixer_b32_224', pretrained=pretrained, **model_args) return model @@ -319,7 +336,7 @@ def mixer_b16_224(pretrained=False, **kwargs): """ Mixer-B/16 224x224. ImageNet-1k pretrained weights. Paper: 'MLP-Mixer: An all-MLP Architecture for Vision' - https://arxiv.org/abs/2105.01601 """ - model_args = dict(patch_size=16, num_blocks=12, hidden_dim=768, **kwargs) + model_args = dict(patch_size=16, num_blocks=12, embed_dim=768, **kwargs) model = _create_mixer('mixer_b16_224', pretrained=pretrained, **model_args) return model @@ -329,7 +346,7 @@ def mixer_b16_224_in21k(pretrained=False, **kwargs): """ Mixer-B/16 224x224. ImageNet-21k pretrained weights. Paper: 'MLP-Mixer: An all-MLP Architecture for Vision' - https://arxiv.org/abs/2105.01601 """ - model_args = dict(patch_size=16, num_blocks=12, hidden_dim=768, **kwargs) + model_args = dict(patch_size=16, num_blocks=12, embed_dim=768, **kwargs) model = _create_mixer('mixer_b16_224_in21k', pretrained=pretrained, **model_args) return model @@ -339,7 +356,7 @@ def mixer_l32_224(pretrained=False, **kwargs): """ Mixer-L/32 224x224. Paper: 'MLP-Mixer: An all-MLP Architecture for Vision' - https://arxiv.org/abs/2105.01601 """ - model_args = dict(patch_size=32, num_blocks=24, hidden_dim=1024, **kwargs) + model_args = dict(patch_size=32, num_blocks=24, embed_dim=1024, **kwargs) model = _create_mixer('mixer_l32_224', pretrained=pretrained, **model_args) return model @@ -349,7 +366,7 @@ def mixer_l16_224(pretrained=False, **kwargs): """ Mixer-L/16 224x224. ImageNet-1k pretrained weights. Paper: 'MLP-Mixer: An all-MLP Architecture for Vision' - https://arxiv.org/abs/2105.01601 """ - model_args = dict(patch_size=16, num_blocks=24, hidden_dim=1024, **kwargs) + model_args = dict(patch_size=16, num_blocks=24, embed_dim=1024, **kwargs) model = _create_mixer('mixer_l16_224', pretrained=pretrained, **model_args) return model @@ -359,35 +376,38 @@ def mixer_l16_224_in21k(pretrained=False, **kwargs): """ Mixer-L/16 224x224. ImageNet-21k pretrained weights. 
Paper: 'MLP-Mixer: An all-MLP Architecture for Vision' - https://arxiv.org/abs/2105.01601 """ - model_args = dict(patch_size=16, num_blocks=24, hidden_dim=1024, **kwargs) + model_args = dict(patch_size=16, num_blocks=24, embed_dim=1024, **kwargs) model = _create_mixer('mixer_l16_224_in21k', pretrained=pretrained, **model_args) return model + @register_model def mixer_b16_224_miil(pretrained=False, **kwargs): """ Mixer-B/16 224x224. ImageNet-21k pretrained weights. Weights taken from: https://github.com/Alibaba-MIIL/ImageNet21K """ - model_args = dict(patch_size=16, num_blocks=12, hidden_dim=768, **kwargs) + model_args = dict(patch_size=16, num_blocks=12, embed_dim=768, **kwargs) model = _create_mixer('mixer_b16_224_miil', pretrained=pretrained, **model_args) return model + @register_model def mixer_b16_224_miil_in21k(pretrained=False, **kwargs): """ Mixer-B/16 224x224. ImageNet-1k pretrained weights. Weights taken from: https://github.com/Alibaba-MIIL/ImageNet21K """ - model_args = dict(patch_size=16, num_blocks=12, hidden_dim=768, **kwargs) + model_args = dict(patch_size=16, num_blocks=12, embed_dim=768, **kwargs) model = _create_mixer('mixer_b16_224_miil_in21k', pretrained=pretrained, **model_args) return model + @register_model def gmixer_12_224(pretrained=False, **kwargs): """ Glu-Mixer-12 224x224 (short & fat) Experiment by Ross Wightman, adding (Si)GLU to MLP-Mixer """ model_args = dict( - patch_size=20, num_blocks=12, hidden_dim=512, mlp_ratio=(1.0, 6.0), + patch_size=16, num_blocks=12, embed_dim=512, mlp_ratio=(1.0, 6.0), mlp_layer=GluMlp, act_layer=nn.SiLU, **kwargs) model = _create_mixer('gmixer_12_224', pretrained=pretrained, **model_args) return model @@ -399,7 +419,7 @@ def gmixer_24_224(pretrained=False, **kwargs): Experiment by Ross Wightman, adding (Si)GLU to MLP-Mixer """ model_args = dict( - patch_size=20, num_blocks=24, hidden_dim=384, mlp_ratio=(1.0, 6.0), + patch_size=16, num_blocks=24, embed_dim=384, mlp_ratio=(1.0, 6.0), mlp_layer=GluMlp, act_layer=nn.SiLU, **kwargs) model = _create_mixer('gmixer_24_224', pretrained=pretrained, **model_args) return model @@ -411,7 +431,7 @@ def resmlp_12_224(pretrained=False, **kwargs): Paper: `ResMLP: Feedforward networks for image classification...` - https://arxiv.org/abs/2105.03404 """ model_args = dict( - patch_size=16, num_blocks=12, hidden_dim=384, mlp_ratio=4, block_layer=ResBlock, norm_layer=Affine, **kwargs) + patch_size=16, num_blocks=12, embed_dim=384, mlp_ratio=4, block_layer=ResBlock, norm_layer=Affine, **kwargs) model = _create_mixer('resmlp_12_224', pretrained=pretrained, **model_args) return model @@ -422,7 +442,7 @@ def resmlp_24_224(pretrained=False, **kwargs): Paper: `ResMLP: Feedforward networks for image classification...` - https://arxiv.org/abs/2105.03404 """ model_args = dict( - patch_size=16, num_blocks=24, hidden_dim=384, mlp_ratio=4, + patch_size=16, num_blocks=24, embed_dim=384, mlp_ratio=4, block_layer=partial(ResBlock, init_values=1e-5), norm_layer=Affine, **kwargs) model = _create_mixer('resmlp_24_224', pretrained=pretrained, **model_args) return model @@ -434,7 +454,7 @@ def resmlp_36_224(pretrained=False, **kwargs): Paper: `ResMLP: Feedforward networks for image classification...` - https://arxiv.org/abs/2105.03404 """ model_args = dict( - patch_size=16, num_blocks=36, hidden_dim=384, mlp_ratio=4, + patch_size=16, num_blocks=36, embed_dim=384, mlp_ratio=4, block_layer=partial(ResBlock, init_values=1e-5), norm_layer=Affine, **kwargs) model = _create_mixer('resmlp_36_224', pretrained=pretrained, 
**model_args) return model @@ -446,7 +466,7 @@ def gmlp_ti16_224(pretrained=False, **kwargs): Paper: `Pay Attention to MLPs` - https://arxiv.org/abs/2105.08050 """ model_args = dict( - patch_size=16, num_blocks=30, hidden_dim=128, mlp_ratio=6, block_layer=SpatialGatingBlock, + patch_size=16, num_blocks=30, embed_dim=128, mlp_ratio=6, block_layer=SpatialGatingBlock, mlp_layer=GatedMlp, **kwargs) model = _create_mixer('gmlp_ti16_224', pretrained=pretrained, **model_args) return model @@ -458,7 +478,7 @@ def gmlp_s16_224(pretrained=False, **kwargs): Paper: `Pay Attention to MLPs` - https://arxiv.org/abs/2105.08050 """ model_args = dict( - patch_size=16, num_blocks=30, hidden_dim=256, mlp_ratio=6, block_layer=SpatialGatingBlock, + patch_size=16, num_blocks=30, embed_dim=256, mlp_ratio=6, block_layer=SpatialGatingBlock, mlp_layer=GatedMlp, **kwargs) model = _create_mixer('gmlp_s16_224', pretrained=pretrained, **model_args) return model @@ -470,7 +490,7 @@ def gmlp_b16_224(pretrained=False, **kwargs): Paper: `Pay Attention to MLPs` - https://arxiv.org/abs/2105.08050 """ model_args = dict( - patch_size=16, num_blocks=30, hidden_dim=512, mlp_ratio=6, block_layer=SpatialGatingBlock, + patch_size=16, num_blocks=30, embed_dim=512, mlp_ratio=6, block_layer=SpatialGatingBlock, mlp_layer=GatedMlp, **kwargs) model = _create_mixer('gmlp_b16_224', pretrained=pretrained, **model_args) return model diff --git a/timm/models/mobilenetv3.py b/timm/models/mobilenetv3.py index e85112e6..f810eb82 100644 --- a/timm/models/mobilenetv3.py +++ b/timm/models/mobilenetv3.py @@ -119,6 +119,7 @@ class MobileNetV3(nn.Module): num_pooled_chs = head_chs * self.global_pool.feat_mult() self.conv_head = create_conv2d(num_pooled_chs, self.num_features, 1, padding=pad_type, bias=head_bias) self.act2 = act_layer(inplace=True) + self.flatten = nn.Flatten(1) if global_pool else nn.Identity() # don't flatten if pooling disabled self.classifier = Linear(self.num_features, num_classes) if num_classes > 0 else nn.Identity() efficientnet_init_weights(self) @@ -137,6 +138,7 @@ class MobileNetV3(nn.Module): self.num_classes = num_classes # cannot meaningfully change pooling of efficient head after creation self.global_pool = SelectAdaptivePool2d(pool_type=global_pool) + self.flatten = nn.Flatten(1) if global_pool else nn.Identity() # don't flatten if pooling disabled self.classifier = Linear(self.num_features, num_classes) if num_classes > 0 else nn.Identity() def forward_features(self, x): @@ -151,8 +153,7 @@ class MobileNetV3(nn.Module): def forward(self, x): x = self.forward_features(x) - if not self.global_pool.is_identity(): - x = x.flatten(1) + x = self.flatten(x) if self.drop_rate > 0.: x = F.dropout(x, p=self.drop_rate, training=self.training) return self.classifier(x) diff --git a/timm/models/nfnet.py b/timm/models/nfnet.py index fc0a20c2..4e0f2b21 100644 --- a/timm/models/nfnet.py +++ b/timm/models/nfnet.py @@ -111,11 +111,11 @@ default_cfgs = dict( url='https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-weights/ecanfnet_l1_ra2-7dce93cd.pth', pool_size=(8, 8), input_size=(3, 256, 256), test_input_size=(3, 320, 320), crop_pct=1.0), eca_nfnet_l2=_dcfg( - url='', - pool_size=(9, 9), input_size=(3, 288, 288), test_input_size=(3, 352, 352), crop_pct=1.0), + url='https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-weights/ecanfnet_l2_ra3-da781a61.pth', + pool_size=(10, 10), input_size=(3, 320, 320), test_input_size=(3, 384, 384), crop_pct=1.0), eca_nfnet_l3=_dcfg( url='', - pool_size=(10, 10), 
input_size=(3, 320, 320), test_input_size=(3, 384, 384), crop_pct=1.0), + pool_size=(11, 11), input_size=(3, 352, 352), test_input_size=(3, 448, 448), crop_pct=1.0), nf_regnet_b0=_dcfg( url='', pool_size=(6, 6), input_size=(3, 192, 192), test_input_size=(3, 256, 256), first_conv='stem.conv'), @@ -210,6 +210,7 @@ def _dm_nfnet_cfg(depths, channels=(256, 512, 1536, 1536), act_layer='gelu', ski return cfg + model_cfgs = dict( # NFNet-F models w/ GELU compatible with DeepMind weights dm_nfnet_f0=_dm_nfnet_cfg(depths=(1, 2, 6, 3)), diff --git a/timm/models/pit.py b/timm/models/pit.py index 9c350861..460824e2 100644 --- a/timm/models/pit.py +++ b/timm/models/pit.py @@ -186,12 +186,13 @@ class PoolingVisionTransformer(nn.Module): ] self.transformers = SequentialTuple(*transformers) self.norm = nn.LayerNorm(base_dims[-1] * heads[-1], eps=1e-6) - self.embed_dim = base_dims[-1] * heads[-1] + self.num_features = self.embed_dim = base_dims[-1] * heads[-1] # Classifier head self.head = nn.Linear(self.embed_dim, num_classes) if num_classes > 0 else nn.Identity() - self.head_dist = nn.Linear(self.embed_dim, self.num_classes) \ - if num_classes > 0 and distilled else nn.Identity() + self.head_dist = None + if distilled: + self.head_dist = nn.Linear(self.embed_dim, self.num_classes) if num_classes > 0 else nn.Identity() trunc_normal_(self.pos_embed, std=.02) trunc_normal_(self.cls_token, std=.02) @@ -207,13 +208,16 @@ class PoolingVisionTransformer(nn.Module): return {'pos_embed', 'cls_token'} def get_classifier(self): - return self.head + if self.head_dist is not None: + return self.head, self.head_dist + else: + return self.head def reset_classifier(self, num_classes, global_pool=''): self.num_classes = num_classes self.head = nn.Linear(self.embed_dim, num_classes) if num_classes > 0 else nn.Identity() - self.head_dist = nn.Linear(self.embed_dim, self.num_classes) \ - if num_classes > 0 and self.num_tokens == 2 else nn.Identity() + if self.head_dist is not None: + self.head_dist = nn.Linear(self.embed_dim, self.num_classes) if num_classes > 0 else nn.Identity() def forward_features(self, x): x = self.patch_embed(x) @@ -221,19 +225,21 @@ class PoolingVisionTransformer(nn.Module): cls_tokens = self.cls_token.expand(x.shape[0], -1, -1) x, cls_tokens = self.transformers((x, cls_tokens)) cls_tokens = self.norm(cls_tokens) - return cls_tokens + if self.head_dist is not None: + return cls_tokens[:, 0], cls_tokens[:, 1] + else: + return cls_tokens[:, 0] def forward(self, x): x = self.forward_features(x) - x_cls = self.head(x[:, 0]) - if self.num_tokens > 1: - x_dist = self.head_dist(x[:, 1]) + if self.head_dist is not None: + x, x_dist = self.head(x[0]), self.head_dist(x[1]) # x must be a tuple if self.training and not torch.jit.is_scripting(): - return x_cls, x_dist + return x, x_dist else: - return (x_cls + x_dist) / 2 + return (x + x_dist) / 2 else: - return x_cls + return self.head(x) def checkpoint_filter_fn(state_dict, model): diff --git a/timm/models/registry.py b/timm/models/registry.py index 6927b6d6..f92219b2 100644 --- a/timm/models/registry.py +++ b/timm/models/registry.py @@ -65,11 +65,18 @@ def list_models(filter='', module='', pretrained=False, exclude_filters='', name model_list('*resnext*, 'resnet') -- returns all models with 'resnext' in 'resnet' module """ if module: - models = list(_module_to_models[module]) + all_models = list(_module_to_models[module]) else: - models = _model_entrypoints.keys() + all_models = _model_entrypoints.keys() if filter: - models = fnmatch.filter(models, filter) # 
include these models + models = [] + include_filters = filter if isinstance(filter, (tuple, list)) else [filter] + for f in include_filters: + include_models = fnmatch.filter(all_models, f) # include these models + if len(include_models): + models = set(models).union(include_models) + else: + models = all_models if exclude_filters: if not isinstance(exclude_filters, (tuple, list)): exclude_filters = [exclude_filters] diff --git a/timm/models/resnet.py b/timm/models/resnet.py index 2f02f12a..66baa37a 100644 --- a/timm/models/resnet.py +++ b/timm/models/resnet.py @@ -638,12 +638,15 @@ class ResNet(nn.Module): self.num_features = 512 * block.expansion self.global_pool, self.fc = create_classifier(self.num_features, self.num_classes, pool_type=global_pool) + self.init_weights(zero_init_last_bn=zero_init_last_bn) + + def init_weights(self, zero_init_last_bn=True): for n, m in self.named_modules(): if isinstance(m, nn.Conv2d): nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu') elif isinstance(m, nn.BatchNorm2d): - nn.init.constant_(m.weight, 1.) - nn.init.constant_(m.bias, 0.) + nn.init.ones_(m.weight) + nn.init.zeros_(m.bias) if zero_init_last_bn: for m in self.modules(): if hasattr(m, 'zero_init_last_bn'): diff --git a/timm/models/resnetv2.py b/timm/models/resnetv2.py index 250695a8..84b16bb2 100644 --- a/timm/models/resnetv2.py +++ b/timm/models/resnetv2.py @@ -35,9 +35,9 @@ import torch.nn as nn from functools import partial from timm.data import IMAGENET_INCEPTION_MEAN, IMAGENET_INCEPTION_STD -from .helpers import build_model_with_cfg +from .helpers import build_model_with_cfg, named_apply, adapt_input_conv from .registry import register_model -from .layers import GroupNormAct, ClassifierHead, DropPath, AvgPool2dSame, create_pool2d, StdConv2d +from .layers import GroupNormAct, ClassifierHead, DropPath, AvgPool2dSame, create_pool2d, StdConv2d, create_conv2d def _cfg(url='', **kwargs): @@ -86,20 +86,10 @@ default_cfgs = { url='https://storage.googleapis.com/bit_models/BiT-M-R152x4.npz', num_classes=21843), - - # trained on imagenet-1k, NOTE not overly interesting set of weights, leaving disabled for now - # 'resnetv2_50x1_bits': _cfg( - # url='https://storage.googleapis.com/bit_models/BiT-S-R50x1.npz'), - # 'resnetv2_50x3_bits': _cfg( - # url='https://storage.googleapis.com/bit_models/BiT-S-R50x3.npz'), - # 'resnetv2_101x1_bits': _cfg( - # url='https://storage.googleapis.com/bit_models/BiT-S-R101x3.npz'), - # 'resnetv2_101x3_bits': _cfg( - # url='https://storage.googleapis.com/bit_models/BiT-S-R101x3.npz'), - # 'resnetv2_152x2_bits': _cfg( - # url='https://storage.googleapis.com/bit_models/BiT-S-R152x2.npz'), - # 'resnetv2_152x4_bits': _cfg( - # url='https://storage.googleapis.com/bit_models/BiT-S-R152x4.npz'), + 'resnetv2_50': _cfg( + input_size=(3, 224, 224), crop_pct=0.875, interpolation='bicubic'), + 'resnetv2_50d': _cfg( + input_size=(3, 224, 224), crop_pct=0.875, interpolation='bicubic', first_conv='stem.conv1'), } @@ -111,13 +101,6 @@ def make_div(v, divisor=8): return new_v -def tf2th(conv_weights): - """Possibly convert HWIO to OIHW.""" - if conv_weights.ndim == 4: - conv_weights = conv_weights.transpose([3, 2, 0, 1]) - return torch.from_numpy(conv_weights) - - class PreActBottleneck(nn.Module): """Pre-activation (v2) bottleneck block. 
@@ -152,6 +135,9 @@ class PreActBottleneck(nn.Module): self.conv3 = conv_layer(mid_chs, out_chs, 1) self.drop_path = DropPath(drop_path_rate) if drop_path_rate > 0 else nn.Identity() + def zero_init_last_bn(self): + nn.init.zeros_(self.norm3.weight) + def forward(self, x): x_preact = self.norm1(x) @@ -198,6 +184,9 @@ class Bottleneck(nn.Module): self.drop_path = DropPath(drop_path_rate) if drop_path_rate > 0 else nn.Identity() self.act3 = act_layer(inplace=True) + def zero_init_last_bn(self): + nn.init.zeros_(self.norm3.weight) + def forward(self, x): # shortcut branch shortcut = x @@ -276,7 +265,7 @@ class ResNetStage(nn.Module): def create_resnetv2_stem( in_chs, out_chs=64, stem_type='', preact=True, - conv_layer=partial(StdConv2d, eps=1e-8), norm_layer=partial(GroupNormAct, num_groups=32)): + conv_layer=StdConv2d, norm_layer=partial(GroupNormAct, num_groups=32)): stem = OrderedDict() assert stem_type in ('', 'fixed', 'same', 'deep', 'deep_fixed', 'deep_same') @@ -285,14 +274,17 @@ def create_resnetv2_stem( # A 3 deep 3x3 conv stack as in ResNet V1D models mid_chs = out_chs // 2 stem['conv1'] = conv_layer(in_chs, mid_chs, kernel_size=3, stride=2) + stem['norm1'] = norm_layer(mid_chs) stem['conv2'] = conv_layer(mid_chs, mid_chs, kernel_size=3, stride=1) + stem['norm2'] = norm_layer(mid_chs) stem['conv3'] = conv_layer(mid_chs, out_chs, kernel_size=3, stride=1) + if not preact: + stem['norm3'] = norm_layer(out_chs) else: # The usual 7x7 stem conv stem['conv'] = conv_layer(in_chs, out_chs, kernel_size=7, stride=2) - - if not preact: - stem['norm'] = norm_layer(out_chs) + if not preact: + stem['norm'] = norm_layer(out_chs) if 'fixed' in stem_type: # 'fixed' SAME padding approximation that is used in BiT models @@ -312,11 +304,12 @@ class ResNetV2(nn.Module): """Implementation of Pre-activation (v2) ResNet mode. 
""" - def __init__(self, layers, channels=(256, 512, 1024, 2048), - num_classes=1000, in_chans=3, global_pool='avg', output_stride=32, - width_factor=1, stem_chs=64, stem_type='', avg_down=False, preact=True, - act_layer=nn.ReLU, conv_layer=partial(StdConv2d, eps=1e-8), - norm_layer=partial(GroupNormAct, num_groups=32), drop_rate=0., drop_path_rate=0.): + def __init__( + self, layers, channels=(256, 512, 1024, 2048), + num_classes=1000, in_chans=3, global_pool='avg', output_stride=32, + width_factor=1, stem_chs=64, stem_type='', avg_down=False, preact=True, + act_layer=nn.ReLU, conv_layer=StdConv2d, norm_layer=partial(GroupNormAct, num_groups=32), + drop_rate=0., drop_path_rate=0., zero_init_last_bn=True): super().__init__() self.num_classes = num_classes self.drop_rate = drop_rate @@ -354,12 +347,14 @@ class ResNetV2(nn.Module): self.head = ClassifierHead( self.num_features, num_classes, pool_type=global_pool, drop_rate=self.drop_rate, use_conv=True) - for n, m in self.named_modules(): - if isinstance(m, nn.Linear) or ('.fc' in n and isinstance(m, nn.Conv2d)): - nn.init.normal_(m.weight, mean=0.0, std=0.01) - nn.init.zeros_(m.bias) - elif isinstance(m, nn.Conv2d): - nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu') + self.init_weights(zero_init_last_bn=zero_init_last_bn) + + def init_weights(self, zero_init_last_bn=True): + named_apply(partial(_init_weights, zero_init_last_bn=zero_init_last_bn), self) + + @torch.jit.ignore() + def load_pretrained(self, checkpoint_path, prefix='resnet/'): + _load_weights(self, checkpoint_path, prefix) def get_classifier(self): return self.head.fc @@ -378,41 +373,59 @@ class ResNetV2(nn.Module): def forward(self, x): x = self.forward_features(x) x = self.head(x) - if not self.head.global_pool.is_identity(): - x = x.flatten(1) # conv classifier, flatten if pooling isn't pass-through (disabled) return x - def load_pretrained(self, checkpoint_path, prefix='resnet/'): - import numpy as np - weights = np.load(checkpoint_path) - with torch.no_grad(): - stem_conv_w = tf2th(weights[f'{prefix}root_block/standardized_conv2d/kernel']) - if self.stem.conv.weight.shape[1] == 1: - self.stem.conv.weight.copy_(stem_conv_w.sum(dim=1, keepdim=True)) - # FIXME handle > 3 in_chans? 
- else: - self.stem.conv.weight.copy_(stem_conv_w) - self.norm.weight.copy_(tf2th(weights[f'{prefix}group_norm/gamma'])) - self.norm.bias.copy_(tf2th(weights[f'{prefix}group_norm/beta'])) - if self.head.fc.weight.shape[0] == weights[f'{prefix}head/conv2d/kernel'].shape[-1]: - self.head.fc.weight.copy_(tf2th(weights[f'{prefix}head/conv2d/kernel'])) - self.head.fc.bias.copy_(tf2th(weights[f'{prefix}head/conv2d/bias'])) - for i, (sname, stage) in enumerate(self.stages.named_children()): - for j, (bname, block) in enumerate(stage.blocks.named_children()): - convname = 'standardized_conv2d' - block_prefix = f'{prefix}block{i + 1}/unit{j + 1:02d}/' - block.conv1.weight.copy_(tf2th(weights[f'{block_prefix}a/{convname}/kernel'])) - block.conv2.weight.copy_(tf2th(weights[f'{block_prefix}b/{convname}/kernel'])) - block.conv3.weight.copy_(tf2th(weights[f'{block_prefix}c/{convname}/kernel'])) - block.norm1.weight.copy_(tf2th(weights[f'{block_prefix}a/group_norm/gamma'])) - block.norm2.weight.copy_(tf2th(weights[f'{block_prefix}b/group_norm/gamma'])) - block.norm3.weight.copy_(tf2th(weights[f'{block_prefix}c/group_norm/gamma'])) - block.norm1.bias.copy_(tf2th(weights[f'{block_prefix}a/group_norm/beta'])) - block.norm2.bias.copy_(tf2th(weights[f'{block_prefix}b/group_norm/beta'])) - block.norm3.bias.copy_(tf2th(weights[f'{block_prefix}c/group_norm/beta'])) - if block.downsample is not None: - w = weights[f'{block_prefix}a/proj/{convname}/kernel'] - block.downsample.conv.weight.copy_(tf2th(w)) + +def _init_weights(module: nn.Module, name: str = '', zero_init_last_bn=True): + if isinstance(module, nn.Linear) or ('head.fc' in name and isinstance(module, nn.Conv2d)): + nn.init.normal_(module.weight, mean=0.0, std=0.01) + nn.init.zeros_(module.bias) + elif isinstance(module, nn.Conv2d): + nn.init.kaiming_normal_(module.weight, mode='fan_out', nonlinearity='relu') + if module.bias is not None: + nn.init.zeros_(module.bias) + elif isinstance(module, (nn.BatchNorm2d, nn.LayerNorm, nn.GroupNorm)): + nn.init.ones_(module.weight) + nn.init.zeros_(module.bias) + elif zero_init_last_bn and hasattr(module, 'zero_init_last_bn'): + module.zero_init_last_bn() + + +@torch.no_grad() +def _load_weights(model: nn.Module, checkpoint_path: str, prefix: str = 'resnet/'): + import numpy as np + + def t2p(conv_weights): + """Possibly convert HWIO to OIHW.""" + if conv_weights.ndim == 4: + conv_weights = conv_weights.transpose([3, 2, 0, 1]) + return torch.from_numpy(conv_weights) + + weights = np.load(checkpoint_path) + stem_conv_w = adapt_input_conv( + model.stem.conv.weight.shape[1], t2p(weights[f'{prefix}root_block/standardized_conv2d/kernel'])) + model.stem.conv.weight.copy_(stem_conv_w) + model.norm.weight.copy_(t2p(weights[f'{prefix}group_norm/gamma'])) + model.norm.bias.copy_(t2p(weights[f'{prefix}group_norm/beta'])) + if model.head.fc.weight.shape[0] == weights[f'{prefix}head/conv2d/kernel'].shape[-1]: + model.head.fc.weight.copy_(t2p(weights[f'{prefix}head/conv2d/kernel'])) + model.head.fc.bias.copy_(t2p(weights[f'{prefix}head/conv2d/bias'])) + for i, (sname, stage) in enumerate(model.stages.named_children()): + for j, (bname, block) in enumerate(stage.blocks.named_children()): + cname = 'standardized_conv2d' + block_prefix = f'{prefix}block{i + 1}/unit{j + 1:02d}/' + block.conv1.weight.copy_(t2p(weights[f'{block_prefix}a/{cname}/kernel'])) + block.conv2.weight.copy_(t2p(weights[f'{block_prefix}b/{cname}/kernel'])) + block.conv3.weight.copy_(t2p(weights[f'{block_prefix}c/{cname}/kernel'])) + 
block.norm1.weight.copy_(t2p(weights[f'{block_prefix}a/group_norm/gamma'])) + block.norm2.weight.copy_(t2p(weights[f'{block_prefix}b/group_norm/gamma'])) + block.norm3.weight.copy_(t2p(weights[f'{block_prefix}c/group_norm/gamma'])) + block.norm1.bias.copy_(t2p(weights[f'{block_prefix}a/group_norm/beta'])) + block.norm2.bias.copy_(t2p(weights[f'{block_prefix}b/group_norm/beta'])) + block.norm3.bias.copy_(t2p(weights[f'{block_prefix}c/group_norm/beta'])) + if block.downsample is not None: + w = weights[f'{block_prefix}a/proj/{cname}/kernel'] + block.downsample.conv.weight.copy_(t2p(w)) def _create_resnetv2(variant, pretrained=False, **kwargs): @@ -425,130 +438,99 @@ def _create_resnetv2(variant, pretrained=False, **kwargs): **kwargs) +def _create_resnetv2_bit(variant, pretrained=False, **kwargs): + return _create_resnetv2( + variant, pretrained=pretrained, stem_type='fixed', conv_layer=partial(StdConv2d, eps=1e-8), **kwargs) + + @register_model def resnetv2_50x1_bitm(pretrained=False, **kwargs): - return _create_resnetv2( - 'resnetv2_50x1_bitm', pretrained=pretrained, - layers=[3, 4, 6, 3], width_factor=1, stem_type='fixed', **kwargs) + return _create_resnetv2_bit( + 'resnetv2_50x1_bitm', pretrained=pretrained, layers=[3, 4, 6, 3], width_factor=1, **kwargs) @register_model def resnetv2_50x3_bitm(pretrained=False, **kwargs): - return _create_resnetv2( - 'resnetv2_50x3_bitm', pretrained=pretrained, - layers=[3, 4, 6, 3], width_factor=3, stem_type='fixed', **kwargs) + return _create_resnetv2_bit( + 'resnetv2_50x3_bitm', pretrained=pretrained, layers=[3, 4, 6, 3], width_factor=3, **kwargs) @register_model def resnetv2_101x1_bitm(pretrained=False, **kwargs): - return _create_resnetv2( - 'resnetv2_101x1_bitm', pretrained=pretrained, - layers=[3, 4, 23, 3], width_factor=1, stem_type='fixed', **kwargs) + return _create_resnetv2_bit( + 'resnetv2_101x1_bitm', pretrained=pretrained, layers=[3, 4, 23, 3], width_factor=1, **kwargs) @register_model def resnetv2_101x3_bitm(pretrained=False, **kwargs): - return _create_resnetv2( - 'resnetv2_101x3_bitm', pretrained=pretrained, - layers=[3, 4, 23, 3], width_factor=3, stem_type='fixed', **kwargs) + return _create_resnetv2_bit( + 'resnetv2_101x3_bitm', pretrained=pretrained, layers=[3, 4, 23, 3], width_factor=3, **kwargs) @register_model def resnetv2_152x2_bitm(pretrained=False, **kwargs): - return _create_resnetv2( - 'resnetv2_152x2_bitm', pretrained=pretrained, - layers=[3, 8, 36, 3], width_factor=2, stem_type='fixed', **kwargs) + return _create_resnetv2_bit( + 'resnetv2_152x2_bitm', pretrained=pretrained, layers=[3, 8, 36, 3], width_factor=2, **kwargs) @register_model def resnetv2_152x4_bitm(pretrained=False, **kwargs): - return _create_resnetv2( - 'resnetv2_152x4_bitm', pretrained=pretrained, - layers=[3, 8, 36, 3], width_factor=4, stem_type='fixed', **kwargs) + return _create_resnetv2_bit( + 'resnetv2_152x4_bitm', pretrained=pretrained, layers=[3, 8, 36, 3], width_factor=4, **kwargs) @register_model def resnetv2_50x1_bitm_in21k(pretrained=False, **kwargs): - return _create_resnetv2( + return _create_resnetv2_bit( 'resnetv2_50x1_bitm_in21k', pretrained=pretrained, num_classes=kwargs.pop('num_classes', 21843), - layers=[3, 4, 6, 3], width_factor=1, stem_type='fixed', **kwargs) + layers=[3, 4, 6, 3], width_factor=1, **kwargs) @register_model def resnetv2_50x3_bitm_in21k(pretrained=False, **kwargs): - return _create_resnetv2( + return _create_resnetv2_bit( 'resnetv2_50x3_bitm_in21k', pretrained=pretrained, num_classes=kwargs.pop('num_classes', 21843), - 
layers=[3, 4, 6, 3], width_factor=3, stem_type='fixed', **kwargs) + layers=[3, 4, 6, 3], width_factor=3, **kwargs) @register_model def resnetv2_101x1_bitm_in21k(pretrained=False, **kwargs): return _create_resnetv2( 'resnetv2_101x1_bitm_in21k', pretrained=pretrained, num_classes=kwargs.pop('num_classes', 21843), - layers=[3, 4, 23, 3], width_factor=1, stem_type='fixed', **kwargs) + layers=[3, 4, 23, 3], width_factor=1, **kwargs) @register_model def resnetv2_101x3_bitm_in21k(pretrained=False, **kwargs): - return _create_resnetv2( + return _create_resnetv2_bit( 'resnetv2_101x3_bitm_in21k', pretrained=pretrained, num_classes=kwargs.pop('num_classes', 21843), - layers=[3, 4, 23, 3], width_factor=3, stem_type='fixed', **kwargs) + layers=[3, 4, 23, 3], width_factor=3, **kwargs) @register_model def resnetv2_152x2_bitm_in21k(pretrained=False, **kwargs): - return _create_resnetv2( + return _create_resnetv2_bit( 'resnetv2_152x2_bitm_in21k', pretrained=pretrained, num_classes=kwargs.pop('num_classes', 21843), - layers=[3, 8, 36, 3], width_factor=2, stem_type='fixed', **kwargs) + layers=[3, 8, 36, 3], width_factor=2, **kwargs) @register_model def resnetv2_152x4_bitm_in21k(pretrained=False, **kwargs): - return _create_resnetv2( + return _create_resnetv2_bit( 'resnetv2_152x4_bitm_in21k', pretrained=pretrained, num_classes=kwargs.pop('num_classes', 21843), - layers=[3, 8, 36, 3], width_factor=4, stem_type='fixed', **kwargs) + layers=[3, 8, 36, 3], width_factor=4, **kwargs) -# NOTE the 'S' versions of the model weights arent as interesting as original 21k or transfer to 1K M. +@register_model +def resnetv2_50(pretrained=False, **kwargs): + return _create_resnetv2( + 'resnetv2_50', pretrained=pretrained, + layers=[3, 4, 6, 3], conv_layer=create_conv2d, norm_layer=nn.BatchNorm2d, **kwargs) -# @register_model -# def resnetv2_50x1_bits(pretrained=False, **kwargs): -# return _create_resnetv2( -# 'resnetv2_50x1_bits', pretrained=pretrained, -# layers=[3, 4, 6, 3], width_factor=1, stem_type='fixed', **kwargs) -# -# -# @register_model -# def resnetv2_50x3_bits(pretrained=False, **kwargs): -# return _create_resnetv2( -# 'resnetv2_50x3_bits', pretrained=pretrained, -# layers=[3, 4, 6, 3], width_factor=3, stem_type='fixed', **kwargs) -# -# -# @register_model -# def resnetv2_101x1_bits(pretrained=False, **kwargs): -# return _create_resnetv2( -# 'resnetv2_101x1_bits', pretrained=pretrained, -# layers=[3, 4, 23, 3], width_factor=1, stem_type='fixed', **kwargs) -# -# -# @register_model -# def resnetv2_101x3_bits(pretrained=False, **kwargs): -# return _create_resnetv2( -# 'resnetv2_101x3_bits', pretrained=pretrained, -# layers=[3, 4, 23, 3], width_factor=3, stem_type='fixed', **kwargs) -# -# -# @register_model -# def resnetv2_152x2_bits(pretrained=False, **kwargs): -# return _create_resnetv2( -# 'resnetv2_152x2_bits', pretrained=pretrained, -# layers=[3, 8, 36, 3], width_factor=2, stem_type='fixed', **kwargs) -# -# -# @register_model -# def resnetv2_152x4_bits(pretrained=False, **kwargs): -# return _create_resnetv2( -# 'resnetv2_152x4_bits', pretrained=pretrained, -# layers=[3, 8, 36, 3], width_factor=4, stem_type='fixed', **kwargs) -# + +@register_model +def resnetv2_50d(pretrained=False, **kwargs): + return _create_resnetv2( + 'resnetv2_50d', pretrained=pretrained, + layers=[3, 4, 6, 3], conv_layer=create_conv2d, norm_layer=nn.BatchNorm2d, + stem_type='deep', avg_down=True, **kwargs) diff --git a/timm/models/swin_transformer.py b/timm/models/swin_transformer.py index a845f505..2ee106d2 100644 --- 
a/timm/models/swin_transformer.py +++ b/timm/models/swin_transformer.py @@ -126,19 +126,18 @@ class WindowAttention(nn.Module): window_size (tuple[int]): The height and width of the window. num_heads (int): Number of attention heads. qkv_bias (bool, optional): If True, add a learnable bias to query, key, value. Default: True - qk_scale (float | None, optional): Override default qk scale of head_dim ** -0.5 if set attn_drop (float, optional): Dropout ratio of attention weight. Default: 0.0 proj_drop (float, optional): Dropout ratio of output. Default: 0.0 """ - def __init__(self, dim, window_size, num_heads, qkv_bias=True, qk_scale=None, attn_drop=0., proj_drop=0.): + def __init__(self, dim, window_size, num_heads, qkv_bias=True, attn_drop=0., proj_drop=0.): super().__init__() self.dim = dim self.window_size = window_size # Wh, Ww self.num_heads = num_heads head_dim = dim // num_heads - self.scale = qk_scale or head_dim ** -0.5 + self.scale = head_dim ** -0.5 # define a parameter table of relative position bias self.relative_position_bias_table = nn.Parameter( @@ -210,7 +209,6 @@ class SwinTransformerBlock(nn.Module): shift_size (int): Shift size for SW-MSA. mlp_ratio (float): Ratio of mlp hidden dim to embedding dim. qkv_bias (bool, optional): If True, add a learnable bias to query, key, value. Default: True - qk_scale (float | None, optional): Override default qk scale of head_dim ** -0.5 if set. drop (float, optional): Dropout rate. Default: 0.0 attn_drop (float, optional): Attention dropout rate. Default: 0.0 drop_path (float, optional): Stochastic depth rate. Default: 0.0 @@ -219,7 +217,7 @@ class SwinTransformerBlock(nn.Module): """ def __init__(self, dim, input_resolution, num_heads, window_size=7, shift_size=0, - mlp_ratio=4., qkv_bias=True, qk_scale=None, drop=0., attn_drop=0., drop_path=0., + mlp_ratio=4., qkv_bias=True, drop=0., attn_drop=0., drop_path=0., act_layer=nn.GELU, norm_layer=nn.LayerNorm): super().__init__() self.dim = dim @@ -236,8 +234,8 @@ class SwinTransformerBlock(nn.Module): self.norm1 = norm_layer(dim) self.attn = WindowAttention( - dim, window_size=to_2tuple(self.window_size), num_heads=num_heads, - qkv_bias=qkv_bias, qk_scale=qk_scale, attn_drop=attn_drop, proj_drop=drop) + dim, window_size=to_2tuple(self.window_size), num_heads=num_heads, qkv_bias=qkv_bias, + attn_drop=attn_drop, proj_drop=drop) self.drop_path = DropPath(drop_path) if drop_path > 0. else nn.Identity() self.norm2 = norm_layer(dim) @@ -369,7 +367,6 @@ class BasicLayer(nn.Module): window_size (int): Local window size. mlp_ratio (float): Ratio of mlp hidden dim to embedding dim. qkv_bias (bool, optional): If True, add a learnable bias to query, key, value. Default: True - qk_scale (float | None, optional): Override default qk scale of head_dim ** -0.5 if set. drop (float, optional): Dropout rate. Default: 0.0 attn_drop (float, optional): Attention dropout rate. Default: 0.0 drop_path (float | tuple[float], optional): Stochastic depth rate. 
Default: 0.0 @@ -379,7 +376,7 @@ class BasicLayer(nn.Module): """ def __init__(self, dim, input_resolution, depth, num_heads, window_size, - mlp_ratio=4., qkv_bias=True, qk_scale=None, drop=0., attn_drop=0., + mlp_ratio=4., qkv_bias=True, drop=0., attn_drop=0., drop_path=0., norm_layer=nn.LayerNorm, downsample=None, use_checkpoint=False): super().__init__() @@ -390,14 +387,11 @@ class BasicLayer(nn.Module): # build blocks self.blocks = nn.ModuleList([ - SwinTransformerBlock(dim=dim, input_resolution=input_resolution, - num_heads=num_heads, window_size=window_size, - shift_size=0 if (i % 2 == 0) else window_size // 2, - mlp_ratio=mlp_ratio, - qkv_bias=qkv_bias, qk_scale=qk_scale, - drop=drop, attn_drop=attn_drop, - drop_path=drop_path[i] if isinstance(drop_path, list) else drop_path, - norm_layer=norm_layer) + SwinTransformerBlock( + dim=dim, input_resolution=input_resolution, num_heads=num_heads, window_size=window_size, + shift_size=0 if (i % 2 == 0) else window_size // 2, mlp_ratio=mlp_ratio, + qkv_bias=qkv_bias, drop=drop, attn_drop=attn_drop, + drop_path=drop_path[i] if isinstance(drop_path, list) else drop_path, norm_layer=norm_layer) for i in range(depth)]) # patch merging layer @@ -436,7 +430,6 @@ class SwinTransformer(nn.Module): window_size (int): Window size. Default: 7 mlp_ratio (float): Ratio of mlp hidden dim to embedding dim. Default: 4 qkv_bias (bool): If True, add a learnable bias to query, key, value. Default: True - qk_scale (float): Override default qk scale of head_dim ** -0.5 if set. Default: None drop_rate (float): Dropout rate. Default: 0 attn_drop_rate (float): Attention dropout rate. Default: 0 drop_path_rate (float): Stochastic depth rate. Default: 0.1 @@ -448,7 +441,7 @@ class SwinTransformer(nn.Module): def __init__(self, img_size=224, patch_size=4, in_chans=3, num_classes=1000, embed_dim=96, depths=(2, 2, 6, 2), num_heads=(3, 6, 12, 24), - window_size=7, mlp_ratio=4., qkv_bias=True, qk_scale=None, + window_size=7, mlp_ratio=4., qkv_bias=True, drop_rate=0., attn_drop_rate=0., drop_path_rate=0.1, norm_layer=nn.LayerNorm, ape=False, patch_norm=True, use_checkpoint=False, weight_init='', **kwargs): @@ -491,8 +484,9 @@ class SwinTransformer(nn.Module): num_heads=num_heads[i_layer], window_size=window_size, mlp_ratio=self.mlp_ratio, - qkv_bias=qkv_bias, qk_scale=qk_scale, - drop=drop_rate, attn_drop=attn_drop_rate, + qkv_bias=qkv_bias, + drop=drop_rate, + attn_drop=attn_drop_rate, drop_path=dpr[sum(depths[:i_layer]):sum(depths[:i_layer + 1])], norm_layer=norm_layer, downsample=PatchMerging if (i_layer < self.num_layers - 1) else None, @@ -520,6 +514,13 @@ class SwinTransformer(nn.Module): def no_weight_decay_keywords(self): return {'relative_position_bias_table'} + def get_classifier(self): + return self.head + + def reset_classifier(self, num_classes, global_pool=''): + self.num_classes = num_classes + self.head = nn.Linear(self.num_features, num_classes) if num_classes > 0 else nn.Identity() + def forward_features(self, x): x = self.patch_embed(x) if self.absolute_pos_embed is not None: diff --git a/timm/models/twins.py b/timm/models/twins.py index 793d2ede..4aed09d9 100644 --- a/timm/models/twins.py +++ b/timm/models/twins.py @@ -278,6 +278,8 @@ class Twins(nn.Module): super().__init__() self.num_classes = num_classes self.depths = depths + self.embed_dims = embed_dims + self.num_features = embed_dims[-1] img_size = to_2tuple(img_size) prev_chs = in_chans @@ -303,10 +305,10 @@ class Twins(nn.Module): self.pos_block = nn.ModuleList([PosConv(embed_dim, embed_dim) 
for embed_dim in embed_dims]) - self.norm = norm_layer(embed_dims[-1]) + self.norm = norm_layer(self.num_features) # classification head - self.head = nn.Linear(embed_dims[-1], num_classes) if num_classes > 0 else nn.Identity() + self.head = nn.Linear(self.num_features, num_classes) if num_classes > 0 else nn.Identity() # init weights self.apply(self._init_weights) @@ -320,7 +322,7 @@ class Twins(nn.Module): def reset_classifier(self, num_classes, global_pool=''): self.num_classes = num_classes - self.head = nn.Linear(self.embed_dim, num_classes) if num_classes > 0 else nn.Identity() + self.head = nn.Linear(self.num_features, num_classes) if num_classes > 0 else nn.Identity() def _init_weights(self, m): if isinstance(m, nn.Linear): diff --git a/timm/models/visformer.py b/timm/models/visformer.py index 5583ea3c..16631027 100644 --- a/timm/models/visformer.py +++ b/timm/models/visformer.py @@ -13,7 +13,7 @@ import torch.nn.functional as F from timm.data import IMAGENET_DEFAULT_MEAN, IMAGENET_DEFAULT_STD from .helpers import build_model_with_cfg, overlay_external_default_cfg -from .layers import to_2tuple, trunc_normal_, DropPath, PatchEmbed, LayerNorm2d +from .layers import to_2tuple, trunc_normal_, DropPath, PatchEmbed, LayerNorm2d, create_classifier from .registry import register_model @@ -140,14 +140,14 @@ class Visformer(nn.Module): def __init__(self, img_size=224, patch_size=16, in_chans=3, num_classes=1000, init_channels=32, embed_dim=384, depth=12, num_heads=6, mlp_ratio=4., drop_rate=0., attn_drop_rate=0., drop_path_rate=0., norm_layer=LayerNorm2d, attn_stage='111', pos_embed=True, spatial_conv='111', - vit_stem=False, group=8, pool=True, conv_init=False, embed_norm=None): + vit_stem=False, group=8, global_pool='avg', conv_init=False, embed_norm=None): super().__init__() + img_size = to_2tuple(img_size) self.num_classes = num_classes - self.num_features = self.embed_dim = embed_dim + self.embed_dim = embed_dim self.init_channels = init_channels self.img_size = img_size self.vit_stem = vit_stem - self.pool = pool self.conv_init = conv_init if isinstance(depth, (list, tuple)): self.stage_num1, self.stage_num2, self.stage_num3 = depth @@ -164,31 +164,31 @@ class Visformer(nn.Module): self.patch_embed1 = PatchEmbed( img_size=img_size, patch_size=patch_size, in_chans=in_chans, embed_dim=embed_dim, norm_layer=embed_norm, flatten=False) - img_size //= 16 + img_size = [x // 16 for x in img_size] else: if self.init_channels is None: self.stem = None self.patch_embed1 = PatchEmbed( img_size=img_size, patch_size=patch_size // 2, in_chans=in_chans, embed_dim=embed_dim // 2, norm_layer=embed_norm, flatten=False) - img_size //= 8 + img_size = [x // 8 for x in img_size] else: self.stem = nn.Sequential( nn.Conv2d(in_chans, self.init_channels, 7, stride=2, padding=3, bias=False), nn.BatchNorm2d(self.init_channels), nn.ReLU(inplace=True) ) - img_size //= 2 + img_size = [x // 2 for x in img_size] self.patch_embed1 = PatchEmbed( img_size=img_size, patch_size=patch_size // 4, in_chans=self.init_channels, embed_dim=embed_dim // 2, norm_layer=embed_norm, flatten=False) - img_size //= 4 + img_size = [x // 4 for x in img_size] if self.pos_embed: if self.vit_stem: - self.pos_embed1 = nn.Parameter(torch.zeros(1, embed_dim, img_size, img_size)) + self.pos_embed1 = nn.Parameter(torch.zeros(1, embed_dim, *img_size)) else: - self.pos_embed1 = nn.Parameter(torch.zeros(1, embed_dim//2, img_size, img_size)) + self.pos_embed1 = nn.Parameter(torch.zeros(1, embed_dim//2, *img_size)) self.pos_drop = 
nn.Dropout(p=drop_rate) self.stage1 = nn.ModuleList([ Block( @@ -199,14 +199,14 @@ class Visformer(nn.Module): for i in range(self.stage_num1) ]) - #stage2 + # stage2 if not self.vit_stem: self.patch_embed2 = PatchEmbed( img_size=img_size, patch_size=patch_size // 8, in_chans=embed_dim // 2, embed_dim=embed_dim, norm_layer=embed_norm, flatten=False) - img_size //= 2 + img_size = [x // 2 for x in img_size] if self.pos_embed: - self.pos_embed2 = nn.Parameter(torch.zeros(1, embed_dim, img_size, img_size)) + self.pos_embed2 = nn.Parameter(torch.zeros(1, embed_dim, *img_size)) self.stage2 = nn.ModuleList([ Block( dim=embed_dim, num_heads=num_heads, head_dim_ratio=1.0, mlp_ratio=mlp_ratio, @@ -221,9 +221,9 @@ class Visformer(nn.Module): self.patch_embed3 = PatchEmbed( img_size=img_size, patch_size=patch_size // 8, in_chans=embed_dim, embed_dim=embed_dim * 2, norm_layer=embed_norm, flatten=False) - img_size //= 2 + img_size = [x // 2 for x in img_size] if self.pos_embed: - self.pos_embed3 = nn.Parameter(torch.zeros(1, embed_dim*2, img_size, img_size)) + self.pos_embed3 = nn.Parameter(torch.zeros(1, embed_dim*2, *img_size)) self.stage3 = nn.ModuleList([ Block( dim=embed_dim*2, num_heads=num_heads, head_dim_ratio=1.0, mlp_ratio=mlp_ratio, @@ -234,11 +234,10 @@ class Visformer(nn.Module): ]) # head - if self.pool: - self.global_pooling = nn.AdaptiveAvgPool2d(1) - head_dim = embed_dim if self.vit_stem else embed_dim * 2 - self.norm = norm_layer(head_dim) - self.head = nn.Linear(head_dim, num_classes) + self.num_features = embed_dim if self.vit_stem else embed_dim * 2 + self.norm = norm_layer(self.num_features) + self.global_pool, self.head = create_classifier(self.num_features, self.num_classes, pool_type=global_pool) + self.head = nn.Linear(self.num_features, num_classes) # weights init if self.pos_embed: @@ -267,7 +266,14 @@ class Visformer(nn.Module): if m.bias is not None: nn.init.constant_(m.bias, 0.) 
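The per-stage `img_size = [x // n for x in img_size]` bookkeeping above is the point of this Visformer refactor: tracking the feature map as an (H, W) pair instead of a single int lets each stage's pos_embed grid follow non-square inputs. A standalone sketch of the grid arithmetic, assuming the default Visformer strides (stem 2x, then patch embeds 4x/2x/2x); the helper name is illustrative, not part of the patch:

def stage_grid_sizes(img_size, stage_reductions=(8, 2, 2)):
    # cumulative stride per stage: stem (2) * patch_embed1 (4) = 8, then 2, then 2
    h, w = img_size
    grids = []
    for r in stage_reductions:
        h, w = h // r, w // r
        grids.append((h, w))
    return grids

print(stage_grid_sizes((224, 224)))  # [(28, 28), (14, 14), (7, 7)]
print(stage_grid_sizes((256, 192)))  # [(32, 24), (16, 12), (8, 6)] -- unreachable with a scalar img_size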
- def forward(self, x): + def get_classifier(self): + return self.head + + def reset_classifier(self, num_classes, global_pool='avg'): + self.num_classes = num_classes + self.global_pool, self.head = create_classifier(self.num_features, self.num_classes, pool_type=global_pool) + + def forward_features(self, x): if self.stem is not None: x = self.stem(x) @@ -297,14 +303,13 @@ class Visformer(nn.Module): for b in self.stage3: x = b(x) - # head x = self.norm(x) - if self.pool: - x = self.global_pooling(x) - else: - x = x[:, :, 0, 0] + return x - x = self.head(x.view(x.size(0), -1)) + def forward(self, x): + x = self.forward_features(x) + x = self.global_pool(x) + x = self.head(x) return x @@ -321,7 +326,7 @@ def _create_visformer(variant, pretrained=False, default_cfg=None, **kwargs): @register_model def visformer_tiny(pretrained=False, **kwargs): model_cfg = dict( - img_size=224, init_channels=16, embed_dim=192, depth=(7, 4, 4), num_heads=3, mlp_ratio=4., group=8, + init_channels=16, embed_dim=192, depth=(7, 4, 4), num_heads=3, mlp_ratio=4., group=8, attn_stage='011', spatial_conv='100', norm_layer=nn.BatchNorm2d, conv_init=True, embed_norm=nn.BatchNorm2d, **kwargs) model = _create_visformer('visformer_tiny', pretrained=pretrained, **model_cfg) @@ -331,7 +336,7 @@ def visformer_tiny(pretrained=False, **kwargs): @register_model def visformer_small(pretrained=False, **kwargs): model_cfg = dict( - img_size=224, init_channels=32, embed_dim=384, depth=(7, 4, 4), num_heads=6, mlp_ratio=4., group=8, + init_channels=32, embed_dim=384, depth=(7, 4, 4), num_heads=6, mlp_ratio=4., group=8, attn_stage='011', spatial_conv='100', norm_layer=nn.BatchNorm2d, conv_init=True, embed_norm=nn.BatchNorm2d, **kwargs) model = _create_visformer('visformer_small', pretrained=pretrained, **model_cfg) diff --git a/timm/models/vision_transformer.py b/timm/models/vision_transformer.py index ff74d836..c44358df 100644 --- a/timm/models/vision_transformer.py +++ b/timm/models/vision_transformer.py @@ -28,7 +28,7 @@ import torch.nn as nn import torch.nn.functional as F from timm.data import IMAGENET_DEFAULT_MEAN, IMAGENET_DEFAULT_STD -from .helpers import build_model_with_cfg, overlay_external_default_cfg +from .helpers import build_model_with_cfg, named_apply, adapt_input_conv from .layers import PatchEmbed, Mlp, DropPath, trunc_normal_, lecun_normal_ from .registry import register_model @@ -47,9 +47,18 @@ def _cfg(url='', **kwargs): default_cfgs = { - # patch models (my experiments) + # FIXME weights coming + 'vit_tiny_patch16_224': _cfg( + url='', + mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5), + ), 'vit_small_patch16_224': _cfg( - url='https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-weights/vit_small_p16_224-15ec54c9.pth', + url='', + mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5), + ), + 'vit_small_patch32_224': _cfg( + url='', + mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5), ), # patch models (weights ported from official Google JAX impl) @@ -97,29 +106,29 @@ default_cfgs = { num_classes=21843, mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5)), # deit models (FB weights) - 'vit_deit_tiny_patch16_224': _cfg( + 'deit_tiny_patch16_224': _cfg( url='https://dl.fbaipublicfiles.com/deit/deit_tiny_patch16_224-a1311bcf.pth'), - 'vit_deit_small_patch16_224': _cfg( + 'deit_small_patch16_224': _cfg( url='https://dl.fbaipublicfiles.com/deit/deit_small_patch16_224-cd65a155.pth'), - 'vit_deit_base_patch16_224': _cfg( + 'deit_base_patch16_224': _cfg( url='https://dl.fbaipublicfiles.com/deit/deit_base_patch16_224-b5f2ef4d.pth',), 
- 'vit_deit_base_patch16_384': _cfg(
+ 'deit_base_patch16_384': _cfg(
url='https://dl.fbaipublicfiles.com/deit/deit_base_patch16_384-8de9b5d1.pth',
input_size=(3, 384, 384), crop_pct=1.0),
- 'vit_deit_tiny_distilled_patch16_224': _cfg(
+ 'deit_tiny_distilled_patch16_224': _cfg(
url='https://dl.fbaipublicfiles.com/deit/deit_tiny_distilled_patch16_224-b40b3cf7.pth',
classifier=('head', 'head_dist')),
- 'vit_deit_small_distilled_patch16_224': _cfg(
+ 'deit_small_distilled_patch16_224': _cfg(
url='https://dl.fbaipublicfiles.com/deit/deit_small_distilled_patch16_224-649709d9.pth',
classifier=('head', 'head_dist')),
- 'vit_deit_base_distilled_patch16_224': _cfg(
+ 'deit_base_distilled_patch16_224': _cfg(
url='https://dl.fbaipublicfiles.com/deit/deit_base_distilled_patch16_224-df68dfff.pth',
classifier=('head', 'head_dist')),
- 'vit_deit_base_distilled_patch16_384': _cfg(
+ 'deit_base_distilled_patch16_384': _cfg(
url='https://dl.fbaipublicfiles.com/deit/deit_base_distilled_patch16_384-d0272ac0.pth',
input_size=(3, 384, 384), crop_pct=1.0, classifier=('head', 'head_dist')),
- # ViT ImageNet-21K-P pretraining
+ # ViT ImageNet-21K-P pretraining by MIIL
'vit_base_patch16_224_miil_in21k': _cfg(
url='https://miil-public-eu.oss-eu-central-1.aliyuncs.com/model-zoo/ImageNet_21K_P/models/timm/vit_base_patch16_224_in21k_miil.pth',
mean=(0, 0, 0), std=(1, 1, 1), crop_pct=0.875, interpolation='bilinear', num_classes=11221,
@@ -133,11 +142,11 @@ default_cfgs = {
class Attention(nn.Module):
- def __init__(self, dim, num_heads=8, qkv_bias=False, qk_scale=None, attn_drop=0., proj_drop=0.):
+ def __init__(self, dim, num_heads=8, qkv_bias=False, attn_drop=0., proj_drop=0.):
super().__init__()
self.num_heads = num_heads
head_dim = dim // num_heads
- self.scale = qk_scale or head_dim ** -0.5
+ self.scale = head_dim ** -0.5
self.qkv = nn.Linear(dim, dim * 3, bias=qkv_bias)
self.attn_drop = nn.Dropout(attn_drop)
@@ -161,12 +170,11 @@ class Attention(nn.Module):
class Block(nn.Module):
- def __init__(self, dim, num_heads, mlp_ratio=4., qkv_bias=False, qk_scale=None, drop=0., attn_drop=0.,
+ def __init__(self, dim, num_heads, mlp_ratio=4., qkv_bias=False, drop=0., attn_drop=0.,
drop_path=0., act_layer=nn.GELU, norm_layer=nn.LayerNorm):
super().__init__()
self.norm1 = norm_layer(dim)
- self.attn = Attention(
- dim, num_heads=num_heads, qkv_bias=qkv_bias, qk_scale=qk_scale, attn_drop=attn_drop, proj_drop=drop)
+ self.attn = Attention(dim, num_heads=num_heads, qkv_bias=qkv_bias, attn_drop=attn_drop, proj_drop=drop)
# NOTE: drop path for stochastic depth, we shall see if this is better than dropout here
self.drop_path = DropPath(drop_path) if drop_path > 0.
else nn.Identity() self.norm2 = norm_layer(dim) @@ -190,7 +198,7 @@ class VisionTransformer(nn.Module): """ def __init__(self, img_size=224, patch_size=16, in_chans=3, num_classes=1000, embed_dim=768, depth=12, - num_heads=12, mlp_ratio=4., qkv_bias=True, qk_scale=None, representation_size=None, distilled=False, + num_heads=12, mlp_ratio=4., qkv_bias=True, representation_size=None, distilled=False, drop_rate=0., attn_drop_rate=0., drop_path_rate=0., embed_layer=PatchEmbed, norm_layer=None, act_layer=None, weight_init=''): """ @@ -204,7 +212,6 @@ class VisionTransformer(nn.Module): num_heads (int): number of attention heads mlp_ratio (int): ratio of mlp hidden dim to embedding dim qkv_bias (bool): enable bias for qkv if True - qk_scale (float): override default qk scale of head_dim ** -0.5 if set representation_size (Optional[int]): enable and set representation layer (pre-logits) to this value if set distilled (bool): model includes a distillation token and head as in DeiT models drop_rate (float): dropout rate @@ -233,8 +240,8 @@ class VisionTransformer(nn.Module): dpr = [x.item() for x in torch.linspace(0, drop_path_rate, depth)] # stochastic depth decay rule self.blocks = nn.Sequential(*[ Block( - dim=embed_dim, num_heads=num_heads, mlp_ratio=mlp_ratio, qkv_bias=qkv_bias, qk_scale=qk_scale, - drop=drop_rate, attn_drop=attn_drop_rate, drop_path=dpr[i], norm_layer=norm_layer, act_layer=act_layer) + dim=embed_dim, num_heads=num_heads, mlp_ratio=mlp_ratio, qkv_bias=qkv_bias, drop=drop_rate, + attn_drop=attn_drop_rate, drop_path=dpr[i], norm_layer=norm_layer, act_layer=act_layer) for i in range(depth)]) self.norm = norm_layer(embed_dim) @@ -254,16 +261,17 @@ class VisionTransformer(nn.Module): if distilled: self.head_dist = nn.Linear(self.embed_dim, self.num_classes) if num_classes > 0 else nn.Identity() - # Weight init - assert weight_init in ('jax', 'jax_nlhb', 'nlhb', '') - head_bias = -math.log(self.num_classes) if 'nlhb' in weight_init else 0. + self.init_weights(weight_init) + + def init_weights(self, mode=''): + assert mode in ('jax', 'jax_nlhb', 'nlhb', '') + head_bias = -math.log(self.num_classes) if 'nlhb' in mode else 0. trunc_normal_(self.pos_embed, std=.02) if self.dist_token is not None: trunc_normal_(self.dist_token, std=.02) - if weight_init.startswith('jax'): + if mode.startswith('jax'): # leave cls token as zeros to match jax impl - for n, m in self.named_modules(): - _init_vit_weights(m, n, head_bias=head_bias, jax_impl=True) + named_apply(partial(_init_vit_weights, head_bias=head_bias, jax_impl=True), self) else: trunc_normal_(self.cls_token, std=.02) self.apply(_init_vit_weights) @@ -272,6 +280,10 @@ class VisionTransformer(nn.Module): # this fn left here for compat with downstream users _init_vit_weights(m) + @torch.jit.ignore() + def load_pretrained(self, checkpoint_path, prefix=''): + _load_weights(self, checkpoint_path, prefix) + @torch.jit.ignore def no_weight_decay(self): return {'pos_embed', 'cls_token', 'dist_token'} @@ -317,39 +329,92 @@ class VisionTransformer(nn.Module): return x -def _init_vit_weights(m, n: str = '', head_bias: float = 0., jax_impl: bool = False): +def _init_vit_weights(module: nn.Module, name: str = '', head_bias: float = 0., jax_impl: bool = False): """ ViT weight initialization * When called without n, head_bias, jax_impl args it will behave exactly the same as my original init for compatibility with prev hparam / downstream use cases (ie DeiT). 
* When called w/ valid n (module name) and jax_impl=True, will (hopefully) match JAX impl """ - if isinstance(m, nn.Linear): - if n.startswith('head'): - nn.init.zeros_(m.weight) - nn.init.constant_(m.bias, head_bias) - elif n.startswith('pre_logits'): - lecun_normal_(m.weight) - nn.init.zeros_(m.bias) + if isinstance(module, nn.Linear): + if name.startswith('head'): + nn.init.zeros_(module.weight) + nn.init.constant_(module.bias, head_bias) + elif name.startswith('pre_logits'): + lecun_normal_(module.weight) + nn.init.zeros_(module.bias) else: if jax_impl: - nn.init.xavier_uniform_(m.weight) - if m.bias is not None: - if 'mlp' in n: - nn.init.normal_(m.bias, std=1e-6) + nn.init.xavier_uniform_(module.weight) + if module.bias is not None: + if 'mlp' in name: + nn.init.normal_(module.bias, std=1e-6) else: - nn.init.zeros_(m.bias) + nn.init.zeros_(module.bias) else: - trunc_normal_(m.weight, std=.02) - if m.bias is not None: - nn.init.zeros_(m.bias) - elif jax_impl and isinstance(m, nn.Conv2d): + trunc_normal_(module.weight, std=.02) + if module.bias is not None: + nn.init.zeros_(module.bias) + elif jax_impl and isinstance(module, nn.Conv2d): # NOTE conv was left to pytorch default in my original init - lecun_normal_(m.weight) - if m.bias is not None: - nn.init.zeros_(m.bias) - elif isinstance(m, nn.LayerNorm): - nn.init.zeros_(m.bias) - nn.init.ones_(m.weight) + lecun_normal_(module.weight) + if module.bias is not None: + nn.init.zeros_(module.bias) + elif isinstance(module, (nn.LayerNorm, nn.GroupNorm, nn.BatchNorm2d)): + nn.init.zeros_(module.bias) + nn.init.ones_(module.weight) + + +@torch.no_grad() +def _load_weights(model: VisionTransformer, checkpoint_path: str, prefix: str = ''): + """ Load weights from .npz checkpoints for official Google Brain Flax implementation + """ + import numpy as np + + def _n2p(w, t=True): + if t and w.ndim == 4: + w = w.transpose([3, 2, 0, 1]) + elif t and w.ndim == 3: + w = w.transpose([2, 0, 1]) + elif t and w.ndim == 2: + w = w.transpose([1, 0]) + return torch.from_numpy(w) + + w = np.load(checkpoint_path) + if not prefix: + prefix = 'opt/target/' if 'opt/target/embedding/kernel' in w else prefix + + input_conv_w = adapt_input_conv( + model.patch_embed.proj.weight.shape[1], _n2p(w[f'{prefix}embedding/kernel'])) + model.patch_embed.proj.weight.copy_(input_conv_w) + model.patch_embed.proj.bias.copy_(_n2p(w[f'{prefix}embedding/bias'])) + model.cls_token.copy_(_n2p(w[f'{prefix}cls'], t=False)) + model.pos_embed.copy_(_n2p(w[f'{prefix}Transformer/posembed_input/pos_embedding'], t=False)) + model.norm.weight.copy_(_n2p(w[f'{prefix}Transformer/encoder_norm/scale'])) + model.norm.bias.copy_(_n2p(w[f'{prefix}Transformer/encoder_norm/bias'])) + if model.head.bias.shape[0] == w[f'{prefix}head/bias'].shape[-1]: + model.head.weight.copy_(_n2p(w[f'{prefix}head/kernel'])) + model.head.bias.copy_(_n2p(w[f'{prefix}head/bias'])) + for i, block in enumerate(model.blocks.children()): + block_prefix = f'{prefix}Transformer/encoderblock_{i}/' + block.norm1.weight.copy_(_n2p(w[f'{block_prefix}LayerNorm_0/scale'])) + block.norm1.bias.copy_(_n2p(w[f'{block_prefix}LayerNorm_0/bias'])) + mha_prefix = block_prefix + 'MultiHeadDotProductAttention_1/' + block.attn.qkv.weight.copy_(torch.cat([ + _n2p(w[f'{mha_prefix}query/kernel'], t=False).flatten(1).T, + _n2p(w[f'{mha_prefix}key/kernel'], t=False).flatten(1).T, + _n2p(w[f'{mha_prefix}value/kernel'], t=False).flatten(1).T])) + block.attn.qkv.bias.copy_(torch.cat([ + _n2p(w[f'{mha_prefix}query/bias'], t=False).reshape(-1), + 
_n2p(w[f'{mha_prefix}key/bias'], t=False).reshape(-1),
+ _n2p(w[f'{mha_prefix}value/bias'], t=False).reshape(-1)]))
+ block.attn.proj.weight.copy_(_n2p(w[f'{mha_prefix}out/kernel']).flatten(1))
+ block.attn.proj.bias.copy_(_n2p(w[f'{mha_prefix}out/bias']))
+ block.mlp.fc1.weight.copy_(_n2p(w[f'{block_prefix}MlpBlock_3/Dense_0/kernel']))
+ block.mlp.fc1.bias.copy_(_n2p(w[f'{block_prefix}MlpBlock_3/Dense_0/bias']))
+ block.mlp.fc2.weight.copy_(_n2p(w[f'{block_prefix}MlpBlock_3/Dense_1/kernel']))
+ block.mlp.fc2.bias.copy_(_n2p(w[f'{block_prefix}MlpBlock_3/Dense_1/bias']))
+ block.norm2.weight.copy_(_n2p(w[f'{block_prefix}LayerNorm_2/scale']))
+ block.norm2.bias.copy_(_n2p(w[f'{block_prefix}LayerNorm_2/bias']))
def resize_pos_embed(posemb, posemb_new, num_tokens=1, gs_new=()):
@@ -417,23 +482,34 @@ def _create_vision_transformer(variant, pretrained=False, default_cfg=None, **kw
return model
+@register_model
+def vit_tiny_patch16_224(pretrained=False, **kwargs):
+ """ ViT-Tiny (ViT-Ti/16)
+ """
+ model_kwargs = dict(patch_size=16, embed_dim=192, depth=12, num_heads=3, **kwargs)
+ model = _create_vision_transformer('vit_tiny_patch16_224', pretrained=pretrained, **model_kwargs)
+ return model
+
+
@register_model
def vit_small_patch16_224(pretrained=False, **kwargs):
- """ My custom 'small' ViT model. embed_dim=768, depth=8, num_heads=8, mlp_ratio=3.
- NOTE:
- * this differs from the DeiT based 'small' definitions with embed_dim=384, depth=12, num_heads=6
- * this model does not have a bias for QKV (unlike the official ViT and DeiT models)
+ """ ViT-Small (ViT-S/16)
+ NOTE I've replaced my previous 'small' model definition and weights with the small variant from the DeiT paper
"""
- model_kwargs = dict(
- patch_size=16, embed_dim=768, depth=8, num_heads=8, mlp_ratio=3.,
- qkv_bias=False, norm_layer=nn.LayerNorm, **kwargs)
- if pretrained:
- # NOTE my scale was wrong for original weights, leaving this here until I have better ones for this model
- model_kwargs.setdefault('qk_scale', 768 ** -0.5)
+ model_kwargs = dict(patch_size=16, embed_dim=384, depth=12, num_heads=6, **kwargs)
model = _create_vision_transformer('vit_small_patch16_224', pretrained=pretrained, **model_kwargs)
return model
+@register_model
+def vit_small_patch32_224(pretrained=False, **kwargs):
+ """ ViT-Small (ViT-S/32)
+ """
+ model_kwargs = dict(patch_size=32, embed_dim=384, depth=12, num_heads=6, **kwargs)
+ model = _create_vision_transformer('vit_small_patch32_224', pretrained=pretrained, **model_kwargs)
+ return model
+
+
@register_model
def vit_base_patch16_224(pretrained=False, **kwargs):
""" ViT-Base (ViT-B/16) from original paper (https://arxiv.org/abs/2010.11929).
@@ -569,86 +645,86 @@ def vit_huge_patch14_224_in21k(pretrained=False, **kwargs):
@register_model
-def vit_deit_tiny_patch16_224(pretrained=False, **kwargs):
+def deit_tiny_patch16_224(pretrained=False, **kwargs):
""" DeiT-tiny model @ 224x224 from paper (https://arxiv.org/abs/2012.12877).
ImageNet-1k weights from https://github.com/facebookresearch/deit.
""" model_kwargs = dict(patch_size=16, embed_dim=192, depth=12, num_heads=3, **kwargs) - model = _create_vision_transformer('vit_deit_tiny_patch16_224', pretrained=pretrained, **model_kwargs) + model = _create_vision_transformer('deit_tiny_patch16_224', pretrained=pretrained, **model_kwargs) return model @register_model -def vit_deit_small_patch16_224(pretrained=False, **kwargs): +def deit_small_patch16_224(pretrained=False, **kwargs): """ DeiT-small model @ 224x224 from paper (https://arxiv.org/abs/2012.12877). ImageNet-1k weights from https://github.com/facebookresearch/deit. """ model_kwargs = dict(patch_size=16, embed_dim=384, depth=12, num_heads=6, **kwargs) - model = _create_vision_transformer('vit_deit_small_patch16_224', pretrained=pretrained, **model_kwargs) + model = _create_vision_transformer('deit_small_patch16_224', pretrained=pretrained, **model_kwargs) return model @register_model -def vit_deit_base_patch16_224(pretrained=False, **kwargs): +def deit_base_patch16_224(pretrained=False, **kwargs): """ DeiT base model @ 224x224 from paper (https://arxiv.org/abs/2012.12877). ImageNet-1k weights from https://github.com/facebookresearch/deit. """ model_kwargs = dict(patch_size=16, embed_dim=768, depth=12, num_heads=12, **kwargs) - model = _create_vision_transformer('vit_deit_base_patch16_224', pretrained=pretrained, **model_kwargs) + model = _create_vision_transformer('deit_base_patch16_224', pretrained=pretrained, **model_kwargs) return model @register_model -def vit_deit_base_patch16_384(pretrained=False, **kwargs): +def deit_base_patch16_384(pretrained=False, **kwargs): """ DeiT base model @ 384x384 from paper (https://arxiv.org/abs/2012.12877). ImageNet-1k weights from https://github.com/facebookresearch/deit. """ model_kwargs = dict(patch_size=16, embed_dim=768, depth=12, num_heads=12, **kwargs) - model = _create_vision_transformer('vit_deit_base_patch16_384', pretrained=pretrained, **model_kwargs) + model = _create_vision_transformer('deit_base_patch16_384', pretrained=pretrained, **model_kwargs) return model @register_model -def vit_deit_tiny_distilled_patch16_224(pretrained=False, **kwargs): +def deit_tiny_distilled_patch16_224(pretrained=False, **kwargs): """ DeiT-tiny distilled model @ 224x224 from paper (https://arxiv.org/abs/2012.12877). ImageNet-1k weights from https://github.com/facebookresearch/deit. """ model_kwargs = dict(patch_size=16, embed_dim=192, depth=12, num_heads=3, **kwargs) model = _create_vision_transformer( - 'vit_deit_tiny_distilled_patch16_224', pretrained=pretrained, distilled=True, **model_kwargs) + 'deit_tiny_distilled_patch16_224', pretrained=pretrained, distilled=True, **model_kwargs) return model @register_model -def vit_deit_small_distilled_patch16_224(pretrained=False, **kwargs): +def deit_small_distilled_patch16_224(pretrained=False, **kwargs): """ DeiT-small distilled model @ 224x224 from paper (https://arxiv.org/abs/2012.12877). ImageNet-1k weights from https://github.com/facebookresearch/deit. 
""" model_kwargs = dict(patch_size=16, embed_dim=384, depth=12, num_heads=6, **kwargs) model = _create_vision_transformer( - 'vit_deit_small_distilled_patch16_224', pretrained=pretrained, distilled=True, **model_kwargs) + 'deit_small_distilled_patch16_224', pretrained=pretrained, distilled=True, **model_kwargs) return model @register_model -def vit_deit_base_distilled_patch16_224(pretrained=False, **kwargs): +def deit_base_distilled_patch16_224(pretrained=False, **kwargs): """ DeiT-base distilled model @ 224x224 from paper (https://arxiv.org/abs/2012.12877). ImageNet-1k weights from https://github.com/facebookresearch/deit. """ model_kwargs = dict(patch_size=16, embed_dim=768, depth=12, num_heads=12, **kwargs) model = _create_vision_transformer( - 'vit_deit_base_distilled_patch16_224', pretrained=pretrained, distilled=True, **model_kwargs) + 'deit_base_distilled_patch16_224', pretrained=pretrained, distilled=True, **model_kwargs) return model @register_model -def vit_deit_base_distilled_patch16_384(pretrained=False, **kwargs): +def deit_base_distilled_patch16_384(pretrained=False, **kwargs): """ DeiT-base distilled model @ 384x384 from paper (https://arxiv.org/abs/2012.12877). ImageNet-1k weights from https://github.com/facebookresearch/deit. """ model_kwargs = dict(patch_size=16, embed_dim=768, depth=12, num_heads=12, **kwargs) model = _create_vision_transformer( - 'vit_deit_base_distilled_patch16_384', pretrained=pretrained, distilled=True, **model_kwargs) + 'deit_base_distilled_patch16_384', pretrained=pretrained, distilled=True, **model_kwargs) return model diff --git a/timm/models/vision_transformer_hybrid.py b/timm/models/vision_transformer_hybrid.py index 7fc0cc88..c807ee9a 100644 --- a/timm/models/vision_transformer_hybrid.py +++ b/timm/models/vision_transformer_hybrid.py @@ -46,8 +46,8 @@ default_cfgs = { input_size=(3, 384, 384), crop_pct=1.0), # hybrid in-1k models (mostly untrained, experimental configs w/ resnetv2 stdconv backbones) - 'vit_tiny_r_s16_p8_224': _cfg(), - 'vit_small_r_s16_p8_224': _cfg(), + 'vit_tiny_r_s16_p8_224': _cfg(first_conv='patch_embed.backbone.conv'), + 'vit_small_r_s16_p8_224': _cfg(first_conv='patch_embed.backbone.conv'), 'vit_small_r20_s16_p2_224': _cfg(), 'vit_small_r20_s16_224': _cfg(), 'vit_small_r26_s32_224': _cfg(), @@ -57,10 +57,14 @@ default_cfgs = { 'vit_large_r50_s32_224': _cfg(), # hybrid models (using timm resnet backbones) - 'vit_small_resnet26d_224': _cfg(mean=IMAGENET_DEFAULT_MEAN, std=IMAGENET_DEFAULT_STD), - 'vit_small_resnet50d_s16_224': _cfg(mean=IMAGENET_DEFAULT_MEAN, std=IMAGENET_DEFAULT_STD), - 'vit_base_resnet26d_224': _cfg(mean=IMAGENET_DEFAULT_MEAN, std=IMAGENET_DEFAULT_STD), - 'vit_base_resnet50d_224': _cfg(mean=IMAGENET_DEFAULT_MEAN, std=IMAGENET_DEFAULT_STD), + 'vit_small_resnet26d_224': _cfg( + mean=IMAGENET_DEFAULT_MEAN, std=IMAGENET_DEFAULT_STD, first_conv='patch_embed.backbone.conv1.0'), + 'vit_small_resnet50d_s16_224': _cfg( + mean=IMAGENET_DEFAULT_MEAN, std=IMAGENET_DEFAULT_STD, first_conv='patch_embed.backbone.conv1.0'), + 'vit_base_resnet26d_224': _cfg( + mean=IMAGENET_DEFAULT_MEAN, std=IMAGENET_DEFAULT_STD, first_conv='patch_embed.backbone.conv1.0'), + 'vit_base_resnet50d_224': _cfg( + mean=IMAGENET_DEFAULT_MEAN, std=IMAGENET_DEFAULT_STD, first_conv='patch_embed.backbone.conv1.0'), } @@ -140,12 +144,6 @@ def vit_base_r50_s16_224_in21k(pretrained=False, **kwargs): return model -@register_model -def vit_base_resnet50_224_in21k(pretrained=False, **kwargs): - # NOTE this is forwarding to model def above for backwards 
compatibility - return vit_base_r50_s16_224_in21k(pretrained=pretrained, **kwargs) - - @register_model def vit_base_r50_s16_384(pretrained=False, **kwargs): """ R50+ViT-B/16 hybrid from original paper (https://arxiv.org/abs/2010.11929). @@ -158,12 +156,6 @@ def vit_base_r50_s16_384(pretrained=False, **kwargs): return model -@register_model -def vit_base_resnet50_384(pretrained=False, **kwargs): - # NOTE this is forwarding to model def above for backwards compatibility - return vit_base_r50_s16_384(pretrained=pretrained, **kwargs) - - @register_model def vit_tiny_r_s16_p8_224(pretrained=False, **kwargs): """ R+ViT-Ti/S16 w/ 8x8 patch hybrid @ 224 x 224. From 0020268d9b292a3b8ac82dcb1e21039ca32b0823 Mon Sep 17 00:00:00 2001 From: Ross Wightman Date: Sat, 12 Jun 2021 23:31:24 -0700 Subject: [PATCH 16/31] Try lower max size for non_std default_cfg test --- tests/test_models.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_models.py b/tests/test_models.py index ac156806..52a8023a 100644 --- a/tests/test_models.py +++ b/tests/test_models.py @@ -174,7 +174,7 @@ def test_model_default_cfgs_non_std(model_name, batch_size): cfg = model.default_cfg input_size = _get_input_size(model_name=model_name, target=TARGET_FWD_SIZE) - if max(input_size) > MAX_FWD_SIZE: + if max(input_size) > 320: # FIXME const pytest.skip("Fixed input size model > limit.") input_tensor = torch.randn((batch_size, *input_size)) From 8319e0c37357e162f0e870c08621f145a8e76830 Mon Sep 17 00:00:00 2001 From: Ross Wightman Date: Sun, 13 Jun 2021 12:31:06 -0700 Subject: [PATCH 17/31] Add file docstring to std_conv.py --- timm/models/layers/std_conv.py | 32 ++++++++++++++++++++++---------- 1 file changed, 22 insertions(+), 10 deletions(-) diff --git a/timm/models/layers/std_conv.py b/timm/models/layers/std_conv.py index 49b35875..3ccc16e1 100644 --- a/timm/models/layers/std_conv.py +++ b/timm/models/layers/std_conv.py @@ -1,3 +1,21 @@ +""" Convolution with Weight Standardization (StdConv and ScaledStdConv) + +StdConv: +@article{weightstandardization, + author = {Siyuan Qiao and Huiyu Wang and Chenxi Liu and Wei Shen and Alan Yuille}, + title = {Weight Standardization}, + journal = {arXiv preprint arXiv:1903.10520}, + year = {2019}, +} +Code: https://github.com/joe-siyuan-qiao/WeightStandardization + +ScaledStdConv: +Paper: `Characterizing signal propagation to close the performance gap in unnormalized ResNets` + - https://arxiv.org/abs/2101.08692 +Official Deepmind JAX code: https://github.com/deepmind/deepmind-research/tree/master/nfnets + +Hacked together by / copyright Ross Wightman, 2021. +""" import torch import torch.nn as nn import torch.nn.functional as F @@ -5,12 +23,6 @@ import torch.nn.functional as F from .padding import get_padding, get_padding_value, pad_same -def get_weight(module): - std, mean = torch.std_mean(module.weight, dim=[1, 2, 3], keepdim=True, unbiased=False) - weight = (module.weight - mean) / (std + module.eps) - return weight - - class StdConv2d(nn.Conv2d): """Conv2d with Weight Standardization. Used for BiT ResNet-V2 models. 
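NOTE (editor's sketch, not part of the patch): the forward() hunks below standardize the conv weight with the regular batch-norm kernel by viewing it as a (1, out_channels, fan_in) batch, one "channel" per output filter. A minimal standalone check of that equivalence:

import torch
import torch.nn.functional as F

w = torch.randn(8, 4, 3, 3)  # conv weight: (out_channels, in_channels, kH, kW)
eps = 1e-5

# Explicit Weight Standardization: zero mean / unit variance per output filter.
std, mean = torch.std_mean(w, dim=[1, 2, 3], keepdim=True, unbiased=False)
explicit = (w - mean) / torch.sqrt(std ** 2 + eps)

# Same statistics via the batch_norm kernel, treating each filter as a channel.
via_bn = F.batch_norm(
    w.view(1, w.shape[0], -1), None, None,
    training=True, momentum=0., eps=eps).reshape_as(w)

print(torch.allclose(explicit, via_bn, atol=1e-6))  # True

(The removed get_weight() divided by (std + eps) rather than sqrt(var + eps), so the older path differs at the eps level.)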
@@ -30,7 +42,7 @@ class StdConv2d(nn.Conv2d): def forward(self, x): weight = F.batch_norm( self.weight.view(1, self.out_channels, -1), None, None, - eps=self.eps, training=True, momentum=0.).reshape_as(self.weight) + training=True, momentum=0., eps=self.eps).reshape_as(self.weight) x = F.conv2d(x, weight, self.bias, self.stride, self.padding, self.dilation, self.groups) return x @@ -56,7 +68,7 @@ class StdConv2dSame(nn.Conv2d): x = pad_same(x, self.kernel_size, self.stride, self.dilation) weight = F.batch_norm( self.weight.view(1, self.out_channels, -1), None, None, - eps=self.eps, training=True, momentum=0.).reshape_as(self.weight) + training=True, momentum=0., eps=self.eps).reshape_as(self.weight) x = F.conv2d(x, weight, self.bias, self.stride, self.padding, self.dilation, self.groups) return x @@ -86,7 +98,7 @@ class ScaledStdConv2d(nn.Conv2d): weight = F.batch_norm( self.weight.view(1, self.out_channels, -1), None, None, weight=(self.gain * self.scale).view(-1), - eps=self.eps, training=True, momentum=0.).reshape_as(self.weight) + training=True, momentum=0., eps=self.eps).reshape_as(self.weight) return F.conv2d(x, weight, self.bias, self.stride, self.padding, self.dilation, self.groups) @@ -117,5 +129,5 @@ class ScaledStdConv2dSame(nn.Conv2d): weight = F.batch_norm( self.weight.view(1, self.out_channels, -1), None, None, weight=(self.gain * self.scale).view(-1), - eps=self.eps, training=True, momentum=0.).reshape_as(self.weight) + training=True, momentum=0., eps=self.eps).reshape_as(self.weight) return F.conv2d(x, weight, self.bias, self.stride, self.padding, self.dilation, self.groups) From b9cfb64412e367a1352d46f00906453d0274282c Mon Sep 17 00:00:00 2001 From: Ross Wightman Date: Mon, 14 Jun 2021 12:31:44 -0700 Subject: [PATCH 18/31] Support npz custom load for vision transformer hybrid models. Add posembed rescale for npz load. 
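NOTE (editor's sketch, not the patch's exact code): the posembed rescale added here boils down to 2D-interpolating the patch-grid portion of pos_embed to the new grid while carrying the class token through unchanged. timm's resize_pos_embed also handles extra tokens and non-square grids; this simplified sketch (function name and defaults are mine) shows just the core:

import math
import torch
import torch.nn.functional as F

def resize_grid_pos_embed(posemb, new_grid, num_prefix_tokens=1):
    # posemb: (1, num_prefix_tokens + H*W, C); resize the H*W part to new_grid.
    prefix, grid = posemb[:, :num_prefix_tokens], posemb[:, num_prefix_tokens:]
    old = int(math.sqrt(grid.shape[1]))  # assumes a square source grid
    grid = grid.reshape(1, old, old, -1).permute(0, 3, 1, 2)
    grid = F.interpolate(grid, size=new_grid, mode='bicubic', align_corners=False)
    grid = grid.permute(0, 2, 3, 1).reshape(1, new_grid[0] * new_grid[1], -1)
    return torch.cat([prefix, grid], dim=1)

posemb = torch.randn(1, 1 + 14 * 14, 192)             # 224/16 grid, ViT-Ti width
print(resize_grid_pos_embed(posemb, (24, 24)).shape)  # torch.Size([1, 577, 192])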
--- timm/models/layers/pool2d_same.py | 10 +- timm/models/vision_transformer.py | 96 ++++++++++++----- timm/models/vision_transformer_hybrid.py | 131 ++++++++++++++++++----- 3 files changed, 181 insertions(+), 56 deletions(-) diff --git a/timm/models/layers/pool2d_same.py b/timm/models/layers/pool2d_same.py index 5fcd0f1f..4c2a1c44 100644 --- a/timm/models/layers/pool2d_same.py +++ b/timm/models/layers/pool2d_same.py @@ -27,7 +27,8 @@ class AvgPool2dSame(nn.AvgPool2d): super(AvgPool2dSame, self).__init__(kernel_size, stride, (0, 0), ceil_mode, count_include_pad) def forward(self, x): - return avg_pool2d_same( + x = pad_same(x, self.kernel_size, self.stride) + return F.avg_pool2d( x, self.kernel_size, self.stride, self.padding, self.ceil_mode, self.count_include_pad) @@ -41,14 +42,15 @@ def max_pool2d_same( class MaxPool2dSame(nn.MaxPool2d): """ Tensorflow like 'SAME' wrapper for 2D max pooling """ - def __init__(self, kernel_size: int, stride=None, padding=0, dilation=1, ceil_mode=False, count_include_pad=True): + def __init__(self, kernel_size: int, stride=None, padding=0, dilation=1, ceil_mode=False): kernel_size = to_2tuple(kernel_size) stride = to_2tuple(stride) dilation = to_2tuple(dilation) - super(MaxPool2dSame, self).__init__(kernel_size, stride, (0, 0), dilation, ceil_mode, count_include_pad) + super(MaxPool2dSame, self).__init__(kernel_size, stride, (0, 0), dilation, ceil_mode) def forward(self, x): - return max_pool2d_same(x, self.kernel_size, self.stride, self.padding, self.dilation, self.ceil_mode) + x = pad_same(x, self.kernel_size, self.stride, value=-float('inf')) + return F.max_pool2d(x, self.kernel_size, self.stride, (0, 0), self.dilation, self.ceil_mode) def create_pool2d(pool_type, kernel_size, stride=None, **kwargs): diff --git a/timm/models/vision_transformer.py b/timm/models/vision_transformer.py index c44358df..7dd9137e 100644 --- a/timm/models/vision_transformer.py +++ b/timm/models/vision_transformer.py @@ -52,6 +52,10 @@ default_cfgs = { url='', mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5), ), + 'vit_tiny_patch16_384': _cfg( + url='', + input_size=(3, 384, 384), mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5), crop_pct=1.0 + ), 'vit_small_patch16_224': _cfg( url='', mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5), @@ -60,6 +64,14 @@ default_cfgs = { url='', mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5), ), + 'vit_small_patch16_384': _cfg( + url='', + input_size=(3, 384, 384), mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5), crop_pct=1.0 + ), + 'vit_small_patch32_384': _cfg( + url='', + input_size=(3, 384, 384), mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5), crop_pct=1.0 + ), # patch models (weights ported from official Google JAX impl) 'vit_base_patch16_224': _cfg( @@ -102,6 +114,7 @@ default_cfgs = { url='https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-vitjx/jx_vit_large_patch32_224_in21k-9046d2e7.pth', num_classes=21843, mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5)), 'vit_huge_patch14_224_in21k': _cfg( + url='https://storage.googleapis.com/vit_models/imagenet21k/ViT-H_14.npz', hf_hub='timm/vit_huge_patch14_224_in21k', num_classes=21843, mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5)), @@ -371,24 +384,53 @@ def _load_weights(model: VisionTransformer, checkpoint_path: str, prefix: str = import numpy as np def _n2p(w, t=True): - if t and w.ndim == 4: - w = w.transpose([3, 2, 0, 1]) - elif t and w.ndim == 3: - w = w.transpose([2, 0, 1]) - elif t and w.ndim == 2: - w = w.transpose([1, 0]) + if w.ndim == 4 and w.shape[0] == w.shape[1] == w.shape[2] == 1: + w = w.flatten() + if 
t: + if w.ndim == 4: + w = w.transpose([3, 2, 0, 1]) + elif w.ndim == 3: + w = w.transpose([2, 0, 1]) + elif w.ndim == 2: + w = w.transpose([1, 0]) return torch.from_numpy(w) w = np.load(checkpoint_path) - if not prefix: - prefix = 'opt/target/' if 'opt/target/embedding/kernel' in w else prefix - - input_conv_w = adapt_input_conv( - model.patch_embed.proj.weight.shape[1], _n2p(w[f'{prefix}embedding/kernel'])) - model.patch_embed.proj.weight.copy_(input_conv_w) + if not prefix and 'opt/target/embedding/kernel' in w: + prefix = 'opt/target/' + + if hasattr(model.patch_embed, 'backbone'): + # hybrid + backbone = model.patch_embed.backbone + stem_only = not hasattr(backbone, 'stem') + stem = backbone if stem_only else backbone.stem + stem.conv.weight.copy_(adapt_input_conv(stem.conv.weight.shape[1], _n2p(w[f'{prefix}conv_root/kernel']))) + stem.norm.weight.copy_(_n2p(w[f'{prefix}gn_root/scale'])) + stem.norm.bias.copy_(_n2p(w[f'{prefix}gn_root/bias'])) + if not stem_only: + for i, stage in enumerate(backbone.stages): + for j, block in enumerate(stage.blocks): + bp = f'{prefix}block{i + 1}/unit{j + 1}/' + for r in range(3): + getattr(block, f'conv{r + 1}').weight.copy_(_n2p(w[f'{bp}conv{r + 1}/kernel'])) + getattr(block, f'norm{r + 1}').weight.copy_(_n2p(w[f'{bp}gn{r + 1}/scale'])) + getattr(block, f'norm{r + 1}').bias.copy_(_n2p(w[f'{bp}gn{r + 1}/bias'])) + if block.downsample is not None: + block.downsample.conv.weight.copy_(_n2p(w[f'{bp}conv_proj/kernel'])) + block.downsample.norm.weight.copy_(_n2p(w[f'{bp}gn_proj/scale'])) + block.downsample.norm.bias.copy_(_n2p(w[f'{bp}gn_proj/bias'])) + embed_conv_w = _n2p(w[f'{prefix}embedding/kernel']) + else: + embed_conv_w = adapt_input_conv( + model.patch_embed.proj.weight.shape[1], _n2p(w[f'{prefix}embedding/kernel'])) + model.patch_embed.proj.weight.copy_(embed_conv_w) model.patch_embed.proj.bias.copy_(_n2p(w[f'{prefix}embedding/bias'])) model.cls_token.copy_(_n2p(w[f'{prefix}cls'], t=False)) - model.pos_embed.copy_(_n2p(w[f'{prefix}Transformer/posembed_input/pos_embedding'], t=False)) + pos_embed_w = _n2p(w[f'{prefix}Transformer/posembed_input/pos_embedding'], t=False) + if pos_embed_w.shape != model.pos_embed.shape: + pos_embed_w = resize_pos_embed( # resize pos embedding when different size from pretrained weights + pos_embed_w, model.pos_embed, getattr(model, 'num_tokens', 1), model.patch_embed.grid_size) + model.pos_embed.copy_(pos_embed_w) model.norm.weight.copy_(_n2p(w[f'{prefix}Transformer/encoder_norm/scale'])) model.norm.bias.copy_(_n2p(w[f'{prefix}Transformer/encoder_norm/bias'])) if model.head.bias.shape[0] == w[f'{prefix}head/bias'].shape[-1]: @@ -396,23 +438,18 @@ def _load_weights(model: VisionTransformer, checkpoint_path: str, prefix: str = model.head.bias.copy_(_n2p(w[f'{prefix}head/bias'])) for i, block in enumerate(model.blocks.children()): block_prefix = f'{prefix}Transformer/encoderblock_{i}/' + mha_prefix = block_prefix + 'MultiHeadDotProductAttention_1/' block.norm1.weight.copy_(_n2p(w[f'{block_prefix}LayerNorm_0/scale'])) block.norm1.bias.copy_(_n2p(w[f'{block_prefix}LayerNorm_0/bias'])) - mha_prefix = block_prefix + 'MultiHeadDotProductAttention_1/' block.attn.qkv.weight.copy_(torch.cat([ - _n2p(w[f'{mha_prefix}query/kernel'], t=False).flatten(1).T, - _n2p(w[f'{mha_prefix}key/kernel'], t=False).flatten(1).T, - _n2p(w[f'{mha_prefix}value/kernel'], t=False).flatten(1).T])) + _n2p(w[f'{mha_prefix}{n}/kernel'], t=False).flatten(1).T for n in ('query', 'key', 'value')])) block.attn.qkv.bias.copy_(torch.cat([ - 
_n2p(w[f'{mha_prefix}query/bias'], t=False).reshape(-1), - _n2p(w[f'{mha_prefix}key/bias'], t=False).reshape(-1), - _n2p(w[f'{mha_prefix}value/bias'], t=False).reshape(-1)])) + _n2p(w[f'{mha_prefix}{n}/bias'], t=False).reshape(-1) for n in ('query', 'key', 'value')])) block.attn.proj.weight.copy_(_n2p(w[f'{mha_prefix}out/kernel']).flatten(1)) block.attn.proj.bias.copy_(_n2p(w[f'{mha_prefix}out/bias'])) - block.mlp.fc1.weight.copy_(_n2p(w[f'{block_prefix}MlpBlock_3/Dense_0/kernel'])) - block.mlp.fc1.bias.copy_(_n2p(w[f'{block_prefix}MlpBlock_3/Dense_0/bias'])) - block.mlp.fc2.weight.copy_(_n2p(w[f'{block_prefix}MlpBlock_3/Dense_1/kernel'])) - block.mlp.fc2.bias.copy_(_n2p(w[f'{block_prefix}MlpBlock_3/Dense_1/bias'])) + for r in range(2): + getattr(block.mlp, f'fc{r + 1}').weight.copy_(_n2p(w[f'{block_prefix}MlpBlock_3/Dense_{r}/kernel'])) + getattr(block.mlp, f'fc{r + 1}').bias.copy_(_n2p(w[f'{block_prefix}MlpBlock_3/Dense_{r}/bias'])) block.norm2.weight.copy_(_n2p(w[f'{block_prefix}LayerNorm_2/scale'])) block.norm2.bias.copy_(_n2p(w[f'{block_prefix}LayerNorm_2/bias'])) @@ -478,6 +515,7 @@ def _create_vision_transformer(variant, pretrained=False, default_cfg=None, **kw default_cfg=default_cfg, representation_size=repr_size, pretrained_filter_fn=checkpoint_filter_fn, + pretrained_custom_load='npz' in default_cfg['url'], **kwargs) return model @@ -510,6 +548,16 @@ def vit_small_patch32_224(pretrained=False, **kwargs): return model +@register_model +def vit_small_patch16_384(pretrained=False, **kwargs): + """ ViT-Small (ViT-S/16) + NOTE I've replaced my previous 'small' model definition and weights with the small variant from the DeiT paper + """ + model_kwargs = dict(patch_size=16, embed_dim=384, depth=12, num_heads=6, **kwargs) + model = _create_vision_transformer('vit_small_patch16_384', pretrained=pretrained, **model_kwargs) + return model + + @register_model def vit_base_patch16_224(pretrained=False, **kwargs): """ ViT-Base (ViT-B/16) from original paper (https://arxiv.org/abs/2010.11929). 
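NOTE (editor's aside): the _n2p helper above exists because the .npz checkpoints hold Flax-layout arrays, so loading is mostly a transpose problem — conv kernels are stored HWIO where PyTorch wants OIHW, and linear kernels are (in, out) where PyTorch wants (out, in). A simplified sketch of just that conversion (helper name is mine, not timm's):

import numpy as np
import torch

def jax_param_to_torch(w: np.ndarray) -> torch.Tensor:
    if w.ndim == 4:    # conv kernel: HWIO -> OIHW
        w = w.transpose(3, 2, 0, 1)
    elif w.ndim == 2:  # linear kernel: (in, out) -> (out, in)
        w = w.transpose(1, 0)
    return torch.from_numpy(np.ascontiguousarray(w))

print(jax_param_to_torch(np.zeros((3, 3, 3, 64), dtype=np.float32)).shape)
# torch.Size([64, 3, 3, 3])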
diff --git a/timm/models/vision_transformer_hybrid.py b/timm/models/vision_transformer_hybrid.py index c807ee9a..1bfe6685 100644 --- a/timm/models/vision_transformer_hybrid.py +++ b/timm/models/vision_transformer_hybrid.py @@ -35,26 +35,34 @@ def _cfg(url='', **kwargs): default_cfgs = { - # hybrid in-21k models (weights ported from official Google JAX impl where they exist) - 'vit_base_r50_s16_224_in21k': _cfg( - url='https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-vitjx/jx_vit_base_resnet50_224_in21k-6f7c7740.pth', - num_classes=21843, crop_pct=0.9), - - # hybrid in-1k models (weights ported from official JAX impl) - 'vit_base_r50_s16_384': _cfg( - url='https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-vitjx/jx_vit_base_resnet50_384-9fd3c705.pth', - input_size=(3, 384, 384), crop_pct=1.0), - - # hybrid in-1k models (mostly untrained, experimental configs w/ resnetv2 stdconv backbones) + # hybrid in-1k models (weights ported from official JAX impl where they exist) 'vit_tiny_r_s16_p8_224': _cfg(first_conv='patch_embed.backbone.conv'), + 'vit_tiny_r_s16_p8_384': _cfg( + first_conv='patch_embed.backbone.conv', input_size=(3, 384, 384), crop_pct=1.0), 'vit_small_r_s16_p8_224': _cfg(first_conv='patch_embed.backbone.conv'), 'vit_small_r20_s16_p2_224': _cfg(), 'vit_small_r20_s16_224': _cfg(), 'vit_small_r26_s32_224': _cfg(), + 'vit_small_r26_s32_384': _cfg( + input_size=(3, 384, 384), crop_pct=1.0), 'vit_base_r20_s16_224': _cfg(), 'vit_base_r26_s32_224': _cfg(), 'vit_base_r50_s16_224': _cfg(), + 'vit_base_r50_s16_384': _cfg( + url='https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-vitjx/jx_vit_base_resnet50_384-9fd3c705.pth', + input_size=(3, 384, 384), crop_pct=1.0), 'vit_large_r50_s32_224': _cfg(), + 'vit_large_r50_s32_384': _cfg(), + + # hybrid in-21k models (weights ported from official Google JAX impl where they exist) + 'vit_small_r26_s32_224_in21k': _cfg( + num_classes=21843, crop_pct=0.9), + 'vit_small_r26_s32_384_in21k': _cfg( + num_classes=21843, input_size=(3, 384, 384), crop_pct=1.0), + 'vit_base_r50_s16_224_in21k': _cfg( + url='https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-vitjx/jx_vit_base_resnet50_224_in21k-6f7c7740.pth', + num_classes=21843, crop_pct=0.9), + 'vit_large_r50_s32_224_in21k': _cfg(num_classes=21843, crop_pct=0.9), # hybrid models (using timm resnet backbones) 'vit_small_resnet26d_224': _cfg( @@ -99,7 +107,8 @@ class HybridEmbed(nn.Module): else: feature_dim = self.backbone.num_features assert feature_size[0] % patch_size[0] == 0 and feature_size[1] % patch_size[1] == 0 - self.num_patches = feature_size[0] // patch_size[0] * feature_size[1] // patch_size[1] + self.grid_size = (feature_size[0] // patch_size[0], feature_size[1] // patch_size[1]) + self.num_patches = self.grid_size[0] * self.grid_size[1] self.proj = nn.Conv2d(feature_dim, embed_dim, kernel_size=patch_size, stride=patch_size) def forward(self, x): @@ -133,37 +142,35 @@ def _resnetv2(layers=(3, 4, 9), **kwargs): @register_model -def vit_base_r50_s16_224_in21k(pretrained=False, **kwargs): - """ R50+ViT-B/16 hybrid model from original paper (https://arxiv.org/abs/2010.11929). - ImageNet-21k weights @ 224x224, source https://github.com/google-research/vision_transformer. +def vit_tiny_r_s16_p8_224(pretrained=False, **kwargs): + """ R+ViT-Ti/S16 w/ 8x8 patch hybrid @ 224 x 224. 
""" - backbone = _resnetv2(layers=(3, 4, 9), **kwargs) - model_kwargs = dict(embed_dim=768, depth=12, num_heads=12, representation_size=768, **kwargs) + backbone = _resnetv2(layers=(), **kwargs) + model_kwargs = dict(patch_size=8, embed_dim=192, depth=12, num_heads=3, **kwargs) model = _create_vision_transformer_hybrid( - 'vit_base_r50_s16_224_in21k', backbone=backbone, pretrained=pretrained, **model_kwargs) + 'vit_tiny_r_s16_p8_224', backbone=backbone, pretrained=pretrained, **model_kwargs) return model @register_model -def vit_base_r50_s16_384(pretrained=False, **kwargs): - """ R50+ViT-B/16 hybrid from original paper (https://arxiv.org/abs/2010.11929). - ImageNet-1k weights fine-tuned from in21k @ 384x384, source https://github.com/google-research/vision_transformer. +def vit_tiny_r_s16_p8_384(pretrained=False, **kwargs): + """ R+ViT-Ti/S16 w/ 8x8 patch hybrid @ 384 x 384. """ - backbone = _resnetv2((3, 4, 9), **kwargs) - model_kwargs = dict(embed_dim=768, depth=12, num_heads=12, **kwargs) + backbone = _resnetv2(layers=(), **kwargs) + model_kwargs = dict(patch_size=8, embed_dim=192, depth=12, num_heads=3, **kwargs) model = _create_vision_transformer_hybrid( - 'vit_base_r50_s16_384', backbone=backbone, pretrained=pretrained, **model_kwargs) + 'vit_tiny_r_s16_p8_384', backbone=backbone, pretrained=pretrained, **model_kwargs) return model @register_model -def vit_tiny_r_s16_p8_224(pretrained=False, **kwargs): - """ R+ViT-Ti/S16 w/ 8x8 patch hybrid @ 224 x 224. +def vit_tiny_r_s16_p8_384(pretrained=False, **kwargs): + """ R+ViT-Ti/S16 w/ 8x8 patch hybrid @ 384 x 384. """ backbone = _resnetv2(layers=(), **kwargs) model_kwargs = dict(patch_size=8, embed_dim=192, depth=12, num_heads=3, **kwargs) model = _create_vision_transformer_hybrid( - 'vit_tiny_r_s16_p8_224', backbone=backbone, pretrained=pretrained, **model_kwargs) + 'vit_tiny_r_s16_p8_384', backbone=backbone, pretrained=pretrained, **model_kwargs) return model @@ -212,6 +219,17 @@ def vit_small_r26_s32_224(pretrained=False, **kwargs): return model +@register_model +def vit_small_r26_s32_384(pretrained=False, **kwargs): + """ R26+ViT-S/S32 hybrid. + """ + backbone = _resnetv2((2, 2, 2, 2), **kwargs) + model_kwargs = dict(embed_dim=384, depth=12, num_heads=6, **kwargs) + model = _create_vision_transformer_hybrid( + 'vit_small_r26_s32_384', backbone=backbone, pretrained=pretrained, **model_kwargs) + return model + + @register_model def vit_base_r20_s16_224(pretrained=False, **kwargs): """ R20+ViT-B/S16 hybrid. @@ -245,17 +263,74 @@ def vit_base_r50_s16_224(pretrained=False, **kwargs): return model +@register_model +def vit_base_r50_s16_384(pretrained=False, **kwargs): + """ R50+ViT-B/16 hybrid from original paper (https://arxiv.org/abs/2010.11929). + ImageNet-1k weights fine-tuned from in21k @ 384x384, source https://github.com/google-research/vision_transformer. + """ + backbone = _resnetv2((3, 4, 9), **kwargs) + model_kwargs = dict(embed_dim=768, depth=12, num_heads=12, **kwargs) + model = _create_vision_transformer_hybrid( + 'vit_base_r50_s16_384', backbone=backbone, pretrained=pretrained, **model_kwargs) + return model + + @register_model def vit_large_r50_s32_224(pretrained=False, **kwargs): """ R50+ViT-L/S32 hybrid. 
""" backbone = _resnetv2((3, 4, 6, 3), **kwargs) - model_kwargs = dict(embed_dim=768, depth=12, num_heads=12, **kwargs) + model_kwargs = dict(embed_dim=1024, depth=24, num_heads=16, **kwargs) model = _create_vision_transformer_hybrid( 'vit_large_r50_s32_224', backbone=backbone, pretrained=pretrained, **model_kwargs) return model +@register_model +def vit_large_r50_s32_384(pretrained=False, **kwargs): + """ R50+ViT-L/S32 hybrid. + """ + backbone = _resnetv2((3, 4, 6, 3), **kwargs) + model_kwargs = dict(embed_dim=1024, depth=24, num_heads=16, **kwargs) + model = _create_vision_transformer_hybrid( + 'vit_large_r50_s32_384', backbone=backbone, pretrained=pretrained, **model_kwargs) + return model + + +@register_model +def vit_small_r26_s32_224_in21k(pretrained=False, **kwargs): + """ R26+ViT-S/S32 hybrid. + """ + backbone = _resnetv2((2, 2, 2, 2), **kwargs) + model_kwargs = dict(embed_dim=384, depth=12, num_heads=6, **kwargs) + model = _create_vision_transformer_hybrid( + 'vit_small_r26_s32_224_in21k', backbone=backbone, pretrained=pretrained, **model_kwargs) + return model + + +@register_model +def vit_small_r26_s32_384_in21k(pretrained=False, **kwargs): + """ R26+ViT-S/S32 hybrid. + """ + backbone = _resnetv2((2, 2, 2, 2), **kwargs) + model_kwargs = dict(embed_dim=384, depth=12, num_heads=6, **kwargs) + model = _create_vision_transformer_hybrid( + 'vit_small_r26_s32_384_in21k', backbone=backbone, pretrained=pretrained, **model_kwargs) + return model + + +@register_model +def vit_base_r50_s16_224_in21k(pretrained=False, **kwargs): + """ R50+ViT-B/16 hybrid model from original paper (https://arxiv.org/abs/2010.11929). + ImageNet-21k weights @ 224x224, source https://github.com/google-research/vision_transformer. + """ + backbone = _resnetv2(layers=(3, 4, 9), **kwargs) + model_kwargs = dict(embed_dim=768, depth=12, num_heads=12, representation_size=768, **kwargs) + model = _create_vision_transformer_hybrid( + 'vit_base_r50_s16_224_in21k', backbone=backbone, pretrained=pretrained, **model_kwargs) + return model + + @register_model def vit_small_resnet26d_224(pretrained=False, **kwargs): """ Custom ViT small hybrid w/ ResNet26D stride 32. No pretrained weights. From 511a8e8c96dcbac7014aec8355f38a658ef40e49 Mon Sep 17 00:00:00 2001 From: Ross Wightman Date: Mon, 14 Jun 2021 17:01:12 -0700 Subject: [PATCH 19/31] Add official ResMLP weights. --- timm/models/mlp_mixer.py | 146 +++++++++++++++++++++++++++++++++++---- 1 file changed, 134 insertions(+), 12 deletions(-) diff --git a/timm/models/mlp_mixer.py b/timm/models/mlp_mixer.py index 637e00ea..db3a1be5 100644 --- a/timm/models/mlp_mixer.py +++ b/timm/models/mlp_mixer.py @@ -14,8 +14,9 @@ Paper: 'MLP-Mixer: An all-MLP Architecture for Vision' - https://arxiv.org/abs/2 year={2021} } -Also supporting preliminary (not verified) implementations of ResMlp, gMLP, and possibly more... 
+Also supporting ResMlp, and a preliminary (not verified) implementation of gMLP
+Code: https://github.com/facebookresearch/deit
 Paper: `ResMLP: Feedforward networks for image classification...` - https://arxiv.org/abs/2105.03404
 @misc{touvron2021resmlp,
     title={ResMLP: Feedforward networks for image classification with data-efficient training},
@@ -94,11 +95,36 @@ default_cfgs = dict(
     gmixer_12_224=_cfg(mean=IMAGENET_DEFAULT_MEAN, std=IMAGENET_DEFAULT_STD),
     gmixer_24_224=_cfg(mean=IMAGENET_DEFAULT_MEAN, std=IMAGENET_DEFAULT_STD),
-    resmlp_12_224=_cfg(mean=IMAGENET_DEFAULT_MEAN, std=IMAGENET_DEFAULT_STD),
+    resmlp_12_224=_cfg(
+        url='https://dl.fbaipublicfiles.com/deit/resmlp_12_no_dist.pth',
+        mean=IMAGENET_DEFAULT_MEAN, std=IMAGENET_DEFAULT_STD),
     resmlp_24_224=_cfg(
-        url='https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-weights/resmlp_24_224_raa-a8256759.pth',
-        mean=IMAGENET_DEFAULT_MEAN, std=IMAGENET_DEFAULT_STD, crop_pct=0.89),
-    resmlp_36_224=_cfg(mean=IMAGENET_DEFAULT_MEAN, std=IMAGENET_DEFAULT_STD),
+        url='https://dl.fbaipublicfiles.com/deit/resmlp_24_no_dist.pth',
+        #url='https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-weights/resmlp_24_224_raa-a8256759.pth',
+        mean=IMAGENET_DEFAULT_MEAN, std=IMAGENET_DEFAULT_STD),
+    resmlp_36_224=_cfg(
+        url='https://dl.fbaipublicfiles.com/deit/resmlp_36_no_dist.pth',
+        mean=IMAGENET_DEFAULT_MEAN, std=IMAGENET_DEFAULT_STD),
+    resmlp_big_24_224=_cfg(
+        url='https://dl.fbaipublicfiles.com/deit/resmlpB_24_no_dist.pth',
+        mean=IMAGENET_DEFAULT_MEAN, std=IMAGENET_DEFAULT_STD),
+
+    resmlp_12_distilled_224=_cfg(
+        url='https://dl.fbaipublicfiles.com/deit/resmlp_12_dist.pth',
+        mean=IMAGENET_DEFAULT_MEAN, std=IMAGENET_DEFAULT_STD),
+    resmlp_24_distilled_224=_cfg(
+        url='https://dl.fbaipublicfiles.com/deit/resmlp_24_dist.pth',
+        mean=IMAGENET_DEFAULT_MEAN, std=IMAGENET_DEFAULT_STD),
+    resmlp_36_distilled_224=_cfg(
+        url='https://dl.fbaipublicfiles.com/deit/resmlp_36_dist.pth',
+        mean=IMAGENET_DEFAULT_MEAN, std=IMAGENET_DEFAULT_STD),
+    resmlp_big_24_distilled_224=_cfg(
+        url='https://dl.fbaipublicfiles.com/deit/resmlpB_24_dist.pth',
+        mean=IMAGENET_DEFAULT_MEAN, std=IMAGENET_DEFAULT_STD),
+
+    resmlp_big_24_224_in22ft1k=_cfg(
+        url='https://dl.fbaipublicfiles.com/deit/resmlpB_24_22k.pth',
+        mean=IMAGENET_DEFAULT_MEAN, std=IMAGENET_DEFAULT_STD),
 
     gmlp_ti16_224=_cfg(),
     gmlp_s16_224=_cfg(),
@@ -266,7 +292,7 @@ class MlpMixer(nn.Module):
     return x
 
 
-def _init_weights(module: nn.Module, name: str, head_bias: float = 0.):
+def _init_weights(module: nn.Module, name: str, head_bias: float = 0., flax=False):
     """ Mixer weight initialization (trying to match Flax defaults)
     """
     if isinstance(module, nn.Linear):
@@ -274,12 +300,19 @@
             nn.init.zeros_(module.weight)
             nn.init.constant_(module.bias, head_bias)
         else:
-            nn.init.xavier_uniform_(module.weight)
-            if module.bias is not None:
-                if 'mlp' in name:
-                    nn.init.normal_(module.bias, std=1e-6)
-                else:
+            if flax:
+                # Flax defaults
+                lecun_normal_(module.weight)
+                if module.bias is not None:
                     nn.init.zeros_(module.bias)
+            else:
+                # like MLP init in vit (my original init)
+                nn.init.xavier_uniform_(module.weight)
+                if module.bias is not None:
+                    if 'mlp' in name:
+                        nn.init.normal_(module.bias, std=1e-6)
+                    else:
+                        nn.init.zeros_(module.bias)
     elif isinstance(module, nn.Conv2d):
         lecun_normal_(module.weight)
         if module.bias is not None:
@@ -293,6 +326,23 @@ def _init_weights(module: nn.Module, name: str, head_bias:
float = 0.): module.init_weights() +def checkpoint_filter_fn(state_dict, model): + """ Remap checkpoints if needed """ + if 'patch_embed.proj.weight' in state_dict: + # Remap FB ResMlp models -> timm + out_dict = {} + for k, v in state_dict.items(): + k = k.replace('patch_embed.', 'stem.') + k = k.replace('attn.', 'linear_tokens.') + k = k.replace('mlp.', 'mlp_channels.') + k = k.replace('gamma_', 'ls') + if k.endswith('.alpha') or k.endswith('.beta'): + v = v.reshape(1, 1, -1) + out_dict[k] = v + return out_dict + return state_dict + + def _create_mixer(variant, pretrained=False, **kwargs): if kwargs.get('features_only', None): raise RuntimeError('features_only not implemented for MLP-Mixer models.') @@ -300,6 +350,7 @@ def _create_mixer(variant, pretrained=False, **kwargs): model = build_model_with_cfg( MlpMixer, variant, pretrained, default_cfg=default_cfgs[variant], + pretrained_filter_fn=checkpoint_filter_fn, **kwargs) return model @@ -458,11 +509,82 @@ def resmlp_36_224(pretrained=False, **kwargs): """ model_args = dict( patch_size=16, num_blocks=36, embed_dim=384, mlp_ratio=4, - block_layer=partial(ResBlock, init_values=1e-5), norm_layer=Affine, **kwargs) + block_layer=partial(ResBlock, init_values=1e-6), norm_layer=Affine, **kwargs) model = _create_mixer('resmlp_36_224', pretrained=pretrained, **model_args) return model +@register_model +def resmlp_big_24_224(pretrained=False, **kwargs): + """ ResMLP-B-24 + Paper: `ResMLP: Feedforward networks for image classification...` - https://arxiv.org/abs/2105.03404 + """ + model_args = dict( + patch_size=8, num_blocks=24, embed_dim=768, mlp_ratio=4, + block_layer=partial(ResBlock, init_values=1e-6), norm_layer=Affine, **kwargs) + model = _create_mixer('resmlp_big_24_224', pretrained=pretrained, **model_args) + return model + + +@register_model +def resmlp_12_distilled_224(pretrained=False, **kwargs): + """ ResMLP-12 + Paper: `ResMLP: Feedforward networks for image classification...` - https://arxiv.org/abs/2105.03404 + """ + model_args = dict( + patch_size=16, num_blocks=12, embed_dim=384, mlp_ratio=4, block_layer=ResBlock, norm_layer=Affine, **kwargs) + model = _create_mixer('resmlp_12_distilled_224', pretrained=pretrained, **model_args) + return model + + +@register_model +def resmlp_24_distilled_224(pretrained=False, **kwargs): + """ ResMLP-24 + Paper: `ResMLP: Feedforward networks for image classification...` - https://arxiv.org/abs/2105.03404 + """ + model_args = dict( + patch_size=16, num_blocks=24, embed_dim=384, mlp_ratio=4, + block_layer=partial(ResBlock, init_values=1e-5), norm_layer=Affine, **kwargs) + model = _create_mixer('resmlp_24_distilled_224', pretrained=pretrained, **model_args) + return model + + +@register_model +def resmlp_36_distilled_224(pretrained=False, **kwargs): + """ ResMLP-36 + Paper: `ResMLP: Feedforward networks for image classification...` - https://arxiv.org/abs/2105.03404 + """ + model_args = dict( + patch_size=16, num_blocks=36, embed_dim=384, mlp_ratio=4, + block_layer=partial(ResBlock, init_values=1e-6), norm_layer=Affine, **kwargs) + model = _create_mixer('resmlp_36_distilled_224', pretrained=pretrained, **model_args) + return model + + +@register_model +def resmlp_big_24_distilled_224(pretrained=False, **kwargs): + """ ResMLP-B-24 + Paper: `ResMLP: Feedforward networks for image classification...` - https://arxiv.org/abs/2105.03404 + """ + model_args = dict( + patch_size=8, num_blocks=24, embed_dim=768, mlp_ratio=4, + block_layer=partial(ResBlock, init_values=1e-6), norm_layer=Affine, **kwargs) + 
model = _create_mixer('resmlp_big_24_distilled_224', pretrained=pretrained, **model_args)
+    return model
+
+
+@register_model
+def resmlp_big_24_224_in22ft1k(pretrained=False, **kwargs):
+    """ ResMLP-B-24
+    Paper: `ResMLP: Feedforward networks for image classification...` - https://arxiv.org/abs/2105.03404
+    """
+    model_args = dict(
+        patch_size=8, num_blocks=24, embed_dim=768, mlp_ratio=4,
+        block_layer=partial(ResBlock, init_values=1e-6), norm_layer=Affine, **kwargs)
+    model = _create_mixer('resmlp_big_24_224_in22ft1k', pretrained=pretrained, **model_args)
+    return model
+
+
 @register_model
 def gmlp_ti16_224(pretrained=False, **kwargs):
     """ gMLP-Tiny

From 1228f5a3d84afb7da614387be681b08a3dc8317f Mon Sep 17 00:00:00 2001
From: Ross Wightman
Date: Fri, 18 Jun 2021 11:40:33 -0700
Subject: [PATCH 20/31] Add BiT distilled 50x1 and teacher 152x2 models from 'A good teacher is patient and consistent' paper.

---
 timm/models/resnetv2.py | 38 ++++++++++++++++++++++++++++++++++++++
 1 file changed, 38 insertions(+)

diff --git a/timm/models/resnetv2.py b/timm/models/resnetv2.py
index 84b16bb2..054b0af1 100644
--- a/timm/models/resnetv2.py
+++ b/timm/models/resnetv2.py
@@ -11,6 +11,7 @@ https://github.com/google-research/vision_transformer
 Thanks to the Google team for the above two repositories and associated papers:
 * Big Transfer (BiT): General Visual Representation Learning - https://arxiv.org/abs/1912.11370
 * An Image is Worth 16x16 Words: Transformers for Image Recognition at Scale - https://arxiv.org/abs/2010.11929
+* Knowledge distillation: A good teacher is patient and consistent - https://arxiv.org/abs/2106.05237
 
 Original copyright of Google code below, modifications by Ross Wightman, Copyright 2020.
 """
@@ -86,6 +87,16 @@ default_cfgs = {
         url='https://storage.googleapis.com/bit_models/BiT-M-R152x4.npz',
         num_classes=21843),
 
+    'resnetv2_50x1_bit_distilled': _cfg(
+        url='https://storage.googleapis.com/bit_models/distill/R50x1_224.npz',
+        input_size=(3, 224, 224), crop_pct=0.875, interpolation='bicubic'),
+    'resnetv2_152x2_bit_teacher': _cfg(
+        url='https://storage.googleapis.com/bit_models/distill/R152x2_T_224.npz',
+        input_size=(3, 224, 224), crop_pct=0.875, interpolation='bicubic'),
+    'resnetv2_152x2_bit_teacher_384': _cfg(
+        url='https://storage.googleapis.com/bit_models/distill/R152x2_T_384.npz',
+        input_size=(3, 384, 384), crop_pct=1.0, interpolation='bicubic'),
+
     'resnetv2_50': _cfg(
         input_size=(3, 224, 224), crop_pct=0.875, interpolation='bicubic'),
     'resnetv2_50d': _cfg(
@@ -521,6 +532,33 @@ def resnetv2_152x4_bitm_in21k(pretrained=False, **kwargs):
         layers=[3, 8, 36, 3], width_factor=4, **kwargs)
 
 
+@register_model
+def resnetv2_50x1_bit_distilled(pretrained=False, **kwargs):
+    """ ResNetV2-50x1-BiT Distilled
+    Paper: Knowledge distillation: A good teacher is patient and consistent - https://arxiv.org/abs/2106.05237
+    """
+    return _create_resnetv2_bit(
+        'resnetv2_50x1_bit_distilled', pretrained=pretrained, layers=[3, 4, 6, 3], width_factor=1, **kwargs)
+
+
+@register_model
+def resnetv2_152x2_bit_teacher(pretrained=False, **kwargs):
+    """ ResNetV2-152x2-BiT Teacher
+    Paper: Knowledge distillation: A good teacher is patient and consistent - https://arxiv.org/abs/2106.05237
+    """
+    return _create_resnetv2_bit(
+        'resnetv2_152x2_bit_teacher', pretrained=pretrained, layers=[3, 8, 36, 3], width_factor=2, **kwargs)
+
+
+@register_model
+def resnetv2_152x2_bit_teacher_384(pretrained=False, **kwargs):
+    """ ResNetV2-152x2-BiT Teacher @ 384x384
+    Paper: Knowledge distillation: A good teacher is
patient and consistent - https://arxiv.org/abs/2106.05237 + """ + return _create_resnetv2_bit( + 'resnetv2_152x2_bit_teacher_384', pretrained=pretrained, layers=[3, 8, 36, 3], width_factor=2, **kwargs) + + @register_model def resnetv2_50(pretrained=False, **kwargs): return _create_resnetv2( From 8257b86550b8453b658e386498d4e643d6bf8d38 Mon Sep 17 00:00:00 2001 From: Ross Wightman Date: Fri, 18 Jun 2021 16:16:06 -0700 Subject: [PATCH 21/31] Fix up resnetv2 bit/bitm model default res --- timm/models/resnetv2.py | 32 +++++++++++++++++++------------- 1 file changed, 19 insertions(+), 13 deletions(-) diff --git a/timm/models/resnetv2.py b/timm/models/resnetv2.py index 054b0af1..a3c89532 100644 --- a/timm/models/resnetv2.py +++ b/timm/models/resnetv2.py @@ -44,8 +44,8 @@ from .layers import GroupNormAct, ClassifierHead, DropPath, AvgPool2dSame, creat def _cfg(url='', **kwargs): return { 'url': url, - 'num_classes': 1000, 'input_size': (3, 480, 480), 'pool_size': (7, 7), - 'crop_pct': 1.0, 'interpolation': 'bilinear', + 'num_classes': 1000, 'input_size': (3, 224, 224), 'pool_size': (7, 7), + 'crop_pct': 0.875, 'interpolation': 'bilinear', 'mean': IMAGENET_INCEPTION_MEAN, 'std': IMAGENET_INCEPTION_STD, 'first_conv': 'stem.conv', 'classifier': 'head.fc', **kwargs @@ -55,17 +55,23 @@ def _cfg(url='', **kwargs): default_cfgs = { # pretrained on imagenet21k, finetuned on imagenet1k 'resnetv2_50x1_bitm': _cfg( - url='https://storage.googleapis.com/bit_models/BiT-M-R50x1-ILSVRC2012.npz'), + url='https://storage.googleapis.com/bit_models/BiT-M-R50x1-ILSVRC2012.npz', + input_size=(3, 448, 448), pool_size=(14, 14), crop_pct=1.0), 'resnetv2_50x3_bitm': _cfg( - url='https://storage.googleapis.com/bit_models/BiT-M-R50x3-ILSVRC2012.npz'), + url='https://storage.googleapis.com/bit_models/BiT-M-R50x3-ILSVRC2012.npz', + input_size=(3, 448, 448), pool_size=(14, 14), crop_pct=1.0), 'resnetv2_101x1_bitm': _cfg( - url='https://storage.googleapis.com/bit_models/BiT-M-R101x1-ILSVRC2012.npz'), + url='https://storage.googleapis.com/bit_models/BiT-M-R101x1-ILSVRC2012.npz', + input_size=(3, 448, 448), pool_size=(14, 14), crop_pct=1.0), 'resnetv2_101x3_bitm': _cfg( - url='https://storage.googleapis.com/bit_models/BiT-M-R101x3-ILSVRC2012.npz'), + url='https://storage.googleapis.com/bit_models/BiT-M-R101x3-ILSVRC2012.npz', + input_size=(3, 448, 448), pool_size=(14, 14), crop_pct=1.0), 'resnetv2_152x2_bitm': _cfg( - url='https://storage.googleapis.com/bit_models/BiT-M-R152x2-ILSVRC2012.npz'), + url='https://storage.googleapis.com/bit_models/BiT-M-R152x2-ILSVRC2012.npz', + input_size=(3, 448, 448), pool_size=(14, 14), crop_pct=1.0), 'resnetv2_152x4_bitm': _cfg( - url='https://storage.googleapis.com/bit_models/BiT-M-R152x4-ILSVRC2012.npz'), + url='https://storage.googleapis.com/bit_models/BiT-M-R152x4-ILSVRC2012.npz', + input_size=(3, 480, 480), pool_size=(15, 15), crop_pct=1.0), # only one at 480x480? 
# trained on imagenet-21k 'resnetv2_50x1_bitm_in21k': _cfg( @@ -89,18 +95,18 @@ default_cfgs = { 'resnetv2_50x1_bit_distilled': _cfg( url='https://storage.googleapis.com/bit_models/distill/R50x1_224.npz', - input_size=(3, 224, 224), crop_pct=0.875, interpolation='bicubic'), + interpolation='bicubic'), 'resnetv2_152x2_bit_teacher': _cfg( url='https://storage.googleapis.com/bit_models/distill/R152x2_T_224.npz', - input_size=(3, 224, 224), crop_pct=0.875, interpolation='bicubic'), + interpolation='bicubic'), 'resnetv2_152x2_bit_teacher_384': _cfg( url='https://storage.googleapis.com/bit_models/distill/R152x2_T_384.npz', - input_size=(3, 384, 384), crop_pct=1.0, interpolation='bicubic'), + input_size=(3, 384, 384), pool_size=(12, 12), crop_pct=1.0, interpolation='bicubic'), 'resnetv2_50': _cfg( - input_size=(3, 224, 224), crop_pct=0.875, interpolation='bicubic'), + interpolation='bicubic'), 'resnetv2_50d': _cfg( - input_size=(3, 224, 224), crop_pct=0.875, interpolation='bicubic', first_conv='stem.conv1'), + interpolation='bicubic', first_conv='stem.conv1'), } From b319eb5b5d8d29d109a1ca33bd0de0a1ac0d329c Mon Sep 17 00:00:00 2001 From: Ross Wightman Date: Fri, 18 Jun 2021 16:16:49 -0700 Subject: [PATCH 22/31] Update ViT weights, more details to be added before merge. --- timm/models/vision_transformer.py | 264 ++++++++++++++--------- timm/models/vision_transformer_hybrid.py | 128 +++++------ 2 files changed, 211 insertions(+), 181 deletions(-) diff --git a/timm/models/vision_transformer.py b/timm/models/vision_transformer.py index 7dd9137e..b8fc6fa5 100644 --- a/timm/models/vision_transformer.py +++ b/timm/models/vision_transformer.py @@ -27,7 +27,7 @@ import torch import torch.nn as nn import torch.nn.functional as F -from timm.data import IMAGENET_DEFAULT_MEAN, IMAGENET_DEFAULT_STD +from timm.data import IMAGENET_DEFAULT_MEAN, IMAGENET_DEFAULT_STD, IMAGENET_INCEPTION_MEAN, IMAGENET_INCEPTION_STD from .helpers import build_model_with_cfg, named_apply, adapt_input_conv from .layers import PatchEmbed, Mlp, DropPath, trunc_normal_, lecun_normal_ from .registry import register_model @@ -40,106 +40,116 @@ def _cfg(url='', **kwargs): 'url': url, 'num_classes': 1000, 'input_size': (3, 224, 224), 'pool_size': None, 'crop_pct': .9, 'interpolation': 'bicubic', 'fixed_input_size': True, - 'mean': IMAGENET_DEFAULT_MEAN, 'std': IMAGENET_DEFAULT_STD, + 'mean': IMAGENET_INCEPTION_MEAN, 'std': IMAGENET_INCEPTION_STD, 'first_conv': 'patch_embed.proj', 'classifier': 'head', **kwargs } default_cfgs = { - # FIXME weights coming + # patch models (weights from official Google JAX impl) 'vit_tiny_patch16_224': _cfg( - url='', - mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5), - ), + url='https://storage.googleapis.com/vit_models/augreg/' + 'Ti_16-i21k-300ep-lr_0.001-aug_none-wd_0.03-do_0.0-sd_0.0--imagenet2012-steps_20k-lr_0.03-res_224.npz'), 'vit_tiny_patch16_384': _cfg( - url='', - input_size=(3, 384, 384), mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5), crop_pct=1.0 - ), - 'vit_small_patch16_224': _cfg( - url='', - mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5), - ), + url='https://storage.googleapis.com/vit_models/augreg/' + 'Ti_16-i21k-300ep-lr_0.001-aug_none-wd_0.03-do_0.0-sd_0.0--imagenet2012-steps_20k-lr_0.03-res_384.npz', + input_size=(3, 384, 384), crop_pct=1.0), 'vit_small_patch32_224': _cfg( - url='', - mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5), - ), - 'vit_small_patch16_384': _cfg( - url='', - input_size=(3, 384, 384), mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5), crop_pct=1.0 - ), + 
url='https://storage.googleapis.com/vit_models/augreg/' + 'S_32-i21k-300ep-lr_0.001-aug_light1-wd_0.03-do_0.0-sd_0.0--imagenet2012-steps_20k-lr_0.03-res_224.npz'), 'vit_small_patch32_384': _cfg( - url='', - input_size=(3, 384, 384), mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5), crop_pct=1.0 - ), - - # patch models (weights ported from official Google JAX impl) - 'vit_base_patch16_224': _cfg( - url='https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-vitjx/jx_vit_base_p16_224-80ecf9dd.pth', - mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5), - ), + url='https://storage.googleapis.com/vit_models/augreg/' + 'S_32-i21k-300ep-lr_0.001-aug_light1-wd_0.03-do_0.0-sd_0.0--imagenet2012-steps_20k-lr_0.03-res_384.npz', + input_size=(3, 384, 384), crop_pct=1.0), + 'vit_small_patch16_224': _cfg( + url='https://storage.googleapis.com/vit_models/augreg/' + 'S_16-i21k-300ep-lr_0.001-aug_light1-wd_0.03-do_0.0-sd_0.0--imagenet2012-steps_20k-lr_0.03-res_224.npz'), + 'vit_small_patch16_384': _cfg( + url='https://storage.googleapis.com/vit_models/augreg/' + 'S_16-i21k-300ep-lr_0.001-aug_light1-wd_0.03-do_0.0-sd_0.0--imagenet2012-steps_20k-lr_0.03-res_384.npz', + input_size=(3, 384, 384), crop_pct=1.0), 'vit_base_patch32_224': _cfg( - url='', # no official model weights for this combo, only for in21k - mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5)), - 'vit_base_patch16_384': _cfg( - url='https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-vitjx/jx_vit_base_p16_384-83fb41ba.pth', - input_size=(3, 384, 384), mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5), crop_pct=1.0), + url='https://storage.googleapis.com/vit_models/augreg/' + 'B_32-i21k-300ep-lr_0.001-aug_medium1-wd_0.03-do_0.0-sd_0.0--imagenet2012-steps_20k-lr_0.03-res_224.npz'), 'vit_base_patch32_384': _cfg( - url='https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-vitjx/jx_vit_base_p32_384-830016f5.pth', - input_size=(3, 384, 384), mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5), crop_pct=1.0), - 'vit_large_patch16_224': _cfg( - url='https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-vitjx/jx_vit_large_p16_224-4ee7a4dc.pth', - mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5)), + url='https://storage.googleapis.com/vit_models/augreg/' + 'B_32-i21k-300ep-lr_0.001-aug_light1-wd_0.1-do_0.0-sd_0.0--imagenet2012-steps_20k-lr_0.03-res_384.npz', + input_size=(3, 384, 384), crop_pct=1.0), + 'vit_base_patch16_224': _cfg( + url='https://storage.googleapis.com/vit_models/augreg/' + 'B_16-i21k-300ep-lr_0.001-aug_medium1-wd_0.1-do_0.0-sd_0.0--imagenet2012-steps_20k-lr_0.01-res_224.npz'), + 'vit_base_patch16_384': _cfg( + url='https://storage.googleapis.com/vit_models/augreg/' + 'B_16-i21k-300ep-lr_0.001-aug_medium1-wd_0.1-do_0.0-sd_0.0--imagenet2012-steps_20k-lr_0.01-res_384.npz', + input_size=(3, 384, 384), crop_pct=1.0), 'vit_large_patch32_224': _cfg( url='', # no official model weights for this combo, only for in21k - mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5)), - 'vit_large_patch16_384': _cfg( - url='https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-vitjx/jx_vit_large_p16_384-b3be5167.pth', - input_size=(3, 384, 384), mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5), crop_pct=1.0), + ), 'vit_large_patch32_384': _cfg( url='https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-vitjx/jx_vit_large_p32_384-9b920ba8.pth', - input_size=(3, 384, 384), mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5), crop_pct=1.0), + input_size=(3, 384, 384), crop_pct=1.0), + 'vit_large_patch16_224': _cfg( + 
url='https://storage.googleapis.com/vit_models/augreg/' + 'L_16-i21k-300ep-lr_0.001-aug_medium1-wd_0.1-do_0.1-sd_0.1--imagenet2012-steps_20k-lr_0.01-res_224.npz'), + 'vit_large_patch16_384': _cfg( + url='https://storage.googleapis.com/vit_models/augreg/' + 'L_16-i21k-300ep-lr_0.001-aug_medium1-wd_0.1-do_0.1-sd_0.1--imagenet2012-steps_20k-lr_0.01-res_384.npz', + input_size=(3, 384, 384), crop_pct=1.0), - # patch models, imagenet21k (weights ported from official Google JAX impl) - 'vit_base_patch16_224_in21k': _cfg( - url='https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-vitjx/jx_vit_base_patch16_224_in21k-e5005f0a.pth', - num_classes=21843, mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5)), + # patch models, imagenet21k (weights from official Google JAX impl) + 'vit_tiny_patch16_224_in21k': _cfg( + url='https://storage.googleapis.com/vit_models/augreg/Ti_16-i21k-300ep-lr_0.001-aug_none-wd_0.03-do_0.0-sd_0.0.npz', + num_classes=21843), + 'vit_small_patch32_224_in21k': _cfg( + url='https://storage.googleapis.com/vit_models/augreg/S_32-i21k-300ep-lr_0.001-aug_light1-wd_0.03-do_0.0-sd_0.0.npz', + num_classes=21843), + 'vit_small_patch16_224_in21k': _cfg( + url='https://storage.googleapis.com/vit_models/augreg/S_16-i21k-300ep-lr_0.001-aug_light1-wd_0.03-do_0.0-sd_0.0.npz', + num_classes=21843), 'vit_base_patch32_224_in21k': _cfg( - url='https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-vitjx/jx_vit_base_patch32_224_in21k-8db57226.pth', - num_classes=21843, mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5)), - 'vit_large_patch16_224_in21k': _cfg( - url='https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-vitjx/jx_vit_large_patch16_224_in21k-606da67d.pth', - num_classes=21843, mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5)), + url='https://storage.googleapis.com/vit_models/augreg/B_32-i21k-300ep-lr_0.001-aug_medium1-wd_0.03-do_0.0-sd_0.0.npz', + num_classes=21843), + 'vit_base_patch16_224_in21k': _cfg( + url='https://storage.googleapis.com/vit_models/augreg/B_16-i21k-300ep-lr_0.001-aug_medium1-wd_0.1-do_0.0-sd_0.0.npz', + num_classes=21843), 'vit_large_patch32_224_in21k': _cfg( url='https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-vitjx/jx_vit_large_patch32_224_in21k-9046d2e7.pth', - num_classes=21843, mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5)), + num_classes=21843), + 'vit_large_patch16_224_in21k': _cfg( + url='https://storage.googleapis.com/vit_models/augreg/L_16-i21k-300ep-lr_0.001-aug_medium1-wd_0.1-do_0.1-sd_0.1.npz', + num_classes=21843), 'vit_huge_patch14_224_in21k': _cfg( url='https://storage.googleapis.com/vit_models/imagenet21k/ViT-H_14.npz', hf_hub='timm/vit_huge_patch14_224_in21k', - num_classes=21843, mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5)), + num_classes=21843), # deit models (FB weights) 'deit_tiny_patch16_224': _cfg( - url='https://dl.fbaipublicfiles.com/deit/deit_tiny_patch16_224-a1311bcf.pth'), + url='https://dl.fbaipublicfiles.com/deit/deit_tiny_patch16_224-a1311bcf.pth', + mean=IMAGENET_DEFAULT_MEAN, std=IMAGENET_DEFAULT_STD), 'deit_small_patch16_224': _cfg( - url='https://dl.fbaipublicfiles.com/deit/deit_small_patch16_224-cd65a155.pth'), + url='https://dl.fbaipublicfiles.com/deit/deit_small_patch16_224-cd65a155.pth', + mean=IMAGENET_DEFAULT_MEAN, std=IMAGENET_DEFAULT_STD), 'deit_base_patch16_224': _cfg( - url='https://dl.fbaipublicfiles.com/deit/deit_base_patch16_224-b5f2ef4d.pth',), + url='https://dl.fbaipublicfiles.com/deit/deit_base_patch16_224-b5f2ef4d.pth', + mean=IMAGENET_DEFAULT_MEAN, 
std=IMAGENET_DEFAULT_STD), 'deit_base_patch16_384': _cfg( url='https://dl.fbaipublicfiles.com/deit/deit_base_patch16_384-8de9b5d1.pth', - input_size=(3, 384, 384), crop_pct=1.0), + mean=IMAGENET_DEFAULT_MEAN, std=IMAGENET_DEFAULT_STD, input_size=(3, 384, 384), crop_pct=1.0), 'deit_tiny_distilled_patch16_224': _cfg( url='https://dl.fbaipublicfiles.com/deit/deit_tiny_distilled_patch16_224-b40b3cf7.pth', - classifier=('head', 'head_dist')), + mean=IMAGENET_DEFAULT_MEAN, std=IMAGENET_DEFAULT_STD, classifier=('head', 'head_dist')), 'deit_small_distilled_patch16_224': _cfg( url='https://dl.fbaipublicfiles.com/deit/deit_small_distilled_patch16_224-649709d9.pth', - classifier=('head', 'head_dist')), + mean=IMAGENET_DEFAULT_MEAN, std=IMAGENET_DEFAULT_STD, classifier=('head', 'head_dist')), 'deit_base_distilled_patch16_224': _cfg( url='https://dl.fbaipublicfiles.com/deit/deit_base_distilled_patch16_224-df68dfff.pth', - classifier=('head', 'head_dist')), + mean=IMAGENET_DEFAULT_MEAN, std=IMAGENET_DEFAULT_STD, classifier=('head', 'head_dist')), 'deit_base_distilled_patch16_384': _cfg( url='https://dl.fbaipublicfiles.com/deit/deit_base_distilled_patch16_384-d0272ac0.pth', - input_size=(3, 384, 384), crop_pct=1.0, classifier=('head', 'head_dist')), + mean=IMAGENET_DEFAULT_MEAN, std=IMAGENET_DEFAULT_STD, input_size=(3, 384, 384), crop_pct=1.0, + classifier=('head', 'head_dist')), # ViT ImageNet-21K-P pretraining by MILL 'vit_base_patch16_224_miil_in21k': _cfg( @@ -530,12 +540,11 @@ def vit_tiny_patch16_224(pretrained=False, **kwargs): @register_model -def vit_small_patch16_224(pretrained=False, **kwargs): - """ ViT-Small (ViT-S/16) - NOTE I've replaced my previous 'small' model definition and weights with the small variant from the DeiT paper +def vit_tiny_patch16_384(pretrained=False, **kwargs): + """ ViT-Tiny (Vit-Ti/16) @ 384x384. """ - model_kwargs = dict(patch_size=16, embed_dim=384, depth=12, num_heads=6, **kwargs) - model = _create_vision_transformer('vit_small_patch16_224', pretrained=pretrained, **model_kwargs) + model_kwargs = dict(patch_size=16, embed_dim=192, depth=12, num_heads=3, **kwargs) + model = _create_vision_transformer('vit_tiny_patch16_384', pretrained=pretrained, **model_kwargs) return model @@ -543,28 +552,37 @@ def vit_small_patch16_224(pretrained=False, **kwargs): def vit_small_patch32_224(pretrained=False, **kwargs): """ ViT-Small (ViT-S/32) """ - model_kwargs = dict(patch_size=16, embed_dim=384, depth=12, num_heads=6, **kwargs) + model_kwargs = dict(patch_size=32, embed_dim=384, depth=12, num_heads=6, **kwargs) model = _create_vision_transformer('vit_small_patch32_224', pretrained=pretrained, **model_kwargs) return model @register_model -def vit_small_patch16_384(pretrained=False, **kwargs): +def vit_small_patch32_384(pretrained=False, **kwargs): + """ ViT-Small (ViT-S/32) at 384x384. 
+ """ + model_kwargs = dict(patch_size=32, embed_dim=384, depth=12, num_heads=6, **kwargs) + model = _create_vision_transformer('vit_small_patch32_384', pretrained=pretrained, **model_kwargs) + return model + + +@register_model +def vit_small_patch16_224(pretrained=False, **kwargs): """ ViT-Small (ViT-S/16) NOTE I've replaced my previous 'small' model definition and weights with the small variant from the DeiT paper """ model_kwargs = dict(patch_size=16, embed_dim=384, depth=12, num_heads=6, **kwargs) - model = _create_vision_transformer('vit_small_patch16_384', pretrained=pretrained, **model_kwargs) + model = _create_vision_transformer('vit_small_patch16_224', pretrained=pretrained, **model_kwargs) return model @register_model -def vit_base_patch16_224(pretrained=False, **kwargs): - """ ViT-Base (ViT-B/16) from original paper (https://arxiv.org/abs/2010.11929). - ImageNet-1k weights fine-tuned from in21k @ 224x224, source https://github.com/google-research/vision_transformer. +def vit_small_patch16_384(pretrained=False, **kwargs): + """ ViT-Small (ViT-S/16) + NOTE I've replaced my previous 'small' model definition and weights with the small variant from the DeiT paper """ - model_kwargs = dict(patch_size=16, embed_dim=768, depth=12, num_heads=12, **kwargs) - model = _create_vision_transformer('vit_base_patch16_224', pretrained=pretrained, **model_kwargs) + model_kwargs = dict(patch_size=16, embed_dim=384, depth=12, num_heads=6, **kwargs) + model = _create_vision_transformer('vit_small_patch16_384', pretrained=pretrained, **model_kwargs) return model @@ -577,6 +595,26 @@ def vit_base_patch32_224(pretrained=False, **kwargs): return model +@register_model +def vit_base_patch32_384(pretrained=False, **kwargs): + """ ViT-Base model (ViT-B/32) from original paper (https://arxiv.org/abs/2010.11929). + ImageNet-1k weights fine-tuned from in21k @ 384x384, source https://github.com/google-research/vision_transformer. + """ + model_kwargs = dict(patch_size=32, embed_dim=768, depth=12, num_heads=12, **kwargs) + model = _create_vision_transformer('vit_base_patch32_384', pretrained=pretrained, **model_kwargs) + return model + + +@register_model +def vit_base_patch16_224(pretrained=False, **kwargs): + """ ViT-Base (ViT-B/16) from original paper (https://arxiv.org/abs/2010.11929). + ImageNet-1k weights fine-tuned from in21k @ 224x224, source https://github.com/google-research/vision_transformer. + """ + model_kwargs = dict(patch_size=16, embed_dim=768, depth=12, num_heads=12, **kwargs) + model = _create_vision_transformer('vit_base_patch16_224', pretrained=pretrained, **model_kwargs) + return model + + @register_model def vit_base_patch16_384(pretrained=False, **kwargs): """ ViT-Base model (ViT-B/16) from original paper (https://arxiv.org/abs/2010.11929). @@ -588,31 +626,31 @@ def vit_base_patch16_384(pretrained=False, **kwargs): @register_model -def vit_base_patch32_384(pretrained=False, **kwargs): - """ ViT-Base model (ViT-B/32) from original paper (https://arxiv.org/abs/2010.11929). - ImageNet-1k weights fine-tuned from in21k @ 384x384, source https://github.com/google-research/vision_transformer. +def vit_large_patch32_224(pretrained=False, **kwargs): + """ ViT-Large model (ViT-L/32) from original paper (https://arxiv.org/abs/2010.11929). No pretrained weights. 
""" - model_kwargs = dict(patch_size=32, embed_dim=768, depth=12, num_heads=12, **kwargs) - model = _create_vision_transformer('vit_base_patch32_384', pretrained=pretrained, **model_kwargs) + model_kwargs = dict(patch_size=32, embed_dim=1024, depth=24, num_heads=16, **kwargs) + model = _create_vision_transformer('vit_large_patch32_224', pretrained=pretrained, **model_kwargs) return model @register_model -def vit_large_patch16_224(pretrained=False, **kwargs): +def vit_large_patch32_384(pretrained=False, **kwargs): """ ViT-Large model (ViT-L/32) from original paper (https://arxiv.org/abs/2010.11929). - ImageNet-1k weights fine-tuned from in21k @ 224x224, source https://github.com/google-research/vision_transformer. + ImageNet-1k weights fine-tuned from in21k @ 384x384, source https://github.com/google-research/vision_transformer. """ - model_kwargs = dict(patch_size=16, embed_dim=1024, depth=24, num_heads=16, **kwargs) - model = _create_vision_transformer('vit_large_patch16_224', pretrained=pretrained, **model_kwargs) + model_kwargs = dict(patch_size=32, embed_dim=1024, depth=24, num_heads=16, **kwargs) + model = _create_vision_transformer('vit_large_patch32_384', pretrained=pretrained, **model_kwargs) return model @register_model -def vit_large_patch32_224(pretrained=False, **kwargs): - """ ViT-Large model (ViT-L/32) from original paper (https://arxiv.org/abs/2010.11929). No pretrained weights. +def vit_large_patch16_224(pretrained=False, **kwargs): + """ ViT-Large model (ViT-L/32) from original paper (https://arxiv.org/abs/2010.11929). + ImageNet-1k weights fine-tuned from in21k @ 224x224, source https://github.com/google-research/vision_transformer. """ - model_kwargs = dict(patch_size=32, embed_dim=1024, depth=24, num_heads=16, **kwargs) - model = _create_vision_transformer('vit_large_patch32_224', pretrained=pretrained, **model_kwargs) + model_kwargs = dict(patch_size=16, embed_dim=1024, depth=24, num_heads=16, **kwargs) + model = _create_vision_transformer('vit_large_patch16_224', pretrained=pretrained, **model_kwargs) return model @@ -627,23 +665,32 @@ def vit_large_patch16_384(pretrained=False, **kwargs): @register_model -def vit_large_patch32_384(pretrained=False, **kwargs): - """ ViT-Large model (ViT-L/32) from original paper (https://arxiv.org/abs/2010.11929). - ImageNet-1k weights fine-tuned from in21k @ 384x384, source https://github.com/google-research/vision_transformer. +def vit_tiny_patch16_224_in21k(pretrained=False, **kwargs): + """ ViT-Tiny (Vit-Ti/16). + ImageNet-21k weights @ 224x224, source https://github.com/google-research/vision_transformer. """ - model_kwargs = dict(patch_size=32, embed_dim=1024, depth=24, num_heads=16, **kwargs) - model = _create_vision_transformer('vit_large_patch32_384', pretrained=pretrained, **model_kwargs) + model_kwargs = dict(patch_size=16, embed_dim=192, depth=12, num_heads=3, **kwargs) + model = _create_vision_transformer('vit_tiny_patch16_224_in21k', pretrained=pretrained, **model_kwargs) return model @register_model -def vit_base_patch16_224_in21k(pretrained=False, **kwargs): - """ ViT-Base model (ViT-B/16) from original paper (https://arxiv.org/abs/2010.11929). +def vit_small_patch32_224_in21k(pretrained=False, **kwargs): + """ ViT-Small (ViT-S/16) ImageNet-21k weights @ 224x224, source https://github.com/google-research/vision_transformer. 
""" - model_kwargs = dict( - patch_size=16, embed_dim=768, depth=12, num_heads=12, representation_size=768, **kwargs) - model = _create_vision_transformer('vit_base_patch16_224_in21k', pretrained=pretrained, **model_kwargs) + model_kwargs = dict(patch_size=32, embed_dim=384, depth=12, num_heads=6, **kwargs) + model = _create_vision_transformer('vit_small_patch32_224_in21k', pretrained=pretrained, **model_kwargs) + return model + + +@register_model +def vit_small_patch16_224_in21k(pretrained=False, **kwargs): + """ ViT-Small (ViT-S/16) + ImageNet-21k weights @ 224x224, source https://github.com/google-research/vision_transformer. + """ + model_kwargs = dict(patch_size=16, embed_dim=384, depth=12, num_heads=6, **kwargs) + model = _create_vision_transformer('vit_small_patch16_224_in21k', pretrained=pretrained, **model_kwargs) return model @@ -659,13 +706,13 @@ def vit_base_patch32_224_in21k(pretrained=False, **kwargs): @register_model -def vit_large_patch16_224_in21k(pretrained=False, **kwargs): - """ ViT-Large model (ViT-L/16) from original paper (https://arxiv.org/abs/2010.11929). +def vit_base_patch16_224_in21k(pretrained=False, **kwargs): + """ ViT-Base model (ViT-B/16) from original paper (https://arxiv.org/abs/2010.11929). ImageNet-21k weights @ 224x224, source https://github.com/google-research/vision_transformer. """ model_kwargs = dict( - patch_size=16, embed_dim=1024, depth=24, num_heads=16, representation_size=1024, **kwargs) - model = _create_vision_transformer('vit_large_patch16_224_in21k', pretrained=pretrained, **model_kwargs) + patch_size=16, embed_dim=768, depth=12, num_heads=12, representation_size=768, **kwargs) + model = _create_vision_transformer('vit_base_patch16_224_in21k', pretrained=pretrained, **model_kwargs) return model @@ -680,6 +727,17 @@ def vit_large_patch32_224_in21k(pretrained=False, **kwargs): return model +@register_model +def vit_large_patch16_224_in21k(pretrained=False, **kwargs): + """ ViT-Large model (ViT-L/16) from original paper (https://arxiv.org/abs/2010.11929). + ImageNet-21k weights @ 224x224, source https://github.com/google-research/vision_transformer. + """ + model_kwargs = dict( + patch_size=16, embed_dim=1024, depth=24, num_heads=16, representation_size=1024, **kwargs) + model = _create_vision_transformer('vit_large_patch16_224_in21k', pretrained=pretrained, **model_kwargs) + return model + + @register_model def vit_huge_patch14_224_in21k(pretrained=False, **kwargs): """ ViT-Huge model (ViT-H/14) from original paper (https://arxiv.org/abs/2010.11929). 
diff --git a/timm/models/vision_transformer_hybrid.py b/timm/models/vision_transformer_hybrid.py index 1bfe6685..a53419a0 100644 --- a/timm/models/vision_transformer_hybrid.py +++ b/timm/models/vision_transformer_hybrid.py @@ -35,34 +35,51 @@ def _cfg(url='', **kwargs): default_cfgs = { - # hybrid in-1k models (weights ported from official JAX impl where they exist) - 'vit_tiny_r_s16_p8_224': _cfg(first_conv='patch_embed.backbone.conv'), + # hybrid in-1k models (weights from official JAX impl where they exist) + 'vit_tiny_r_s16_p8_224': _cfg( + url='https://storage.googleapis.com/vit_models/augreg/' + 'R_Ti_16-i21k-300ep-lr_0.001-aug_none-wd_0.03-do_0.0-sd_0.0--imagenet2012-steps_20k-lr_0.03-res_224.npz', + first_conv='patch_embed.backbone.conv'), 'vit_tiny_r_s16_p8_384': _cfg( + url='https://storage.googleapis.com/vit_models/augreg/' + 'R_Ti_16-i21k-300ep-lr_0.001-aug_none-wd_0.03-do_0.0-sd_0.0--imagenet2012-steps_20k-lr_0.03-res_384.npz', first_conv='patch_embed.backbone.conv', input_size=(3, 384, 384), crop_pct=1.0), - 'vit_small_r_s16_p8_224': _cfg(first_conv='patch_embed.backbone.conv'), - 'vit_small_r20_s16_p2_224': _cfg(), - 'vit_small_r20_s16_224': _cfg(), - 'vit_small_r26_s32_224': _cfg(), + 'vit_small_r26_s32_224': _cfg( + url='https://storage.googleapis.com/vit_models/augreg/' + 'R26_S_32-i21k-300ep-lr_0.001-aug_light0-wd_0.03-do_0.1-sd_0.1--imagenet2012-steps_20k-lr_0.03-res_224.npz', + ), 'vit_small_r26_s32_384': _cfg( + url='https://storage.googleapis.com/vit_models/augreg/' + 'R26_S_32-i21k-300ep-lr_0.001-aug_medium2-wd_0.03-do_0.0-sd_0.0--imagenet2012-steps_20k-lr_0.03-res_384.npz', input_size=(3, 384, 384), crop_pct=1.0), - 'vit_base_r20_s16_224': _cfg(), 'vit_base_r26_s32_224': _cfg(), 'vit_base_r50_s16_224': _cfg(), 'vit_base_r50_s16_384': _cfg( url='https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-vitjx/jx_vit_base_resnet50_384-9fd3c705.pth', input_size=(3, 384, 384), crop_pct=1.0), - 'vit_large_r50_s32_224': _cfg(), - 'vit_large_r50_s32_384': _cfg(), - - # hybrid in-21k models (weights ported from official Google JAX impl where they exist) + 'vit_large_r50_s32_224': _cfg( + url='https://storage.googleapis.com/vit_models/augreg/' + 'R50_L_32-i21k-300ep-lr_0.001-aug_medium1-wd_0.1-do_0.1-sd_0.1--imagenet2012-steps_20k-lr_0.01-res_224.npz' + ), + 'vit_large_r50_s32_384': _cfg( + url='https://storage.googleapis.com/vit_models/augreg/' + 'R50_L_32-i21k-300ep-lr_0.001-aug_medium2-wd_0.1-do_0.0-sd_0.0--imagenet2012-steps_20k-lr_0.01-res_384.npz', + input_size=(3, 384, 384), crop_pct=1.0 + ), + + # hybrid in-21k models (weights from official Google JAX impl where they exist) + 'vit_tiny_r_s16_p8_224_in21k': _cfg( + url='https://storage.googleapis.com/vit_models/augreg/Ti_16-i1k-300ep-lr_0.001-aug_medium1-wd_0.1-do_0.0-sd_0.0.npz', + num_classes=21843, crop_pct=0.9, first_conv='patch_embed.backbone.conv'), 'vit_small_r26_s32_224_in21k': _cfg( + url='https://storage.googleapis.com/vit_models/augreg/R26_S_32-i21k-300ep-lr_0.001-aug_medium2-wd_0.03-do_0.0-sd_0.0.npz', num_classes=21843, crop_pct=0.9), - 'vit_small_r26_s32_384_in21k': _cfg( - num_classes=21843, input_size=(3, 384, 384), crop_pct=1.0), 'vit_base_r50_s16_224_in21k': _cfg( url='https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-vitjx/jx_vit_base_resnet50_224_in21k-6f7c7740.pth', num_classes=21843, crop_pct=0.9), - 'vit_large_r50_s32_224_in21k': _cfg(num_classes=21843, crop_pct=0.9), + 'vit_large_r50_s32_224_in21k': _cfg( + 
url='https://storage.googleapis.com/vit_models/augreg/R50_L_32-i21k-300ep-lr_0.001-aug_medium2-wd_0.1-do_0.0-sd_0.0.npz', + num_classes=21843, crop_pct=0.9), # hybrid models (using timm resnet backbones) 'vit_small_resnet26d_224': _cfg( @@ -163,51 +180,6 @@ def vit_tiny_r_s16_p8_384(pretrained=False, **kwargs): return model -@register_model -def vit_tiny_r_s16_p8_384(pretrained=False, **kwargs): - """ R+ViT-Ti/S16 w/ 8x8 patch hybrid @ 384 x 384. - """ - backbone = _resnetv2(layers=(), **kwargs) - model_kwargs = dict(patch_size=8, embed_dim=192, depth=12, num_heads=3, **kwargs) - model = _create_vision_transformer_hybrid( - 'vit_tiny_r_s16_p8_384', backbone=backbone, pretrained=pretrained, **model_kwargs) - return model - - -@register_model -def vit_small_r_s16_p8_224(pretrained=False, **kwargs): - """ R+ViT-S/S16 w/ 8x8 patch hybrid @ 224 x 224. - """ - backbone = _resnetv2(layers=(), **kwargs) - model_kwargs = dict(patch_size=8, embed_dim=384, depth=12, num_heads=6, **kwargs) - model = _create_vision_transformer_hybrid( - 'vit_small_r_s16_p8_224', backbone=backbone, pretrained=pretrained, **model_kwargs) - - return model - - -@register_model -def vit_small_r20_s16_p2_224(pretrained=False, **kwargs): - """ R52+ViT-S/S16 w/ 2x2 patch hybrid @ 224 x 224. - """ - backbone = _resnetv2((2, 4), **kwargs) - model_kwargs = dict(patch_size=2, embed_dim=384, depth=12, num_heads=6, **kwargs) - model = _create_vision_transformer_hybrid( - 'vit_small_r20_s16_p2_224', backbone=backbone, pretrained=pretrained, **model_kwargs) - return model - - -@register_model -def vit_small_r20_s16_224(pretrained=False, **kwargs): - """ R20+ViT-S/S16 hybrid. - """ - backbone = _resnetv2((2, 2, 2), **kwargs) - model_kwargs = dict(embed_dim=384, depth=12, num_heads=6, **kwargs) - model = _create_vision_transformer_hybrid( - 'vit_small_r20_s16_224', backbone=backbone, pretrained=pretrained, **model_kwargs) - return model - - @register_model def vit_small_r26_s32_224(pretrained=False, **kwargs): """ R26+ViT-S/S32 hybrid. @@ -230,17 +202,6 @@ def vit_small_r26_s32_384(pretrained=False, **kwargs): return model -@register_model -def vit_base_r20_s16_224(pretrained=False, **kwargs): - """ R20+ViT-B/S16 hybrid. - """ - backbone = _resnetv2((2, 2, 2), **kwargs) - model_kwargs = dict(embed_dim=768, depth=12, num_heads=12, **kwargs) - model = _create_vision_transformer_hybrid( - 'vit_base_r20_s16_224', backbone=backbone, pretrained=pretrained, **model_kwargs) - return model - - @register_model def vit_base_r26_s32_224(pretrained=False, **kwargs): """ R26+ViT-B/S32 hybrid. @@ -298,24 +259,24 @@ def vit_large_r50_s32_384(pretrained=False, **kwargs): @register_model -def vit_small_r26_s32_224_in21k(pretrained=False, **kwargs): - """ R26+ViT-S/S32 hybrid. +def vit_tiny_r_s16_p8_224_in21k(pretrained=False, **kwargs): + """ R+ViT-Ti/S16 w/ 8x8 patch hybrid. ImageNet-21k. """ - backbone = _resnetv2((2, 2, 2, 2), **kwargs) - model_kwargs = dict(embed_dim=384, depth=12, num_heads=6, **kwargs) + backbone = _resnetv2(layers=(), **kwargs) + model_kwargs = dict(patch_size=8, embed_dim=192, depth=12, num_heads=3, **kwargs) model = _create_vision_transformer_hybrid( - 'vit_small_r26_s32_224_in21k', backbone=backbone, pretrained=pretrained, **model_kwargs) + 'vit_tiny_r_s16_p8_224_in21k', backbone=backbone, pretrained=pretrained, **model_kwargs) return model @register_model -def vit_small_r26_s32_384_in21k(pretrained=False, **kwargs): - """ R26+ViT-S/S32 hybrid. 
+def vit_small_r26_s32_224_in21k(pretrained=False, **kwargs): + """ R26+ViT-S/S32 hybrid. ImageNet-21k. """ backbone = _resnetv2((2, 2, 2, 2), **kwargs) model_kwargs = dict(embed_dim=384, depth=12, num_heads=6, **kwargs) model = _create_vision_transformer_hybrid( - 'vit_small_r26_s32_384_in21k', backbone=backbone, pretrained=pretrained, **model_kwargs) + 'vit_small_r26_s32_224_in21k', backbone=backbone, pretrained=pretrained, **model_kwargs) return model @@ -331,6 +292,17 @@ def vit_base_r50_s16_224_in21k(pretrained=False, **kwargs): return model +@register_model +def vit_large_r50_s32_224_in21k(pretrained=False, **kwargs): + """ R50+ViT-L/S32 hybrid. ImageNet-21k. + """ + backbone = _resnetv2((3, 4, 6, 3), **kwargs) + model_kwargs = dict(embed_dim=1024, depth=24, num_heads=16, **kwargs) + model = _create_vision_transformer_hybrid( + 'vit_large_r50_s32_224_in21k', backbone=backbone, pretrained=pretrained, **model_kwargs) + return model + + @register_model def vit_small_resnet26d_224(pretrained=False, **kwargs): """ Custom ViT small hybrid w/ ResNet26D stride 32. No pretrained weights. From 4c09a2f169587bb2b2ca35fb23e432a66038d8d8 Mon Sep 17 00:00:00 2001 From: Ross Wightman Date: Fri, 18 Jun 2021 16:17:34 -0700 Subject: [PATCH 23/31] Bump version 0.4.12 --- timm/version.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/timm/version.py b/timm/version.py index d4f33464..94c48197 100644 --- a/timm/version.py +++ b/timm/version.py @@ -1 +1 @@ -__version__ = '0.4.11' +__version__ = '0.4.12' From 8f4a0222edae291c9fbb3636f23fe4299b7d523f Mon Sep 17 00:00:00 2001 From: Ross Wightman Date: Fri, 18 Jun 2021 16:49:28 -0700 Subject: [PATCH 24/31] Add GMixer-24 MLP model weights, trained w/ TPU + PyTorch XLA --- timm/models/mlp_mixer.py | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/timm/models/mlp_mixer.py b/timm/models/mlp_mixer.py index db3a1be5..7a87eb36 100644 --- a/timm/models/mlp_mixer.py +++ b/timm/models/mlp_mixer.py @@ -93,7 +93,9 @@ default_cfgs = dict( ), gmixer_12_224=_cfg(mean=IMAGENET_DEFAULT_MEAN, std=IMAGENET_DEFAULT_STD), - gmixer_24_224=_cfg(mean=IMAGENET_DEFAULT_MEAN, std=IMAGENET_DEFAULT_STD), + gmixer_24_224=_cfg( + url='https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-weights/gmixer_24_224_raa-7daf7ae6.pth', + mean=IMAGENET_DEFAULT_MEAN, std=IMAGENET_DEFAULT_STD), resmlp_12_224=_cfg( url='https://dl.fbaipublicfiles.com/deit/resmlp_12_no_dist.pth', @@ -457,11 +459,11 @@ def mixer_b16_224_miil_in21k(pretrained=False, **kwargs): @register_model def gmixer_12_224(pretrained=False, **kwargs): - """ Glu-Mixer-12 224x224 (short & fat) + """ Glu-Mixer-12 224x224 Experiment by Ross Wightman, adding (Si)GLU to MLP-Mixer """ model_args = dict( - patch_size=16, num_blocks=12, embed_dim=512, mlp_ratio=(1.0, 6.0), + patch_size=16, num_blocks=12, embed_dim=384, mlp_ratio=(1.0, 4.0), mlp_layer=GluMlp, act_layer=nn.SiLU, **kwargs) model = _create_mixer('gmixer_12_224', pretrained=pretrained, **model_args) return model @@ -469,11 +471,11 @@ def gmixer_12_224(pretrained=False, **kwargs): @register_model def gmixer_24_224(pretrained=False, **kwargs): - """ Glu-Mixer-24 224x224 (tall & slim) + """ Glu-Mixer-24 224x224 Experiment by Ross Wightman, adding (Si)GLU to MLP-Mixer """ model_args = dict( - patch_size=16, num_blocks=24, embed_dim=384, mlp_ratio=(1.0, 6.0), + patch_size=16, num_blocks=24, embed_dim=384, mlp_ratio=(1.0, 4.0), mlp_layer=GluMlp, act_layer=nn.SiLU, **kwargs) model = _create_mixer('gmixer_24_224', 
pretrained=pretrained, **model_args) return model From 26f04a8e3ef7c581f4766cd34e71d94105c32064 Mon Sep 17 00:00:00 2001 From: Ross Wightman Date: Sat, 19 Jun 2021 16:39:36 -0700 Subject: [PATCH 25/31] Fix a weight link --- timm/models/vision_transformer_hybrid.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/timm/models/vision_transformer_hybrid.py b/timm/models/vision_transformer_hybrid.py index a53419a0..30330d39 100644 --- a/timm/models/vision_transformer_hybrid.py +++ b/timm/models/vision_transformer_hybrid.py @@ -69,7 +69,7 @@ default_cfgs = { # hybrid in-21k models (weights from official Google JAX impl where they exist) 'vit_tiny_r_s16_p8_224_in21k': _cfg( - url='https://storage.googleapis.com/vit_models/augreg/Ti_16-i1k-300ep-lr_0.001-aug_medium1-wd_0.1-do_0.0-sd_0.0.npz', + url='https://storage.googleapis.com/vit_models/augreg/R_Ti_16-i21k-300ep-lr_0.001-aug_none-wd_0.03-do_0.0-sd_0.0.npz', num_classes=21843, crop_pct=0.9, first_conv='patch_embed.backbone.conv'), 'vit_small_r26_s32_224_in21k': _cfg( url='https://storage.googleapis.com/vit_models/augreg/R26_S_32-i21k-300ep-lr_0.001-aug_medium2-wd_0.03-do_0.0-sd_0.0.npz', From 381b2797858248619fe8007fa1c5f5a5d4ab3919 Mon Sep 17 00:00:00 2001 From: Ross Wightman Date: Sat, 19 Jun 2021 22:28:44 -0700 Subject: [PATCH 26/31] Add hybrid model fwds back --- tests/test_models.py | 2 +- timm/models/vision_transformer_hybrid.py | 12 ++++++++++++ 2 files changed, 13 insertions(+), 1 deletion(-) diff --git a/tests/test_models.py b/tests/test_models.py index 52a8023a..0a770784 100644 --- a/tests/test_models.py +++ b/tests/test_models.py @@ -173,7 +173,7 @@ def test_model_default_cfgs_non_std(model_name, batch_size): state_dict = model.state_dict() cfg = model.default_cfg - input_size = _get_input_size(model_name=model_name, target=TARGET_FWD_SIZE) + input_size = _get_input_size(model=model) if max(input_size) > 320: # FIXME const pytest.skip("Fixed input size model > limit.") diff --git a/timm/models/vision_transformer_hybrid.py b/timm/models/vision_transformer_hybrid.py index 30330d39..5d725c58 100644 --- a/timm/models/vision_transformer_hybrid.py +++ b/timm/models/vision_transformer_hybrid.py @@ -236,6 +236,12 @@ def vit_base_r50_s16_384(pretrained=False, **kwargs): return model +@register_model +def vit_base_resnet50_384(pretrained=False, **kwargs): + # DEPRECATED this is forwarding to model def above for backwards compatibility + return vit_base_r50_s16_384(pretrained=pretrained, **kwargs) + + @register_model def vit_large_r50_s32_224(pretrained=False, **kwargs): """ R50+ViT-L/S32 hybrid. @@ -292,6 +298,12 @@ def vit_base_r50_s16_224_in21k(pretrained=False, **kwargs): return model +@register_model +def vit_base_resnet50_224_in21k(pretrained=False, **kwargs): + # DEPRECATED this is forwarding to model def above for backwards compatibility + return vit_base_r50_s16_224_in21k(pretrained=pretrained, **kwargs) + + @register_model def vit_large_r50_s32_224_in21k(pretrained=False, **kwargs): """ R50+ViT-L/S32 hybrid. ImageNet-21k. 
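The deprecated defs re-added above are thin forwards, so the old registry names and the new ones build identical models. A minimal sketch of that equivalence (illustrative check only, not part of the series' test suite):

import timm

# Old and new registry names resolve to the same model definition, so the
# resulting architectures match parameter-for-parameter.
old = timm.create_model('vit_base_resnet50_384', pretrained=False)
new = timm.create_model('vit_base_r50_s16_384', pretrained=False)
assert type(old) is type(new)
assert sum(p.numel() for p in old.parameters()) == sum(p.numel() for p in new.parameters())
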
From 9c9755a80869f7b42eb63c5bf9477aae3056615e Mon Sep 17 00:00:00 2001
From: Ross Wightman
Date: Sun, 20 Jun 2021 17:46:06 -0700
Subject: [PATCH 27/31] AugReg release

---
 README.md                                | 19 +++++++++++++++++++
 timm/models/vision_transformer.py        | 11 ++++++++---
 timm/models/vision_transformer_hybrid.py | 12 ++++++++----
 3 files changed, 35 insertions(+), 7 deletions(-)

diff --git a/README.md b/README.md
index 704bc32c..76261ccc 100644
--- a/README.md
+++ b/README.md
@@ -23,6 +23,25 @@ I'm fortunate to be able to dedicate significant time and money of my own suppor
 
 ## What's New
 
+### June 20, 2021
+* Release Vision Transformer 'AugReg' weights from [How to train your ViT? Data, Augmentation, and Regularization in Vision Transformers](https://arxiv.org/abs/2106.10270)
+    * .npz weight loading support added, can load any of the 50K+ weights from the [AugReg series](https://console.cloud.google.com/storage/browser/vit_models/augreg)
+    * See [example notebook](https://colab.research.google.com/github/google-research/vision_transformer/blob/master/vit_jax_augreg.ipynb) from official impl for navigating the augreg weights
+    * Replaced all default weights w/ best AugReg variant (if possible). All AugReg 21k classifiers work.
+    * Highlights: `vit_large_patch16_384` (87.1 top-1), `vit_large_r50_s32_384` (86.2 top-1), `vit_base_patch16_384` (86.0 top-1)
+    * `vit_deit_*` renamed to just `deit_*`
+    * Remove my old small model, replace with DeiT compatible small w/ AugReg weights
+* Add 1st training of my `gmixer_24_224` MLP w/ GLU, 78.1 top-1 w/ 25M params.
+* Add weights from official ResMLP release (https://github.com/facebookresearch/deit)
+* Add `eca_nfnet_l2` weights from my 'lightweight' series. 84.7 top-1 at 384x384.
+* Add distilled BiT 50x1 student and 152x2 teacher weights from [Knowledge distillation: A good teacher is patient and consistent](https://arxiv.org/abs/2106.05237)
+* NFNets and ResNetV2-BiT models work w/ PyTorch XLA now
+    * weight standardization uses F.batch_norm instead of std_mean (std_mean wasn't lowered)
+    * eps values adjusted, will be slight differences but should be quite close
+* Improve test coverage and classifier interface of non-conv (vision transformer and mlp) models
+* Cleanup a few classifier / flatten details for models w/ conv classifiers or early global pool
+* Please report any regressions, this PR touched quite a few models.
+
 ### June 8, 2021
 * Add first ResMLP weights, trained in PyTorch XLA on TPU-VM w/ my XLA branch. 24 block variant, 79.2 top-1.
 * Add ResNet51-Q model w/ pretrained weights at 82.36 top-1.
diff --git a/timm/models/vision_transformer.py b/timm/models/vision_transformer.py
index b8fc6fa5..89fba7de 100644
--- a/timm/models/vision_transformer.py
+++ b/timm/models/vision_transformer.py
@@ -1,7 +1,12 @@
 """ Vision Transformer (ViT) in PyTorch
 
-A PyTorch implement of Vision Transformers as described in
-'An Image Is Worth 16 x 16 Words: Transformers for Image Recognition at Scale' - https://arxiv.org/abs/2010.11929
+A PyTorch implement of Vision Transformers as described in:
+
+'An Image Is Worth 16 x 16 Words: Transformers for Image Recognition at Scale'
+    - https://arxiv.org/abs/2010.11929
+
+`How to train your ViT? 
Data, Augmentation, and Regularization in Vision Transformers`
+   - https://arxiv.org/abs/2106.TODO
 
 The official jax code is released and available at https://github.com/google-research/vision_transformer
@@ -15,7 +20,7 @@ for some einops/einsum fun
 * Simple transformer style inspired by Andrej Karpathy's https://github.com/karpathy/minGPT
 * Bert reference code checks against Huggingface Transformers and Tensorflow Bert
 
-Hacked together by / Copyright 2020 Ross Wightman
+Hacked together by / Copyright 2021 Ross Wightman
 """
 import math
 import logging
diff --git a/timm/models/vision_transformer_hybrid.py b/timm/models/vision_transformer_hybrid.py
index 5d725c58..d5f0a537 100644
--- a/timm/models/vision_transformer_hybrid.py
+++ b/timm/models/vision_transformer_hybrid.py
@@ -1,13 +1,17 @@
 """ Hybrid Vision Transformer (ViT) in PyTorch
 
-A PyTorch implement of the Hybrid Vision Transformers as described in
+A PyTorch implement of the Hybrid Vision Transformers as described in:
+
 'An Image Is Worth 16 x 16 Words: Transformers for Image Recognition at Scale'
     - https://arxiv.org/abs/2010.11929
 
-NOTE This relies on code in vision_transformer.py. The hybrid model definitions were moved here to
-keep file sizes sane.
+`How to train your ViT? Data, Augmentation, and Regularization in Vision Transformers`
+    - https://arxiv.org/abs/2106.TODO
+
+NOTE These hybrid model definitions depend on code in vision_transformer.py.
+They were moved here to keep file sizes sane.
 
-Hacked together by / Copyright 2020 Ross Wightman
+Hacked together by / Copyright 2021 Ross Wightman
 """
 from copy import deepcopy
 from functools import partial

From dc422820eca4e550a4057561e595fc8b36209137 Mon Sep 17 00:00:00 2001
From: Ross Wightman
Date: Sun, 20 Jun 2021 22:12:56 -0700
Subject: [PATCH 28/31] Update README.md

---
 README.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/README.md b/README.md
index 76261ccc..6b41d772 100644
--- a/README.md
+++ b/README.md
@@ -26,7 +26,7 @@ I'm fortunate to be able to dedicate significant time and money of my own suppor
 ### June 20, 2021
 * Release Vision Transformer 'AugReg' weights from [How to train your ViT? Data, Augmentation, and Regularization in Vision Transformers](https://arxiv.org/abs/2106.10270)
     * .npz weight loading support added, can load any of the 50K+ weights from the [AugReg series](https://console.cloud.google.com/storage/browser/vit_models/augreg)
-    * See [example notebook](https://colab.research.google.com/github/google-research/vision_transformer/blob/master/vit_jax_augreg.ipynb) from official impl for navigating the augreg weights
+    * See [example notebook](https://colab.research.google.com/github/google-research/vision_transformer/blob/master/vit_jax_augreg.ipynb) from [official impl](https://github.com/google-research/vision_transformer/) for navigating the augreg weights
     * Replaced all default weights w/ best AugReg variant (if possible). All AugReg 21k classifiers work.
     * Highlights: `vit_large_patch16_384` (87.1 top-1), `vit_large_r50_s32_384` (86.2 top-1), `vit_base_patch16_384` (86.0 top-1)
     * `vit_deit_*` renamed to just `deit_*`

From b41cffaa93e8205bd8bd309f82c33c07c420eefd Mon Sep 17 00:00:00 2001
From: Ross Wightman
Date: Tue, 22 Jun 2021 23:16:05 -0700
Subject: [PATCH 29/31] Fix a few issues loading pretrained vit/bit npz weights
 w/ num_classes=0 __init__ arg. Missed a few other small classifier handling
 details on Mlp, GhostNet, Levit. 
Should fix #713 --- tests/test_models.py | 17 +++++++++++++++++ timm/models/ghostnet.py | 2 +- timm/models/levit.py | 2 +- timm/models/mlp_mixer.py | 2 +- timm/models/resnetv2.py | 3 ++- timm/models/visformer.py | 1 - timm/models/vision_transformer.py | 2 +- 7 files changed, 23 insertions(+), 6 deletions(-) diff --git a/tests/test_models.py b/tests/test_models.py index 0a770784..5c8b02db 100644 --- a/tests/test_models.py +++ b/tests/test_models.py @@ -147,6 +147,15 @@ def test_model_default_cfgs(model_name, batch_size): # FIXME mobilenetv3/ghostnet forward_features vs removed pooling differ assert outputs.shape[-1] == pool_size[-1] and outputs.shape[-2] == pool_size[-2] + if 'pruned' not in model_name: # FIXME better pruned model handling + # test classifier + global pool deletion via __init__ + model = create_model(model_name, pretrained=False, num_classes=0, global_pool='').eval() + outputs = model.forward(input_tensor) + assert len(outputs.shape) == 4 + if not isinstance(model, timm.models.MobileNetV3) and not isinstance(model, timm.models.GhostNet): + # FIXME mobilenetv3/ghostnet forward_features vs removed pooling differ + assert outputs.shape[-1] == pool_size[-1] and outputs.shape[-2] == pool_size[-2] + # check classifier name matches default_cfg classifier = cfg['classifier'] if not isinstance(classifier, (tuple, list)): @@ -193,6 +202,13 @@ def test_model_default_cfgs_non_std(model_name, batch_size): assert len(outputs.shape) == 2 assert outputs.shape[1] == model.num_features + model = create_model(model_name, pretrained=False, num_classes=0).eval() + outputs = model.forward(input_tensor) + if isinstance(outputs, tuple): + outputs = outputs[0] + assert len(outputs.shape) == 2 + assert outputs.shape[1] == model.num_features + # check classifier name matches default_cfg classifier = cfg['classifier'] if not isinstance(classifier, (tuple, list)): @@ -217,6 +233,7 @@ if 'GITHUB_ACTIONS' not in os.environ: """Create that pretrained weights load, verify support for in_chans != 3 while doing so.""" in_chans = 3 if 'pruned' in model_name else 1 # pruning not currently supported with in_chans change create_model(model_name, pretrained=True, in_chans=in_chans, num_classes=5) + create_model(model_name, pretrained=True, in_chans=in_chans, num_classes=0) @pytest.mark.timeout(120) @pytest.mark.parametrize('model_name', list_models(pretrained=True, exclude_filters=NON_STD_FILTERS)) diff --git a/timm/models/ghostnet.py b/timm/models/ghostnet.py index a73047c5..3b6f90a4 100644 --- a/timm/models/ghostnet.py +++ b/timm/models/ghostnet.py @@ -182,7 +182,7 @@ class GhostNet(nn.Module): self.conv_head = nn.Conv2d(prev_chs, out_chs, 1, 1, 0, bias=True) self.act2 = nn.ReLU(inplace=True) self.flatten = nn.Flatten(1) if global_pool else nn.Identity() # don't flatten if pooling disabled - self.classifier = Linear(out_chs, num_classes) + self.classifier = Linear(out_chs, num_classes) if num_classes > 0 else nn.Identity() def get_classifier(self): return self.classifier diff --git a/timm/models/levit.py b/timm/models/levit.py index fa35f41f..9987e4ba 100644 --- a/timm/models/levit.py +++ b/timm/models/levit.py @@ -542,7 +542,7 @@ def checkpoint_filter_fn(state_dict, model): state_dict = state_dict['model'] D = model.state_dict() for k in state_dict.keys(): - if D[k].ndim == 4 and state_dict[k].ndim == 2: + if k in D and D[k].ndim == 4 and state_dict[k].ndim == 2: state_dict[k] = state_dict[k][:, :, None, None] return state_dict diff --git a/timm/models/mlp_mixer.py b/timm/models/mlp_mixer.py index 
7a87eb36..c51e61e3 100644 --- a/timm/models/mlp_mixer.py +++ b/timm/models/mlp_mixer.py @@ -266,7 +266,7 @@ class MlpMixer(nn.Module): act_layer=act_layer, drop=drop_rate, drop_path=drop_path_rate) for _ in range(num_blocks)]) self.norm = norm_layer(embed_dim) - self.head = nn.Linear(embed_dim, self.num_classes) # zero init + self.head = nn.Linear(embed_dim, self.num_classes) if num_classes > 0 else nn.Identity() self.init_weights(nlhb=nlhb) diff --git a/timm/models/resnetv2.py b/timm/models/resnetv2.py index a3c89532..8110fcca 100644 --- a/timm/models/resnetv2.py +++ b/timm/models/resnetv2.py @@ -424,7 +424,8 @@ def _load_weights(model: nn.Module, checkpoint_path: str, prefix: str = 'resnet/ model.stem.conv.weight.copy_(stem_conv_w) model.norm.weight.copy_(t2p(weights[f'{prefix}group_norm/gamma'])) model.norm.bias.copy_(t2p(weights[f'{prefix}group_norm/beta'])) - if model.head.fc.weight.shape[0] == weights[f'{prefix}head/conv2d/kernel'].shape[-1]: + if isinstance(model.head.fc, nn.Conv2d) and \ + model.head.fc.weight.shape[0] == weights[f'{prefix}head/conv2d/kernel'].shape[-1]: model.head.fc.weight.copy_(t2p(weights[f'{prefix}head/conv2d/kernel'])) model.head.fc.bias.copy_(t2p(weights[f'{prefix}head/conv2d/bias'])) for i, (sname, stage) in enumerate(model.stages.named_children()): diff --git a/timm/models/visformer.py b/timm/models/visformer.py index 16631027..7740f381 100644 --- a/timm/models/visformer.py +++ b/timm/models/visformer.py @@ -237,7 +237,6 @@ class Visformer(nn.Module): self.num_features = embed_dim if self.vit_stem else embed_dim * 2 self.norm = norm_layer(self.num_features) self.global_pool, self.head = create_classifier(self.num_features, self.num_classes, pool_type=global_pool) - self.head = nn.Linear(self.num_features, num_classes) # weights init if self.pos_embed: diff --git a/timm/models/vision_transformer.py b/timm/models/vision_transformer.py index 89fba7de..0a960987 100644 --- a/timm/models/vision_transformer.py +++ b/timm/models/vision_transformer.py @@ -448,7 +448,7 @@ def _load_weights(model: VisionTransformer, checkpoint_path: str, prefix: str = model.pos_embed.copy_(pos_embed_w) model.norm.weight.copy_(_n2p(w[f'{prefix}Transformer/encoder_norm/scale'])) model.norm.bias.copy_(_n2p(w[f'{prefix}Transformer/encoder_norm/bias'])) - if model.head.bias.shape[0] == w[f'{prefix}head/bias'].shape[-1]: + if isinstance(model.head, nn.Linear) and model.head.bias.shape[0] == w[f'{prefix}head/bias'].shape[-1]: model.head.weight.copy_(_n2p(w[f'{prefix}head/kernel'])) model.head.bias.copy_(_n2p(w[f'{prefix}head/bias'])) for i, block in enumerate(model.blocks.children()): From 85f894e03d63a7085f6144057e305a4a8e926d04 Mon Sep 17 00:00:00 2001 From: Ross Wightman Date: Wed, 23 Jun 2021 10:38:34 -0700 Subject: [PATCH 30/31] Fix ViT in21k representation (pre_logits) layer handling across old and new npz checkpoints --- timm/models/resnetv2.py | 2 +- timm/models/vision_transformer.py | 20 +++++++++++++++----- 2 files changed, 16 insertions(+), 6 deletions(-) diff --git a/timm/models/resnetv2.py b/timm/models/resnetv2.py index 8110fcca..b96d7742 100644 --- a/timm/models/resnetv2.py +++ b/timm/models/resnetv2.py @@ -424,7 +424,7 @@ def _load_weights(model: nn.Module, checkpoint_path: str, prefix: str = 'resnet/ model.stem.conv.weight.copy_(stem_conv_w) model.norm.weight.copy_(t2p(weights[f'{prefix}group_norm/gamma'])) model.norm.bias.copy_(t2p(weights[f'{prefix}group_norm/beta'])) - if isinstance(model.head.fc, nn.Conv2d) and \ + if isinstance(getattr(model.head, 'fc', None), 
nn.Conv2d) and \
     model.head.fc.weight.shape[0] == weights[f'{prefix}head/conv2d/kernel'].shape[-1]:
         model.head.fc.weight.copy_(t2p(weights[f'{prefix}head/conv2d/kernel']))
         model.head.fc.bias.copy_(t2p(weights[f'{prefix}head/conv2d/bias']))
     for i, (sname, stage) in enumerate(model.stages.named_children()):
diff --git a/timm/models/vision_transformer.py b/timm/models/vision_transformer.py
index 0a960987..9ec45868 100644
--- a/timm/models/vision_transformer.py
+++ b/timm/models/vision_transformer.py
@@ -6,7 +6,7 @@ A PyTorch implement of Vision Transformers as described in:
    - https://arxiv.org/abs/2010.11929
 
 `How to train your ViT? Data, Augmentation, and Regularization in Vision Transformers`
-   - https://arxiv.org/abs/2106.TODO
+   - https://arxiv.org/abs/2106.10270
 
 The official jax code is released and available at https://github.com/google-research/vision_transformer
@@ -451,6 +451,9 @@ def _load_weights(model: VisionTransformer, checkpoint_path: str, prefix: str =
     if isinstance(model.head, nn.Linear) and model.head.bias.shape[0] == w[f'{prefix}head/bias'].shape[-1]:
         model.head.weight.copy_(_n2p(w[f'{prefix}head/kernel']))
         model.head.bias.copy_(_n2p(w[f'{prefix}head/bias']))
+    if isinstance(getattr(model.pre_logits, 'fc', None), nn.Linear) and f'{prefix}pre_logits/bias' in w:
+        model.pre_logits.fc.weight.copy_(_n2p(w[f'{prefix}pre_logits/kernel']))
+        model.pre_logits.fc.bias.copy_(_n2p(w[f'{prefix}pre_logits/bias']))
     for i, block in enumerate(model.blocks.children()):
         block_prefix = f'{prefix}Transformer/encoderblock_{i}/'
         mha_prefix = block_prefix + 'MultiHeadDotProductAttention_1/'
@@ -673,6 +676,7 @@ def vit_large_patch16_384(pretrained=False, **kwargs):
 def vit_tiny_patch16_224_in21k(pretrained=False, **kwargs):
     """ ViT-Tiny (ViT-Ti/16).
     ImageNet-21k weights @ 224x224, source https://github.com/google-research/vision_transformer.
+    NOTE: this model has valid 21k classifier head and no representation (pre-logits) layer
     """
     model_kwargs = dict(patch_size=16, embed_dim=192, depth=12, num_heads=3, **kwargs)
     model = _create_vision_transformer('vit_tiny_patch16_224_in21k', pretrained=pretrained, **model_kwargs)
@@ -683,6 +687,7 @@ def vit_tiny_patch16_224_in21k(pretrained=False, **kwargs):
 def vit_small_patch32_224_in21k(pretrained=False, **kwargs):
     """ ViT-Small (ViT-S/32)
     ImageNet-21k weights @ 224x224, source https://github.com/google-research/vision_transformer.
+    NOTE: this model has valid 21k classifier head and no representation (pre-logits) layer
     """
     model_kwargs = dict(patch_size=32, embed_dim=384, depth=12, num_heads=6, **kwargs)
     model = _create_vision_transformer('vit_small_patch32_224_in21k', pretrained=pretrained, **model_kwargs)
@@ -693,6 +698,7 @@ def vit_small_patch32_224_in21k(pretrained=False, **kwargs):
 def vit_small_patch16_224_in21k(pretrained=False, **kwargs):
     """ ViT-Small (ViT-S/16)
     ImageNet-21k weights @ 224x224, source https://github.com/google-research/vision_transformer.
+    NOTE: this model has valid 21k classifier head and no representation (pre-logits) layer
     """
     model_kwargs = dict(patch_size=16, embed_dim=384, depth=12, num_heads=6, **kwargs)
     model = _create_vision_transformer('vit_small_patch16_224_in21k', pretrained=pretrained, **model_kwargs)
@@ -703,9 +709,10 @@ def vit_small_patch16_224_in21k(pretrained=False, **kwargs):
 def vit_base_patch32_224_in21k(pretrained=False, **kwargs):
     """ ViT-Base model (ViT-B/32) from original paper (https://arxiv.org/abs/2010.11929).
     ImageNet-21k weights @ 224x224, source https://github.com/google-research/vision_transformer. 
+ NOTE: this model has valid 21k classifier head and no representation (pre-logits) layer """ model_kwargs = dict( - patch_size=32, embed_dim=768, depth=12, num_heads=12, representation_size=768, **kwargs) + patch_size=32, embed_dim=768, depth=12, num_heads=12, **kwargs) model = _create_vision_transformer('vit_base_patch32_224_in21k', pretrained=pretrained, **model_kwargs) return model @@ -714,9 +721,10 @@ def vit_base_patch32_224_in21k(pretrained=False, **kwargs): def vit_base_patch16_224_in21k(pretrained=False, **kwargs): """ ViT-Base model (ViT-B/16) from original paper (https://arxiv.org/abs/2010.11929). ImageNet-21k weights @ 224x224, source https://github.com/google-research/vision_transformer. + NOTE: this model has valid 21k classifier head and no representation (pre-logits) layer """ model_kwargs = dict( - patch_size=16, embed_dim=768, depth=12, num_heads=12, representation_size=768, **kwargs) + patch_size=16, embed_dim=768, depth=12, num_heads=12, **kwargs) model = _create_vision_transformer('vit_base_patch16_224_in21k', pretrained=pretrained, **model_kwargs) return model @@ -725,6 +733,7 @@ def vit_base_patch16_224_in21k(pretrained=False, **kwargs): def vit_large_patch32_224_in21k(pretrained=False, **kwargs): """ ViT-Large model (ViT-L/32) from original paper (https://arxiv.org/abs/2010.11929). ImageNet-21k weights @ 224x224, source https://github.com/google-research/vision_transformer. + NOTE: this model has a representation layer but the 21k classifier head is zero'd out in original weights """ model_kwargs = dict( patch_size=32, embed_dim=1024, depth=24, num_heads=16, representation_size=1024, **kwargs) @@ -736,9 +745,10 @@ def vit_large_patch32_224_in21k(pretrained=False, **kwargs): def vit_large_patch16_224_in21k(pretrained=False, **kwargs): """ ViT-Large model (ViT-L/16) from original paper (https://arxiv.org/abs/2010.11929). ImageNet-21k weights @ 224x224, source https://github.com/google-research/vision_transformer. + NOTE: this model has valid 21k classifier head and no representation (pre-logits) layer """ model_kwargs = dict( - patch_size=16, embed_dim=1024, depth=24, num_heads=16, representation_size=1024, **kwargs) + patch_size=16, embed_dim=1024, depth=24, num_heads=16, **kwargs) model = _create_vision_transformer('vit_large_patch16_224_in21k', pretrained=pretrained, **model_kwargs) return model @@ -747,7 +757,7 @@ def vit_large_patch16_224_in21k(pretrained=False, **kwargs): def vit_huge_patch14_224_in21k(pretrained=False, **kwargs): """ ViT-Huge model (ViT-H/14) from original paper (https://arxiv.org/abs/2010.11929). ImageNet-21k weights @ 224x224, source https://github.com/google-research/vision_transformer. - NOTE: converted weights not currently available, too large for github release hosting. 
+ NOTE: this model has a representation layer but the 21k classifier head is zero'd out in original weights """ model_kwargs = dict( patch_size=14, embed_dim=1280, depth=32, num_heads=16, representation_size=1280, **kwargs) From 20a2be14c3c6c53a2ec9127ea8fc22ba47be5aae Mon Sep 17 00:00:00 2001 From: Ross Wightman Date: Wed, 23 Jun 2021 10:40:30 -0700 Subject: [PATCH 31/31] Add gMLP-S weights, 79.6 top-1 --- README.md | 3 +++ timm/models/mlp_mixer.py | 4 +++- 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index 6b41d772..07c71a76 100644 --- a/README.md +++ b/README.md @@ -23,6 +23,9 @@ I'm fortunate to be able to dedicate significant time and money of my own suppor ## What's New +### June 23, 2021 +* Reproduce gMLP model training, `gmlp_s16_224` trained to 79.6 top-1, matching [paper](https://arxiv.org/abs/2105.08050). + ### June 20, 2021 * Release Vision Transformer 'AugReg' weights from [How to train your ViT? Data, Augmentation, and Regularization in Vision Transformers](https://arxiv.org/abs/2106.10270) * .npz weight loading support added, can load any of the 50K+ weights from the [AugReg series](https://console.cloud.google.com/storage/browser/vit_models/augreg) diff --git a/timm/models/mlp_mixer.py b/timm/models/mlp_mixer.py index c51e61e3..f128b9c9 100644 --- a/timm/models/mlp_mixer.py +++ b/timm/models/mlp_mixer.py @@ -129,7 +129,9 @@ default_cfgs = dict( mean=IMAGENET_DEFAULT_MEAN, std=IMAGENET_DEFAULT_STD), gmlp_ti16_224=_cfg(), - gmlp_s16_224=_cfg(), + gmlp_s16_224=_cfg( + url='https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-weights/gmlp_s16_224_raa-10536d42.pth', + ), gmlp_b16_224=_cfg(), )
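Taken together with the num_classes=0 fixes in PATCH 29, the new MLP weights can be exercised either with the classifier head or headless. A minimal usage sketch (assumes network access to the release URL added above; shapes follow the behavior asserted in the series' tests):

import torch
import timm

# Classification: the new _cfg url lets pretrained=True resolve the gMLP-S weights.
model = timm.create_model('gmlp_s16_224', pretrained=True).eval()
x = torch.randn(2, 3, 224, 224)
with torch.no_grad():
    logits = model(x)
print(logits.shape)  # torch.Size([2, 1000])

# Feature extraction: with num_classes=0 the head becomes nn.Identity(), so
# the same checkpoint loads cleanly and forward() returns pooled features.
backbone = timm.create_model('gmlp_s16_224', pretrained=True, num_classes=0).eval()
with torch.no_grad():
    feats = backbone(x)
print(feats.shape)  # (2, backbone.num_features)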