diff --git a/timm/models/mobilenetv3.py b/timm/models/mobilenetv3.py
index 3ec1ab9b..1d5a7fc0 100644
--- a/timm/models/mobilenetv3.py
+++ b/timm/models/mobilenetv3.py
@@ -39,6 +39,12 @@ default_cfgs = {
     'mobilenetv3_large_100': _cfg(
         interpolation='bicubic',
         url='https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-weights/mobilenetv3_large_100_ra-f55367f5.pth'),
+    'mobilenetv3_large_100_1k_miil_77_9': _cfg(
+        interpolation='bilinear', mean=(0, 0, 0), std=(1, 1, 1),
+        url='https://miil-public-eu.oss-eu-central-1.aliyuncs.com/model-zoo/ImageNet_21K_P/models/timm/mobilenetv3_large_100_1k_miil_77_9.pth'),
+    'mobilenetv3_large_100_21k_miil': _cfg(
+        interpolation='bilinear', mean=(0, 0, 0), std=(1, 1, 1),
+        url='https://miil-public-eu.oss-eu-central-1.aliyuncs.com/model-zoo/ImageNet_21K_P/models/timm/mobilenetv3_large_100_21k_miil.pth', num_classes=11221),
     'mobilenetv3_small_075': _cfg(url=''),
     'mobilenetv3_small_100': _cfg(url=''),
     'mobilenetv3_rw': _cfg(
@@ -367,6 +373,20 @@ def mobilenetv3_large_100(pretrained=False, **kwargs):
     return model
 
 
+@register_model
+def mobilenetv3_large_100_1k_miil(pretrained=False, **kwargs):
+    """ MobileNet V3 """
+    model = _gen_mobilenet_v3('mobilenetv3_large_100_1k_miil_77_9', 1.0, pretrained=pretrained, **kwargs)
+    return model
+
+
+@register_model
+def mobilenetv3_large_100_21k_miil(pretrained=False, **kwargs):
+    """ MobileNet V3 """
+    model = _gen_mobilenet_v3('mobilenetv3_large_100_21k_miil', 1.0, pretrained=pretrained, **kwargs)
+    return model
+
+
 @register_model
 def mobilenetv3_small_075(pretrained=False, **kwargs):
     """ MobileNet V3 """
diff --git a/timm/models/tresnet.py b/timm/models/tresnet.py
index a8c237ed..27cdc6cc 100644
--- a/timm/models/tresnet.py
+++ b/timm/models/tresnet.py
@@ -32,7 +32,9 @@ def _cfg(url='', **kwargs):
 
 default_cfgs = {
     'tresnet_m': _cfg(
-        url='https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-tresnet/tresnet_m_80_8-dbc13962.pth'),
+        url='https://miil-public-eu.oss-eu-central-1.aliyuncs.com/model-zoo/ImageNet_21K_P/models/timm/tresnet_m_1k_miil_83_1.pth'),
+    'tresnet_m_21k_miil': _cfg(
+        url='https://miil-public-eu.oss-eu-central-1.aliyuncs.com/model-zoo/ImageNet_21K_P/models/timm/tresnet_m_miil_21k.pth', num_classes=11221),
     'tresnet_l': _cfg(
         url='https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-tresnet/tresnet_l_81_5-235b486c.pth'),
     'tresnet_xl': _cfg(
@@ -264,6 +266,10 @@ def tresnet_m(pretrained=False, **kwargs):
     model_kwargs = dict(layers=[3, 4, 11, 3], **kwargs)
     return _create_tresnet('tresnet_m', pretrained=pretrained, **model_kwargs)
 
+@register_model
+def tresnet_m_21k_miil(pretrained=False, **kwargs):
+    model_kwargs = dict(layers=[3, 4, 11, 3], **kwargs)
+    return _create_tresnet('tresnet_m_21k_miil', pretrained=pretrained, **model_kwargs)
 
 @register_model
 def tresnet_l(pretrained=False, **kwargs):
diff --git a/timm/models/vision_transformer.py b/timm/models/vision_transformer.py
index f57242a1..ae73d1d8 100644
--- a/timm/models/vision_transformer.py
+++ b/timm/models/vision_transformer.py
@@ -118,6 +118,17 @@ default_cfgs = {
     'vit_deit_base_distilled_patch16_384': _cfg(
         url='https://dl.fbaipublicfiles.com/deit/deit_base_distilled_patch16_384-d0272ac0.pth',
         input_size=(3, 384, 384), crop_pct=1.0, classifier=('head', 'head_dist')),
+
+    # ViT ImageNet-21K-P pretraining
+    'vit_base_patch16_224_21k_miil': _cfg(
+        url='https://miil-public-eu.oss-eu-central-1.aliyuncs.com/model-zoo/ImageNet_21K_P/models/timm/vit_base_patch16_224_21k_miil.pth',
+        mean=(0, 0, 0), std=(1, 1, 1), crop_pct=0.875, interpolation='bilinear', num_classes=11221,
+    ),
+    'vit_base_patch16_224_1k_miil': _cfg(
+        url='https://miil-public-eu.oss-eu-central-1.aliyuncs.com/model-zoo/ImageNet_21K_P/models/timm'
+            '/vit_base_patch16_224_1k_miil_84_4.pth',
+        mean=(0, 0, 0), std=(1, 1, 1), crop_pct=0.875, interpolation='bilinear',
+    ),
 }
 
 
@@ -155,7 +166,7 @@ class Attention(nn.Module):
     def forward(self, x):
         B, N, C = x.shape
         qkv = self.qkv(x).reshape(B, N, 3, self.num_heads, C // self.num_heads).permute(2, 0, 3, 1, 4)
-        q, k, v = qkv[0], qkv[1], qkv[2]   # make torchscript happy (cannot use tensor as tuple)
+        q, k, v = qkv[0], qkv[1], qkv[2]  # make torchscript happy (cannot use tensor as tuple)
 
         attn = (q @ k.transpose(-2, -1)) * self.scale
         attn = attn.softmax(dim=-1)
@@ -652,7 +663,7 @@ def vit_deit_tiny_distilled_patch16_224(pretrained=False, **kwargs):
     """
     model_kwargs = dict(patch_size=16, embed_dim=192, depth=12, num_heads=3, **kwargs)
     model = _create_vision_transformer(
-        'vit_deit_tiny_distilled_patch16_224', pretrained=pretrained,  distilled=True, **model_kwargs)
+        'vit_deit_tiny_distilled_patch16_224', pretrained=pretrained, distilled=True, **model_kwargs)
     return model
 
 
@@ -663,7 +674,7 @@ def vit_deit_small_distilled_patch16_224(pretrained=False, **kwargs):
     """
     model_kwargs = dict(patch_size=16, embed_dim=384, depth=12, num_heads=6, **kwargs)
     model = _create_vision_transformer(
-        'vit_deit_small_distilled_patch16_224', pretrained=pretrained,  distilled=True, **model_kwargs)
+        'vit_deit_small_distilled_patch16_224', pretrained=pretrained, distilled=True, **model_kwargs)
     return model
 
 
@@ -674,7 +685,7 @@ def vit_deit_base_distilled_patch16_224(pretrained=False, **kwargs):
     """
     model_kwargs = dict(patch_size=16, embed_dim=768, depth=12, num_heads=12, **kwargs)
     model = _create_vision_transformer(
-        'vit_deit_base_distilled_patch16_224', pretrained=pretrained,  distilled=True, **model_kwargs)
+        'vit_deit_base_distilled_patch16_224', pretrained=pretrained, distilled=True, **model_kwargs)
     return model
 
 
@@ -687,3 +698,21 @@ def vit_deit_base_distilled_patch16_384(pretrained=False, **kwargs):
     model = _create_vision_transformer(
         'vit_deit_base_distilled_patch16_384', pretrained=pretrained, distilled=True, **model_kwargs)
     return model
+
+@register_model
+def vit_base_patch16_224_21k_miil(pretrained=False, **kwargs):
+    """ ViT-Base (ViT-B/16) from original paper (https://arxiv.org/abs/2010.11929).
+    Weights taken from: https://github.com/Alibaba-MIIL/ImageNet21K
+    """
+    model_kwargs = dict(patch_size=16, embed_dim=768, depth=12, num_heads=12, qkv_bias=False, **kwargs)
+    model = _create_vision_transformer('vit_base_patch16_224_21k_miil', pretrained=pretrained, **model_kwargs)
+    return model
+
+@register_model
+def vit_base_patch16_224_1k_miil(pretrained=False, **kwargs):
+    """ ViT-Base (ViT-B/16) from original paper (https://arxiv.org/abs/2010.11929).
+    Weights taken from: https://github.com/Alibaba-MIIL/ImageNet21K
+    """
+    model_kwargs = dict(patch_size=16, embed_dim=768, depth=12, num_heads=12, qkv_bias=False, **kwargs)
+    model = _create_vision_transformer('vit_base_patch16_224_1k_miil', pretrained=pretrained, **model_kwargs)
+    return model
\ No newline at end of file