@ -24,12 +24,12 @@ from functools import partial
import torch
import torch . nn as nn
import torch . nn . functional as F
from timm . data import IMAGENET_DEFAULT_MEAN , IMAGENET_DEFAULT_STD
from . helpers import build_model_with_cfg
from . registry import register_model
from . layers import ClassifierHead , DropPath , AvgPool2dSame , ScaledStdConv2d , get_act_layer , get_attn , make_divisible , get_act_fn
from . layers import ClassifierHead , DropPath , AvgPool2dSame , ScaledStdConv2d , ScaledStdConv2dSame , \
get_act_layer , get_act_fn , get_attn , make_divisible
def _dcfg ( url = ' ' , * * kwargs ) :
@ -38,75 +38,102 @@ def _dcfg(url='', **kwargs):
' num_classes ' : 1000 , ' input_size ' : ( 3 , 224 , 224 ) , ' pool_size ' : ( 7 , 7 ) ,
' crop_pct ' : 0.9 , ' interpolation ' : ' bicubic ' ,
' mean ' : IMAGENET_DEFAULT_MEAN , ' std ' : IMAGENET_DEFAULT_STD ,
' first_conv ' : ' stem.conv ' , ' classifier ' : ' head.fc ' ,
' first_conv ' : ' stem.conv 1 ' , ' classifier ' : ' head.fc ' ,
* * kwargs
}
default_cfgs = dict (
dm_nfnet_f0 = _dcfg (
url = ' https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-dnf-weights/dm_nfnet_f0-604f9c3a.pth ' ,
pool_size = ( 6 , 6 ) , input_size = ( 3 , 192 , 192 ) , test_input_size = ( 3 , 256 , 256 ) , crop_pct = .9 ) ,
dm_nfnet_f1 = _dcfg (
url = ' https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-dnf-weights/dm_nfnet_f1-fc540f82.pth ' ,
pool_size = ( 7 , 7 ) , input_size = ( 3 , 224 , 224 ) , test_input_size = ( 3 , 320 , 320 ) , crop_pct = 0.91 ) ,
dm_nfnet_f2 = _dcfg (
url = ' https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-dnf-weights/dm_nfnet_f2-89875923.pth ' ,
pool_size = ( 8 , 8 ) , input_size = ( 3 , 256 , 256 ) , test_input_size = ( 3 , 352 , 352 ) , crop_pct = 0.92 ) ,
dm_nfnet_f3 = _dcfg (
url = ' https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-dnf-weights/dm_nfnet_f3-d74ab3aa.pth ' ,
pool_size = ( 10 , 10 ) , input_size = ( 3 , 320 , 320 ) , test_input_size = ( 3 , 416 , 416 ) , crop_pct = 0.94 ) ,
dm_nfnet_f4 = _dcfg (
url = ' https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-dnf-weights/dm_nfnet_f4-0ac5b10b.pth ' ,
pool_size = ( 12 , 12 ) , input_size = ( 3 , 384 , 384 ) , test_input_size = ( 3 , 512 , 512 ) , crop_pct = 0.951 ) ,
dm_nfnet_f5 = _dcfg (
url = ' https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-dnf-weights/dm_nfnet_f5-ecb20ab1.pth ' ,
pool_size = ( 13 , 13 ) , input_size = ( 3 , 416 , 416 ) , test_input_size = ( 3 , 544 , 544 ) , crop_pct = 0.954 ) ,
dm_nfnet_f6 = _dcfg (
url = ' https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-dnf-weights/dm_nfnet_f6-e0f12116.pth ' ,
pool_size = ( 14 , 14 ) , input_size = ( 3 , 448 , 448 ) , test_input_size = ( 3 , 576 , 576 ) , crop_pct = 0.956 ) ,
nfnet_f0 = _dcfg (
url = ' ' , pool_size = ( 6 , 6 ) , input_size = ( 3 , 192 , 192 ) , test_input_size = ( 3 , 256 , 256 ) , first_conv = ' stem.conv1 ' ) ,
url = ' ' , pool_size = ( 6 , 6 ) , input_size = ( 3 , 192 , 192 ) , test_input_size = ( 3 , 256 , 256 ) ),
nfnet_f1 = _dcfg (
url = ' ' , pool_size = ( 7 , 7 ) , input_size = ( 3 , 224 , 224 ) , test_input_size = ( 3 , 320 , 320 ) , first_conv = ' stem.conv1 ' ) ,
url = ' ' , pool_size = ( 7 , 7 ) , input_size = ( 3 , 224 , 224 ) , test_input_size = ( 3 , 320 , 320 ) ),
nfnet_f2 = _dcfg (
url = ' ' , pool_size = ( 8 , 8 ) , input_size = ( 3 , 256 , 256 ) , test_input_size = ( 3 , 352 , 352 ) , first_conv = ' stem.conv1 ' ) ,
url = ' ' , pool_size = ( 8 , 8 ) , input_size = ( 3 , 256 , 256 ) , test_input_size = ( 3 , 352 , 352 ) ),
nfnet_f3 = _dcfg (
url = ' ' , pool_size = ( 10 , 10 ) , input_size = ( 3 , 320 , 320 ) , test_input_size = ( 3 , 416 , 416 ) , first_conv = ' stem.conv1 ' ) ,
url = ' ' , pool_size = ( 10 , 10 ) , input_size = ( 3 , 320 , 320 ) , test_input_size = ( 3 , 416 , 416 ) ),
nfnet_f4 = _dcfg (
url = ' ' , pool_size = ( 12 , 12 ) , input_size = ( 3 , 384 , 384 ) , test_input_size = ( 3 , 512 , 512 ) , first_conv = ' stem.conv1 ' ) ,
url = ' ' , pool_size = ( 12 , 12 ) , input_size = ( 3 , 384 , 384 ) , test_input_size = ( 3 , 512 , 512 ) ),
nfnet_f5 = _dcfg (
url = ' ' , pool_size = ( 13 , 13 ) , input_size = ( 3 , 416 , 416 ) , test_input_size = ( 3 , 544 , 544 ) , first_conv = ' stem.conv1 ' ) ,
url = ' ' , pool_size = ( 13 , 13 ) , input_size = ( 3 , 416 , 416 ) , test_input_size = ( 3 , 544 , 544 ) ),
nfnet_f6 = _dcfg (
url = ' ' , pool_size = ( 14 , 14 ) , input_size = ( 3 , 448 , 448 ) , test_input_size = ( 3 , 576 , 576 ) , first_conv = ' stem.conv1 ' ) ,
url = ' ' , pool_size = ( 14 , 14 ) , input_size = ( 3 , 448 , 448 ) , test_input_size = ( 3 , 576 , 576 ) ),
nfnet_f7 = _dcfg (
url = ' ' , pool_size = ( 15 , 15 ) , input_size = ( 3 , 480 , 480 ) , test_input_size = ( 3 , 608 , 608 ) , first_conv = ' stem.conv1 ' ) ,
url = ' ' , pool_size = ( 15 , 15 ) , input_size = ( 3 , 480 , 480 ) , test_input_size = ( 3 , 608 , 608 ) ),
nfnet_f0s = _dcfg (
url = ' ' , pool_size = ( 6 , 6 ) , input_size = ( 3 , 192 , 192 ) , test_input_size = ( 3 , 256 , 256 ) , first_conv = ' stem.conv1 ' ) ,
url = ' ' , pool_size = ( 6 , 6 ) , input_size = ( 3 , 192 , 192 ) , test_input_size = ( 3 , 256 , 256 ) ),
nfnet_f1s = _dcfg (
url = ' ' , pool_size = ( 7 , 7 ) , input_size = ( 3 , 224 , 224 ) , test_input_size = ( 3 , 320 , 320 ) , first_conv = ' stem.conv1 ' ) ,
url = ' ' , pool_size = ( 7 , 7 ) , input_size = ( 3 , 224 , 224 ) , test_input_size = ( 3 , 320 , 320 ) ),
nfnet_f2s = _dcfg (
url = ' ' , pool_size = ( 8 , 8 ) , input_size = ( 3 , 256 , 256 ) , test_input_size = ( 3 , 352 , 352 ) , first_conv = ' stem.conv1 ' ),
url = ' ' , pool_size = ( 8 , 8 ) , input_size = ( 3 , 256 , 256 ) , test_input_size = ( 3 , 352 , 352 ) ),
nfnet_f3s = _dcfg (
url = ' ' , pool_size = ( 10 , 10 ) , input_size = ( 3 , 320 , 320 ) , test_input_size = ( 3 , 416 , 416 ) , first_conv = ' stem.conv1 ' ),
url = ' ' , pool_size = ( 10 , 10 ) , input_size = ( 3 , 320 , 320 ) , test_input_size = ( 3 , 416 , 416 ) ),
nfnet_f4s = _dcfg (
url = ' ' , pool_size = ( 12 , 12 ) , input_size = ( 3 , 384 , 384 ) , test_input_size = ( 3 , 512 , 512 ) , first_conv = ' stem.conv1 ' ),
url = ' ' , pool_size = ( 12 , 12 ) , input_size = ( 3 , 384 , 384 ) , test_input_size = ( 3 , 512 , 512 ) ),
nfnet_f5s = _dcfg (
url = ' ' , pool_size = ( 13 , 13 ) , input_size = ( 3 , 416 , 416 ) , test_input_size = ( 3 , 544 , 544 ) , first_conv = ' stem.conv1 ' ),
url = ' ' , pool_size = ( 13 , 13 ) , input_size = ( 3 , 416 , 416 ) , test_input_size = ( 3 , 544 , 544 ) ),
nfnet_f6s = _dcfg (
url = ' ' , pool_size = ( 14 , 14 ) , input_size = ( 3 , 448 , 448 ) , test_input_size = ( 3 , 576 , 576 ) , first_conv = ' stem.conv1 ' ),
url = ' ' , pool_size = ( 14 , 14 ) , input_size = ( 3 , 448 , 448 ) , test_input_size = ( 3 , 576 , 576 ) ),
nfnet_f7s = _dcfg (
url = ' ' , pool_size = ( 15 , 15 ) , input_size = ( 3 , 480 , 480 ) , test_input_size = ( 3 , 608 , 608 ) , first_conv = ' stem.conv1 ' ),
url = ' ' , pool_size = ( 15 , 15 ) , input_size = ( 3 , 480 , 480 ) , test_input_size = ( 3 , 608 , 608 ) ),
nfnet_l0a = _dcfg (
url = ' ' , pool_size = ( 6 , 6 ) , input_size = ( 3 , 192 , 192 ) , test_input_size = ( 3 , 256 , 256 ) , first_conv = ' stem.conv1 ' ),
url = ' ' , pool_size = ( 6 , 6 ) , input_size = ( 3 , 192 , 192 ) , test_input_size = ( 3 , 256 , 256 ) ),
nfnet_l0b = _dcfg (
url = ' ' , pool_size = ( 6 , 6 ) , input_size = ( 3 , 192 , 192 ) , test_input_size = ( 3 , 256 , 256 ) , first_conv = ' stem.conv1 ' ),
url = ' ' , pool_size = ( 6 , 6 ) , input_size = ( 3 , 192 , 192 ) , test_input_size = ( 3 , 256 , 256 ) ),
nfnet_l0c = _dcfg (
url = ' ' , pool_size = ( 6 , 6 ) , input_size = ( 3 , 192 , 192 ) , test_input_size = ( 3 , 256 , 256 ) , first_conv = ' stem.conv1 ' ),
url = ' ' , pool_size = ( 6 , 6 ) , input_size = ( 3 , 192 , 192 ) , test_input_size = ( 3 , 256 , 256 ) ),
nf_regnet_b0 = _dcfg ( url = ' ' , pool_size = ( 6 , 6 ) , input_size = ( 3 , 192 , 192 ) , test_input_size = ( 3 , 256 , 256 ) ) ,
nf_regnet_b0 = _dcfg (
url = ' ' , pool_size = ( 6 , 6 ) , input_size = ( 3 , 192 , 192 ) , test_input_size = ( 3 , 256 , 256 ) , first_conv = ' stem.conv ' ) ,
nf_regnet_b1 = _dcfg (
url = ' https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-weights/nf_regnet_b1_256_ra2-ad85cfef.pth ' ,
pool_size = ( 8 , 8 ) , input_size = ( 3 , 256 , 256 ) , test_input_size = ( 3 , 288 , 288 ) ) , # NOT to paper spec
nf_regnet_b2 = _dcfg ( url = ' ' , pool_size = ( 8 , 8 ) , input_size = ( 3 , 240 , 240 ) , test_input_size = ( 3 , 272 , 272 ) ) ,
nf_regnet_b3 = _dcfg ( url = ' ' , pool_size = ( 9 , 9 ) , input_size = ( 3 , 288 , 288 ) , test_input_size = ( 3 , 320 , 320 ) ) ,
nf_regnet_b4 = _dcfg ( url = ' ' , pool_size = ( 10 , 10 ) , input_size = ( 3 , 320 , 320 ) , test_input_size = ( 3 , 384 , 384 ) ) ,
nf_regnet_b5 = _dcfg ( url = ' ' , pool_size = ( 12 , 12 ) , input_size = ( 3 , 384 , 384 ) , test_input_size = ( 3 , 456 , 456 ) ) ,
nf_resnet26 = _dcfg ( url = ' ' ) ,
pool_size = ( 8 , 8 ) , input_size = ( 3 , 256 , 256 ) , test_input_size = ( 3 , 288 , 288 ) , first_conv = ' stem.conv ' ) , # NOT to paper spec
nf_regnet_b2 = _dcfg (
url = ' ' , pool_size = ( 8 , 8 ) , input_size = ( 3 , 240 , 240 ) , test_input_size = ( 3 , 272 , 272 ) , first_conv = ' stem.conv ' ) ,
nf_regnet_b3 = _dcfg (
url = ' ' , pool_size = ( 9 , 9 ) , input_size = ( 3 , 288 , 288 ) , test_input_size = ( 3 , 320 , 320 ) , first_conv = ' stem.conv ' ) ,
nf_regnet_b4 = _dcfg (
url = ' ' , pool_size = ( 10 , 10 ) , input_size = ( 3 , 320 , 320 ) , test_input_size = ( 3 , 384 , 384 ) , first_conv = ' stem.conv ' ) ,
nf_regnet_b5 = _dcfg (
url = ' ' , pool_size = ( 12 , 12 ) , input_size = ( 3 , 384 , 384 ) , test_input_size = ( 3 , 456 , 456 ) , first_conv = ' stem.conv ' ) ,
nf_resnet26 = _dcfg ( url = ' ' , first_conv = ' stem.conv ' ) ,
nf_resnet50 = _dcfg (
url = ' https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-weights/nf_resnet50_ra2-9f236009.pth ' ,
pool_size = ( 8 , 8 ) , input_size = ( 3 , 256 , 256 ) , test_input_size = ( 3 , 288 , 288 ) , crop_pct = 0.94 ) ,
nf_resnet101 = _dcfg ( url = ' ' ) ,
pool_size = ( 8 , 8 ) , input_size = ( 3 , 256 , 256 ) , test_input_size = ( 3 , 288 , 288 ) , crop_pct = 0.94 , first_conv = ' stem.conv ' ),
nf_resnet101 = _dcfg ( url = ' ' , first_conv = ' stem.conv ' ),
nf_seresnet26 = _dcfg ( url = ' ' ),
nf_seresnet50 = _dcfg ( url = ' ' ),
nf_seresnet101 = _dcfg ( url = ' ' ),
nf_seresnet26 = _dcfg ( url = ' ' , first_conv = ' stem.conv ' ),
nf_seresnet50 = _dcfg ( url = ' ' , first_conv = ' stem.conv ' ),
nf_seresnet101 = _dcfg ( url = ' ' , first_conv = ' stem.conv ' ),
nf_ecaresnet26 = _dcfg ( url = ' ' ),
nf_ecaresnet50 = _dcfg ( url = ' ' ),
nf_ecaresnet101 = _dcfg ( url = ' ' ),
nf_ecaresnet26 = _dcfg ( url = ' ' , first_conv = ' stem.conv ' ),
nf_ecaresnet50 = _dcfg ( url = ' ' , first_conv = ' stem.conv ' ),
nf_ecaresnet101 = _dcfg ( url = ' ' , first_conv = ' stem.conv ' ),
)
@ -115,7 +142,6 @@ class NfCfg:
depths : Tuple [ int , int , int , int ]
channels : Tuple [ int , int , int , int ]
alpha : float = 0.2
gamma_in_act : bool = False
stem_type : str = ' 3x3 '
stem_chs : Optional [ int ] = None
group_size : Optional [ int ] = None
@ -128,6 +154,8 @@ class NfCfg:
ch_div : int = 8 # round channels % 8 == 0 to keep tensor-core use optimal
reg : bool = False # enables EfficientNet-like options used in RegNet variants, expand from in_chs, se in middle
extra_conv : bool = False # extra 3x3 bottleneck convolution for NFNet models
gamma_in_act : bool = False
same_padding : bool = False
skipinit : bool = False # disabled by default, non-trivial performance impact
zero_init_fc : bool = False
act_layer : str = ' silu '
@ -163,8 +191,26 @@ def _nfnet_cfg(
return cfg
def _dm_nfnet_cfg(depths, channels=(256, 512, 1536, 1536), act_layer=' gelu ', skipinit=True):
    """Build the NfCfg shared by the DeepMind-weight-compatible (dm_) NFNet-F variants.

    Args:
        depths: per-stage depths, passed to NfCfg unchanged.
        channels: per-stage channel counts; the last entry also determines
            ``num_features`` (twice its value, truncated to int).
        act_layer: activation name stored on the config.
        skipinit: passed through to ``NfCfg.skipinit``.

    Returns:
        An NfCfg with a fixed stem (type and 128 stem channels), group size 128,
        bottleneck ratio 0.5, ``extra_conv``, ``gamma_in_act`` and
        ``same_padding`` enabled, and an attention layer configured with
        ``reduction_ratio=0.5, divisor=8``.
    """
    # NOTE(review): the string values here are reproduced exactly as they appear
    # in this copy of the file, including the surrounding spaces -- confirm they
    # match the names the stem / activation / attention factories expect.
    return NfCfg(
        depths=depths,
        channels=channels,
        stem_type=' deep_quad ',
        stem_chs=128,
        group_size=128,
        bottle_ratio=0.5,
        extra_conv=True,
        gamma_in_act=True,
        same_padding=True,
        skipinit=skipinit,
        # Final feature width is double the last stage's channel count.
        num_features=int(channels[-1] * 2.0),
        act_layer=act_layer,
        attn_layer=' se ',
        attn_kwargs=dict(reduction_ratio=0.5, divisor=8),
    )
model_cfgs = dict (
# NFNet-F models w/ GeLU
# NFNet-F models w/ GELU compatible with DeepMind weights
dm_nfnet_f0 = _dm_nfnet_cfg ( depths = ( 1 , 2 , 6 , 3 ) ) ,
dm_nfnet_f1 = _dm_nfnet_cfg ( depths = ( 2 , 4 , 12 , 6 ) ) ,
dm_nfnet_f2 = _dm_nfnet_cfg ( depths = ( 3 , 6 , 18 , 9 ) ) ,
dm_nfnet_f3 = _dm_nfnet_cfg ( depths = ( 4 , 8 , 24 , 12 ) ) ,
dm_nfnet_f4 = _dm_nfnet_cfg ( depths = ( 5 , 10 , 30 , 15 ) ) ,
dm_nfnet_f5 = _dm_nfnet_cfg ( depths = ( 6 , 12 , 36 , 18 ) ) ,
dm_nfnet_f6 = _dm_nfnet_cfg ( depths = ( 7 , 14 , 42 , 21 ) ) ,
# NFNet-F models w/ GELU (these will likely be deprecated/removed, keeping only the dm_ versions for GELU)
nfnet_f0 = _nfnet_cfg ( depths = ( 1 , 2 , 6 , 3 ) ) ,
nfnet_f1 = _nfnet_cfg ( depths = ( 2 , 4 , 12 , 6 ) ) ,
nfnet_f2 = _nfnet_cfg ( depths = ( 3 , 6 , 18 , 9 ) ) ,
@ -229,7 +275,7 @@ class GammaAct(nn.Module):
self . inplace = inplace
def forward ( self , x ) :
    """Apply ``self.act_fn`` to ``x`` (with ``inplace=self.inplace``) and scale the result by ``self.gamma``."""
    return self . gamma * self . act_fn( x , inplace = self . inplace )
    # NOTE(review): the statement below is unreachable because the line above
    # already returns. It computes the same product, but scales the activation
    # output in place via mul_. Both forms appear to have been kept side by
    # side -- confirm which one is intended and drop the other.
    return self . act_fn( x , inplace = self . inplace ) . mul_ ( self . gamma )
def act_with_gamma ( act_type , gamma : float = 1. ) :
@ -325,8 +371,7 @@ class NormFreeBlock(nn.Module):
out = self . drop_path ( out )
if self . skipinit_gain is not None :
# this really slows things down for some reason, TBD
out = out * self . skipinit_gain
out . mul_ ( self . skipinit_gain ) # this slows things down more than expected, TBD
out = out * self . alpha + shortcut
return out
@ -419,12 +464,13 @@ class NormFreeNet(nn.Module):
self . num_classes = num_classes
self . drop_rate = drop_rate
assert cfg . act_layer in _nonlin_gamma , f " Please add non-linearity constants for activation ( { cfg . act_layer } ). "
conv_layer = ScaledStdConv2dSame if cfg . same_padding else ScaledStdConv2d
if cfg . gamma_in_act :
act_layer = act_with_gamma ( cfg . act_layer , gamma = _nonlin_gamma [ cfg . act_layer ] )
conv_layer = partial ( ScaledStdConv2d, bias = True , gain = True )
conv_layer = partial ( conv_layer, eps = 1e-4 ) # DM weights better with higher eps
else :
act_layer = get_act_layer ( cfg . act_layer )
conv_layer = partial ( ScaledStdConv2d, bias = True , gain = True , gamma = _nonlin_gamma [ cfg . act_layer ] )
conv_layer = partial ( conv_layer , gamma = _nonlin_gamma [ cfg . act_layer ] )
attn_layer = partial ( get_attn ( cfg . attn_layer ) , * * cfg . attn_kwargs ) if cfg . attn_layer else None
stem_chs = make_divisible ( ( cfg . stem_chs or cfg . channels [ 0 ] ) * cfg . width_factor , cfg . ch_div )
@ -538,6 +584,69 @@ def _create_normfreenet(variant, pretrained=False, **kwargs):
* * kwargs )
@register_model
def dm_nfnet_f0(pretrained=False, **kwargs):
    """NFNet-F0 (DeepMind weight compatible).

    `High-Performance Large-Scale Image Recognition Without Normalization`
        - https://arxiv.org/abs/2102.06171

    Args:
        pretrained: forwarded to ``_create_normfreenet``.
        **kwargs: extra keyword arguments forwarded to ``_create_normfreenet``.

    Returns:
        The result of ``_create_normfreenet`` for the ``dm_nfnet_f0`` variant.
    """
    # Variant name is written without surrounding spaces so it equals the
    # `dm_nfnet_f0` key in default_cfgs / model_cfgs (assumes
    # _create_normfreenet looks the variant up by name -- confirm).
    return _create_normfreenet('dm_nfnet_f0', pretrained=pretrained, **kwargs)
@register_model
def dm_nfnet_f1(pretrained=False, **kwargs):
    """NFNet-F1 (DeepMind weight compatible).

    `High-Performance Large-Scale Image Recognition Without Normalization`
        - https://arxiv.org/abs/2102.06171

    Args:
        pretrained: forwarded to ``_create_normfreenet``.
        **kwargs: extra keyword arguments forwarded to ``_create_normfreenet``.

    Returns:
        The result of ``_create_normfreenet`` for the ``dm_nfnet_f1`` variant.
    """
    # Variant name is written without surrounding spaces so it equals the
    # `dm_nfnet_f1` key in default_cfgs / model_cfgs (assumes
    # _create_normfreenet looks the variant up by name -- confirm).
    return _create_normfreenet('dm_nfnet_f1', pretrained=pretrained, **kwargs)
@register_model
def dm_nfnet_f2(pretrained=False, **kwargs):
    """NFNet-F2 (DeepMind weight compatible).

    `High-Performance Large-Scale Image Recognition Without Normalization`
        - https://arxiv.org/abs/2102.06171

    Args:
        pretrained: forwarded to ``_create_normfreenet``.
        **kwargs: extra keyword arguments forwarded to ``_create_normfreenet``.

    Returns:
        The result of ``_create_normfreenet`` for the ``dm_nfnet_f2`` variant.
    """
    # Variant name is written without surrounding spaces so it equals the
    # `dm_nfnet_f2` key in default_cfgs / model_cfgs (assumes
    # _create_normfreenet looks the variant up by name -- confirm).
    return _create_normfreenet('dm_nfnet_f2', pretrained=pretrained, **kwargs)
@register_model
def dm_nfnet_f3(pretrained=False, **kwargs):
    """NFNet-F3 (DeepMind weight compatible).

    `High-Performance Large-Scale Image Recognition Without Normalization`
        - https://arxiv.org/abs/2102.06171

    Args:
        pretrained: forwarded to ``_create_normfreenet``.
        **kwargs: extra keyword arguments forwarded to ``_create_normfreenet``.

    Returns:
        The result of ``_create_normfreenet`` for the ``dm_nfnet_f3`` variant.
    """
    # Variant name is written without surrounding spaces so it equals the
    # `dm_nfnet_f3` key in default_cfgs / model_cfgs (assumes
    # _create_normfreenet looks the variant up by name -- confirm).
    return _create_normfreenet('dm_nfnet_f3', pretrained=pretrained, **kwargs)
@register_model
def dm_nfnet_f4(pretrained=False, **kwargs):
    """NFNet-F4 (DeepMind weight compatible).

    `High-Performance Large-Scale Image Recognition Without Normalization`
        - https://arxiv.org/abs/2102.06171

    Args:
        pretrained: forwarded to ``_create_normfreenet``.
        **kwargs: extra keyword arguments forwarded to ``_create_normfreenet``.

    Returns:
        The result of ``_create_normfreenet`` for the ``dm_nfnet_f4`` variant.
    """
    # Variant name is written without surrounding spaces so it equals the
    # `dm_nfnet_f4` key in default_cfgs / model_cfgs (assumes
    # _create_normfreenet looks the variant up by name -- confirm).
    return _create_normfreenet('dm_nfnet_f4', pretrained=pretrained, **kwargs)
@register_model
def dm_nfnet_f5(pretrained=False, **kwargs):
    """NFNet-F5 (DeepMind weight compatible).

    `High-Performance Large-Scale Image Recognition Without Normalization`
        - https://arxiv.org/abs/2102.06171

    Args:
        pretrained: forwarded to ``_create_normfreenet``.
        **kwargs: extra keyword arguments forwarded to ``_create_normfreenet``.

    Returns:
        The result of ``_create_normfreenet`` for the ``dm_nfnet_f5`` variant.
    """
    # Variant name is written without surrounding spaces so it equals the
    # `dm_nfnet_f5` key in default_cfgs / model_cfgs (assumes
    # _create_normfreenet looks the variant up by name -- confirm).
    return _create_normfreenet('dm_nfnet_f5', pretrained=pretrained, **kwargs)
@register_model
def dm_nfnet_f6(pretrained=False, **kwargs):
    """NFNet-F6 (DeepMind weight compatible).

    `High-Performance Large-Scale Image Recognition Without Normalization`
        - https://arxiv.org/abs/2102.06171

    Args:
        pretrained: forwarded to ``_create_normfreenet``.
        **kwargs: extra keyword arguments forwarded to ``_create_normfreenet``.

    Returns:
        The result of ``_create_normfreenet`` for the ``dm_nfnet_f6`` variant.
    """
    # Variant name is written without surrounding spaces so it equals the
    # `dm_nfnet_f6` key in default_cfgs / model_cfgs (assumes
    # _create_normfreenet looks the variant up by name -- confirm).
    return _create_normfreenet('dm_nfnet_f6', pretrained=pretrained, **kwargs)
@register_model
def nfnet_f0 ( pretrained = False , * * kwargs ) :
""" NFNet-F0