diff --git a/README.md b/README.md
index ec9c9940..a644d0d0 100644
--- a/README.md
+++ b/README.md
@@ -23,6 +23,15 @@ I'm fortunate to be able to dedicate significant time and money of my own suppor
 
 ## What's New
 
+### March 17, 2021
+* Add new ECA-NFNet-L0 (rename `nfnet_l0c`->`eca_nfnet_l0`) weights trained by myself.
+  * 82.6 top-1 @ 288x288, 82.8 @ 320x320, trained at 224x224
+  * Uses SiLU activation, approx 2x faster than `dm_nfnet_f0` and 50% faster than `nfnet_f0s` w/ 1/3 param count
+* Integrate [Hugging Face model hub](https://huggingface.co/models) into timm create_model and default_cfg handling for pretrained weight and config sharing (more on this soon!)
+* Merge HardCoRe NAS models contributed by https://github.com/yoniaflalo
+* Merge PyTorch trained EfficientNet-EL and pruned ES/EL variants contributed by [DeGirum](https://github.com/DeGirum)
+
+
 ### March 7, 2021
 * First 0.4.x PyPi release w/ NFNets (& related), ByoB (GPU-Efficient, RepVGG, etc).
 * Change feature extraction for pre-activation nets (NFNets, ResNetV2) to return features before activation.
@@ -171,6 +180,7 @@ A full version of the list below with source links can be found in the [document
 * MobileNet-V2 - https://arxiv.org/abs/1801.04381
 * Single-Path NAS - https://arxiv.org/abs/1904.02877
 * GPU-Efficient Networks - https://arxiv.org/abs/2006.14090
+* HardCoRe-NAS - https://arxiv.org/abs/2102.11646
 * HRNet - https://arxiv.org/abs/1908.07919
 * Inception-V3 - https://arxiv.org/abs/1512.00567
 * Inception-ResNet-V2 and Inception-V4 - https://arxiv.org/abs/1602.07261
diff --git a/timm/models/layers/std_conv.py b/timm/models/layers/std_conv.py
index 077dc5fb..b0cb1eeb 100644
--- a/timm/models/layers/std_conv.py
+++ b/timm/models/layers/std_conv.py
@@ -76,13 +76,13 @@ class ScaledStdConv2d(nn.Conv2d):
 
     def __init__(
             self, in_channels, out_channels, kernel_size, stride=1, padding=None, dilation=1, groups=1,
-            bias=True, gamma=1.0, eps=1e-5, use_layernorm=False):
+            bias=True, gamma=1.0, eps=1e-5, gain_init=1.0, use_layernorm=False):
         if padding is None:
             padding = get_padding(kernel_size, stride, dilation)
         super().__init__(
             in_channels, out_channels, kernel_size, stride=stride, padding=padding, dilation=dilation,
             groups=groups, bias=bias)
-        self.gain = nn.Parameter(torch.ones(self.out_channels, 1, 1, 1))
+        self.gain = nn.Parameter(torch.full((self.out_channels, 1, 1, 1), gain_init))
         self.scale = gamma * self.weight[0].numel() ** -0.5  # gamma * 1 / sqrt(fan-in)
         self.eps = eps ** 2 if use_layernorm else eps
         self.use_layernorm = use_layernorm  # experimental, slightly faster/less GPU memory to hijack LN kernel
@@ -110,12 +110,12 @@ class ScaledStdConv2dSame(nn.Conv2d):
 
     def __init__(
             self, in_channels, out_channels, kernel_size, stride=1, padding='SAME', dilation=1, groups=1,
-            bias=True, gamma=1.0, eps=1e-5, use_layernorm=False):
+            bias=True, gamma=1.0, eps=1e-5, gain_init=1.0, use_layernorm=False):
         padding, is_dynamic = get_padding_value(padding, kernel_size, stride=stride, dilation=dilation)
         super().__init__(
             in_channels, out_channels, kernel_size, stride=stride, padding=padding, dilation=dilation,
             groups=groups, bias=bias)
-        self.gain = nn.Parameter(torch.ones(self.out_channels, 1, 1, 1))
+        self.gain = nn.Parameter(torch.full((self.out_channels, 1, 1, 1), gain_init))
         self.scale = gamma * self.weight[0].numel() ** -0.5
         self.same_pad = is_dynamic
         self.eps = eps ** 2 if use_layernorm else eps
diff --git a/timm/models/nfnet.py b/timm/models/nfnet.py
index 662ab0a8..90406e66 100644
--- a/timm/models/nfnet.py
+++ b/timm/models/nfnet.py
@@ -104,8 +104,8 @@ default_cfgs = dict(
         url='', pool_size=(7, 7), input_size=(3, 224, 224), test_input_size=(3, 288, 288)),
     nfnet_l0b=_dcfg(
         url='', pool_size=(7, 7), input_size=(3, 224, 224), test_input_size=(3, 288, 288)),
-    nfnet_l0c=_dcfg(
-        url='https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-weights/nfnet_l0c-ad1045c2.pth',
+    eca_nfnet_l0=_dcfg(
+        url='https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-weights/ecanfnet_l0_ra2-e3e9ac50.pth',
         pool_size=(7, 7), input_size=(3, 224, 224), test_input_size=(3, 288, 288), crop_pct=1.0),
 
     nf_regnet_b0=_dcfg(
@@ -238,7 +238,7 @@ model_cfgs = dict(
     nfnet_l0b=_nfnet_cfg(
         depths=(1, 2, 6, 3), channels=(256, 512, 1536, 1536), feat_mult=1.5, group_size=64, bottle_ratio=0.25,
         attn_kwargs=dict(reduction_ratio=0.25, divisor=8), act_layer='silu'),
-    nfnet_l0c=_nfnet_cfg(
+    eca_nfnet_l0=_nfnet_cfg(
         depths=(1, 2, 6, 3), channels=(256, 512, 1536, 1536), feat_mult=1.5, group_size=64, bottle_ratio=0.25,
         attn_layer='eca', attn_kwargs=dict(), act_layer='silu'),
 
@@ -343,7 +343,7 @@ class NormFreeBlock(nn.Module):
         else:
             self.attn = None
         self.act3 = act_layer()
-        self.conv3 = conv_layer(mid_chs, out_chs, 1)
+        self.conv3 = conv_layer(mid_chs, out_chs, 1, gain_init=1. if skipinit else 0.)
         if not reg and attn_layer is not None:
             self.attn_last = attn_layer(out_chs)  # ResNet blocks apply attn after conv3
         else:
@@ -804,11 +804,11 @@ def nfnet_l0b(pretrained=False, **kwargs):
 
 
 @register_model
-def nfnet_l0c(pretrained=False, **kwargs):
-    """ NFNet-L0c w/ SiLU
+def eca_nfnet_l0(pretrained=False, **kwargs):
+    """ ECA-NFNet-L0 w/ SiLU
     My experimental 'light' model w/ 1.5x final_conv mult, 64 group_size, .25 bottleneck & ECA attn
     """
-    return _create_normfreenet('nfnet_l0c', pretrained=pretrained, **kwargs)
+    return _create_normfreenet('eca_nfnet_l0', pretrained=pretrained, **kwargs)
 
 
 @register_model
diff --git a/timm/version.py b/timm/version.py
index 68eb9b68..ab45471d 100644
--- a/timm/version.py
+++ b/timm/version.py
@@ -1 +1 @@
-__version__ = '0.4.5'
+__version__ = '0.4.6'
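
Reviewer notes with a few usage sketches follow; they are not part of the patch.

After the rename, the model comes through the standard `timm.create_model` entrypoint. A minimal smoke-test sketch, assuming timm 0.4.6 is installed and the pretrained weight URL registered above is reachable:

```python
import torch
import timm

# 'eca_nfnet_l0' replaces the old 'nfnet_l0c' model name; pretrained weights
# download from the release URL registered in default_cfgs in this patch.
model = timm.create_model('eca_nfnet_l0', pretrained=True)
model.eval()

# Trained at 224x224; the config advertises test_input_size 288x288, where the
# README quotes 82.6 top-1.
x = torch.randn(1, 3, 288, 288)
with torch.no_grad():
    logits = model(x)
print(logits.shape)  # torch.Size([1, 1000])
```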
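
The `std_conv.py` change swaps the hard-coded `torch.ones` gain for `torch.full(..., gain_init)`, so the learnable per-output-channel gain can start at any constant instead of always 1. A sketch of what `gain_init=0.` does to a `ScaledStdConv2d`, assuming the behavior of the unshown `forward` (standardized weight multiplied by `self.gain`):

```python
import torch
from timm.models.layers.std_conv import ScaledStdConv2d

# gain_init=0. zeroes the per-channel gain at init, so the standardized weight
# is scaled to zero and the weight path contributes nothing; only the bias
# (still default-initialized by nn.Conv2d) reaches the output.
conv = ScaledStdConv2d(64, 256, kernel_size=1, gain_init=0.)
x = torch.randn(2, 64, 8, 8)
out = conv(x)

# The output is just the bias broadcast over batch and spatial dims.
print((out - conv.bias.view(1, -1, 1, 1)).abs().max())  # tensor(0., ...)
```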
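
In `NormFreeBlock`, `gain_init=1. if skipinit else 0.` zero-initializes the gain of each block's final 1x1 conv when skipinit is disabled, so every residual branch starts near zero and each block is approximately the identity at init (the same idea as zero-initializing the last BN gamma in ResNets); with skipinit enabled, the separate skipinit gain already plays that role, so the conv gain stays at 1. A sketch of checking this on a fresh model; `model.stages[0][0]` assumes the stages are nested `nn.Sequential` modules, and the check assumes the L0 config leaves skipinit off:

```python
import timm

# Fresh, untrained model so init-time parameter values are visible.
model = timm.create_model('eca_nfnet_l0', pretrained=False)

# First block of the first stage; conv3 is the block's final 1x1 conv.
block = model.stages[0][0]
print(float(block.conv3.gain.abs().max()))  # 0.0 -> residual branch near-silent at init
```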