Add ResNet deep tiered stem and model weights for seresnext26t_32x4d and seresnext26d_32x4d

pull/65/head
rwightman 4 years ago
parent 73b78459dc
commit 1f4498f217

@@ -2,6 +2,15 @@
## What's New
### Dec 28, 2019
* Add new model weights and training hparams (see Training Hparams section)
* `seresnext26d_32x4d` - 77.6 top-1, 93.6 top-5
  * deep stem (32, 32, 64), avgpool downsample
  * stem/downsample from the 'Bag of Tricks' paper
* `seresnext26t_32x4d` - 78.0 top-1, 93.7 top-5
  * deep tiered stem (24, 48, 64), avgpool downsample (a modified 'D' variant)
  * stem sizing mods based on ResNet architecture experiments discussed by Jeremy Howard and the fastai devs
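For a quick check of the new weights, a minimal usage sketch, assuming this repo is installed as the `timm` package with this commit's model registrations:

```python
import torch
from timm import create_model  # assumes this repo installed as `timm`

# Both models: cardinality 32, base width 4, SE blocks, avgpool downsample;
# they differ only in the deep stem channel progression (32,32,64 vs 24,48,64).
for name in ('seresnext26d_32x4d', 'seresnext26t_32x4d'):
    model = create_model(name, pretrained=True).eval()  # downloads the release weights
    with torch.no_grad():
        logits = model(torch.randn(1, 3, 224, 224))
    print(name, tuple(logits.shape))  # (1, 1000)
```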
### Dec 23, 2019
* Add RandAugment trained MixNet-XL weights with 80.48 top-1.
* `--dist-bn` argument added to train.py; it distributes BN stats between nodes after each train epoch, before eval (conceptual sketch below)
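Conceptually, `--dist-bn reduce` averages each BatchNorm layer's running stats across processes. A simplified sketch of the idea, not the exact train.py implementation:

```python
import torch.distributed as dist
from torch import nn

def distribute_bn_stats(model: nn.Module, world_size: int):
    """Average BN running_mean/running_var across ranks (simplified illustration)."""
    for m in model.modules():
        if isinstance(m, nn.modules.batchnorm._BatchNorm):
            for buf in (m.running_mean, m.running_var):
                dist.all_reduce(buf, op=dist.ReduceOp.SUM)  # sum stats over all ranks
                buf /= world_size                           # then average in place
```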
@@ -114,6 +123,8 @@ I've leveraged the training scripts in this repository to train a few of the mod
| efficientnet_b1 | 78.692 (21.308) | 94.086 (5.914) | 7.79M | bicubic | 240 |
| resnext50_32x4d | 78.512 (21.488) | 94.042 (5.958) | 25M | bicubic | 224 |
| resnet50 | 78.470 (21.530) | 94.266 (5.734) | 25.6M | bicubic | 224 |
| seresnext26t_32x4d | 77.998 (22.002) | 93.708 (6.292) | 16.8M | bicubic | 224 |
| seresnext26d_32x4d | 77.602 (22.398) | 93.608 (6.392) | 16.8M | bicubic | 224 |
| mixnet_m | 77.256 (22.744) | 93.418 (6.582) | 5.01M | bicubic | 224 |
| seresnext26_32x4d | 77.104 (22.896) | 93.316 (6.684) | 16.8M | bicubic | 224 |
| efficientnet_b0 | 76.912 (23.088) | 93.210 (6.790) | 5.29M | bicubic | 224 |
@@ -237,11 +248,20 @@ Sources for original weights:
## Training Hyperparameters
### EfficientNet-B2 with RandAugment - 80.4 top-1, 95.1 top-5
These params are for dual Titan RTX cards with NVIDIA Apex installed:
`./distributed_train.sh 2 /imagenet/ --model efficientnet_b2 -b 128 --sched step --epochs 450 --decay-epochs 2.4 --decay-rate .97 --opt rmsproptf --opt-eps .001 -j 8 --warmup-lr 1e-6 --weight-decay 1e-5 --drop 0.3 --drop-connect 0.2 --model-ema --model-ema-decay 0.9999 --aa rand-m9-mstd0.5 --remode pixel --reprob 0.2 --amp --lr .016`
### MixNet-XL with RandAugment - 80.5 top-1, 94.9 top-5
These params are for dual Titan RTX cards with NVIDIA Apex installed:
`./distributed_train.sh 2 /imagenet/ --model mixnet_xl -b 128 --sched step --epochs 450 --decay-epochs 2.4 --decay-rate .969 --opt rmsproptf --opt-eps .001 -j 8 --warmup-lr 1e-6 --weight-decay 1e-5 --drop 0.3 --drop-connect 0.2 --model-ema --model-ema-decay 0.9999 --aa rand-m9-mstd0.5 --remode pixel --reprob 0.3 --amp --lr .016 --dist-bn reduce`
### SE-ResNeXt-26-D and SE-ResNeXt-26-T
These hparams (or similar) work well for a wide range of ResNet architectures. It's generally a good idea to increase the epoch count as the model size increases... ie approx 180-200 for ResNe(X)t50, and 220+ for larger models. Increase batch size and LR proportionally for better GPUs or with AMP enabled. These params were for 2 1080Ti cards:
`./distributed_train.sh 2 /imagenet/ --model seresnext26t_32x4d --lr 0.1 --warmup-epochs 5 --epochs 160 --weight-decay 1e-4 --sched cosine --reprob 0.4 --remode pixel -b 112`
**TODO dig up some more**

@@ -121,7 +121,7 @@ def gluon_resnet50_v1c(pretrained=False, num_classes=1000, in_chans=3, **kwargs)
"""
default_cfg = default_cfgs['gluon_resnet50_v1c']
model = ResNet(Bottleneck, [3, 4, 6, 3], num_classes=num_classes, in_chans=in_chans,
stem_width=32, deep_stem=True, **kwargs)
stem_width=32, stem_type='deep', **kwargs)
model.default_cfg = default_cfg
if pretrained:
load_pretrained(model, default_cfg, num_classes, in_chans)
@@ -134,7 +134,7 @@ def gluon_resnet101_v1c(pretrained=False, num_classes=1000, in_chans=3, **kwargs
"""
default_cfg = default_cfgs['gluon_resnet101_v1c']
model = ResNet(Bottleneck, [3, 4, 23, 3], num_classes=num_classes, in_chans=in_chans,
stem_width=32, deep_stem=True, **kwargs)
stem_width=32, stem_type='deep', **kwargs)
model.default_cfg = default_cfg
if pretrained:
load_pretrained(model, default_cfg, num_classes, in_chans)
@@ -147,7 +147,7 @@ def gluon_resnet152_v1c(pretrained=False, num_classes=1000, in_chans=3, **kwargs
"""
default_cfg = default_cfgs['gluon_resnet152_v1c']
model = ResNet(Bottleneck, [3, 8, 36, 3], num_classes=num_classes, in_chans=in_chans,
stem_width=32, deep_stem=True, **kwargs)
stem_width=32, stem_type='deep', **kwargs)
model.default_cfg = default_cfg
if pretrained:
load_pretrained(model, default_cfg, num_classes, in_chans)
@@ -160,7 +160,7 @@ def gluon_resnet50_v1d(pretrained=False, num_classes=1000, in_chans=3, **kwargs)
"""
default_cfg = default_cfgs['gluon_resnet50_v1d']
model = ResNet(Bottleneck, [3, 4, 6, 3], num_classes=num_classes, in_chans=in_chans,
stem_width=32, deep_stem=True, avg_down=True, **kwargs)
stem_width=32, stem_type='deep', avg_down=True, **kwargs)
model.default_cfg = default_cfg
if pretrained:
load_pretrained(model, default_cfg, num_classes, in_chans)
@@ -173,7 +173,7 @@ def gluon_resnet101_v1d(pretrained=False, num_classes=1000, in_chans=3, **kwargs
"""
default_cfg = default_cfgs['gluon_resnet101_v1d']
model = ResNet(Bottleneck, [3, 4, 23, 3], num_classes=num_classes, in_chans=in_chans,
stem_width=32, deep_stem=True, avg_down=True, **kwargs)
stem_width=32, stem_type='deep', avg_down=True, **kwargs)
model.default_cfg = default_cfg
if pretrained:
load_pretrained(model, default_cfg, num_classes, in_chans)
@@ -186,7 +186,7 @@ def gluon_resnet152_v1d(pretrained=False, num_classes=1000, in_chans=3, **kwargs
"""
default_cfg = default_cfgs['gluon_resnet152_v1d']
model = ResNet(Bottleneck, [3, 8, 36, 3], num_classes=num_classes, in_chans=in_chans,
stem_width=32, deep_stem=True, avg_down=True, **kwargs)
stem_width=32, stem_type='deep', avg_down=True, **kwargs)
model.default_cfg = default_cfg
if pretrained:
load_pretrained(model, default_cfg, num_classes, in_chans)
@@ -199,7 +199,7 @@ def gluon_resnet50_v1e(pretrained=False, num_classes=1000, in_chans=3, **kwargs)
"""
default_cfg = default_cfgs['gluon_resnet50_v1e']
model = ResNet(Bottleneck, [3, 4, 6, 3], num_classes=num_classes, in_chans=in_chans,
stem_width=64, deep_stem=True, avg_down=True, **kwargs)
stem_width=64, stem_type='deep', avg_down=True, **kwargs)
model.default_cfg = default_cfg
#if pretrained:
# load_pretrained(model, default_cfg, num_classes, in_chans)
@@ -212,7 +212,7 @@ def gluon_resnet101_v1e(pretrained=False, num_classes=1000, in_chans=3, **kwargs
"""
default_cfg = default_cfgs['gluon_resnet101_v1e']
model = ResNet(Bottleneck, [3, 4, 23, 3], num_classes=num_classes, in_chans=in_chans,
stem_width=64, deep_stem=True, avg_down=True, **kwargs)
stem_width=64, stem_type='deep', avg_down=True, **kwargs)
model.default_cfg = default_cfg
if pretrained:
load_pretrained(model, default_cfg, num_classes, in_chans)
@@ -225,7 +225,7 @@ def gluon_resnet152_v1e(pretrained=False, num_classes=1000, in_chans=3, **kwargs
"""
default_cfg = default_cfgs['gluon_resnet152_v1e']
model = ResNet(Bottleneck, [3, 8, 36, 3], num_classes=num_classes, in_chans=in_chans,
stem_width=64, deep_stem=True, avg_down=True, **kwargs)
stem_width=64, stem_type='deep', avg_down=True, **kwargs)
model.default_cfg = default_cfg
if pretrained:
load_pretrained(model, default_cfg, num_classes, in_chans)
@@ -238,7 +238,7 @@ def gluon_resnet50_v1s(pretrained=False, num_classes=1000, in_chans=3, **kwargs)
"""
default_cfg = default_cfgs['gluon_resnet50_v1s']
model = ResNet(Bottleneck, [3, 4, 6, 3], num_classes=num_classes, in_chans=in_chans,
stem_width=64, deep_stem=True, **kwargs)
stem_width=64, stem_type='deep', **kwargs)
model.default_cfg = default_cfg
if pretrained:
load_pretrained(model, default_cfg, num_classes, in_chans)
@@ -251,7 +251,7 @@ def gluon_resnet101_v1s(pretrained=False, num_classes=1000, in_chans=3, **kwargs
"""
default_cfg = default_cfgs['gluon_resnet101_v1s']
model = ResNet(Bottleneck, [3, 4, 23, 3], num_classes=num_classes, in_chans=in_chans,
stem_width=64, deep_stem=True, **kwargs)
stem_width=64, stem_type='deep', **kwargs)
model.default_cfg = default_cfg
if pretrained:
load_pretrained(model, default_cfg, num_classes, in_chans)
@@ -264,7 +264,7 @@ def gluon_resnet152_v1s(pretrained=False, num_classes=1000, in_chans=3, **kwargs
"""
default_cfg = default_cfgs['gluon_resnet152_v1s']
model = ResNet(Bottleneck, [3, 8, 36, 3], num_classes=num_classes, in_chans=in_chans,
stem_width=64, deep_stem=True, **kwargs)
stem_width=64, stem_type='deep', **kwargs)
model.default_cfg = default_cfg
if pretrained:
load_pretrained(model, default_cfg, num_classes, in_chans)
@@ -362,7 +362,7 @@ def gluon_senet154(pretrained=False, num_classes=1000, in_chans=3, **kwargs):
default_cfg = default_cfgs['gluon_senet154']
model = ResNet(
Bottleneck, [3, 8, 36, 3], cardinality=64, base_width=4, use_se=True,
deep_stem=True, down_kernel_size=3, block_reduce_first=2,
stem_type='deep', down_kernel_size=3, block_reduce_first=2,
num_classes=num_classes, in_chans=in_chans, **kwargs)
model.default_cfg = default_cfg
if pretrained:

@@ -91,6 +91,12 @@ default_cfgs = {
url='https://dl.fbaipublicfiles.com/semiweaksupervision/model_files/semi_weakly_supervised_resnext101_32x8-b4712904.pth'),
'swsl_resnext101_32x16d': _cfg(
url='https://dl.fbaipublicfiles.com/semiweaksupervision/model_files/semi_weakly_supervised_resnext101_32x16-f3559a9c.pth'),
'seresnext26d_32x4d': _cfg(
url='https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-weights/seresnext26d_32x4d-80fa48a3.pth',
interpolation='bicubic'),
'seresnext26t_32x4d': _cfg(
url='https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-weights/seresnext26t_32x4d-361bc1c4.pth',
interpolation='bicubic'),
}
@@ -231,10 +237,11 @@ class ResNet(nn.Module):
ResNet variants:
* normal, b - 7x7 stem, stem_width = 64, same as torchvision ResNet, NVIDIA ResNet 'v1.5', Gluon v1b
* c - 3 layer deep 3x3 stem, stem_width = 32
* d - 3 layer deep 3x3 stem, stem_width = 32, average pool in downsample
* e - 3 layer deep 3x3 stem, stem_width = 64, average pool in downsample
* s - 3 layer deep 3x3 stem, stem_width = 64
* c - 3 layer deep 3x3 stem, stem_width = 32 (32, 32, 64)
* d - 3 layer deep 3x3 stem, stem_width = 32 (32, 32, 64), average pool in downsample
* e - 3 layer deep 3x3 stem, stem_width = 64 (64, 64, 128), average pool in downsample
* s - 3 layer deep 3x3 stem, stem_width = 64 (64, 64, 128)
* t - 3 layer deep 3x3 stem, stem_width = 32 (24, 48, 64), average pool in downsample
ResNeXt
* normal - 7x7 stem, stem_width = 64, standard cardinality and base widths
@@ -263,10 +270,13 @@ class ResNet(nn.Module):
Number of convolution groups for 3x3 conv in Bottleneck.
base_width : int, default 64
Factor determining bottleneck channels. `planes * base_width / 64 * cardinality`
deep_stem : bool, default False
Whether to replace the 7x7 conv1 with 3 3x3 convolution layers.
stem_width : int, default 64
Number of channels in stem convolutions
stem_type : str, default ''
The type of stem:
* '', default - a single 7x7 conv with a width of stem_width
* 'deep' - three 3x3 convolution layers of widths stem_width, stem_width, stem_width * 2
* 'deep_tiered' - three 3x3 conv layers of widths stem_width//4 * 3, stem_width//4 * 6, stem_width * 2
block_reduce_first: int, default 1
Reduction factor for first convolution output width of residual blocks,
1 for all archs except senets, where 2
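To make the `stem_type` arithmetic above concrete, a small hypothetical helper (illustration only, not part of this file) that reproduces the documented channel progressions:

```python
def stem_channels(stem_type='', stem_width=64):
    """Per-conv output widths of the stem, following the docstring above."""
    if 'deep' in stem_type:
        if 'tiered' in stem_type:
            return (3 * (stem_width // 4), 6 * (stem_width // 4), stem_width * 2)
        return (stem_width, stem_width, stem_width * 2)
    return (64,)  # default: single 7x7 conv, output width 64 (self.inplanes) here

assert stem_channels('deep', 32) == (32, 32, 64)         # 'c' / 'd' variants
assert stem_channels('deep_tiered', 32) == (24, 48, 64)  # 't' variant
assert stem_channels('deep', 64) == (64, 64, 128)        # 'e' / 's' variants
```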
@@ -283,12 +293,13 @@ class ResNet(nn.Module):
Global pooling type. One of 'avg', 'max', 'avgmax', 'catavgmax'
"""
def __init__(self, block, layers, num_classes=1000, in_chans=3, use_se=False,
cardinality=1, base_width=64, stem_width=64, deep_stem=False,
cardinality=1, base_width=64, stem_width=64, stem_type='',
block_reduce_first=1, down_kernel_size=1, avg_down=False, dilated=False,
norm_layer=nn.BatchNorm2d, drop_rate=0.0, global_pool='avg',
zero_init_last_bn=True, block_args=None):
block_args = block_args or dict()
self.num_classes = num_classes
deep_stem = 'deep' in stem_type
self.inplanes = stem_width * 2 if deep_stem else 64
self.cardinality = cardinality
self.base_width = base_width
@@ -298,16 +309,20 @@ class ResNet(nn.Module):
super(ResNet, self).__init__()
if deep_stem:
stem_chs_1 = stem_chs_2 = stem_width
if 'tiered' in stem_type:
stem_chs_1 = 3 * (stem_width // 4)
stem_chs_2 = 6 * (stem_width // 4)
self.conv1 = nn.Sequential(*[
nn.Conv2d(in_chans, stem_width, 3, stride=2, padding=1, bias=False),
norm_layer(stem_width),
nn.Conv2d(in_chans, stem_chs_1, 3, stride=2, padding=1, bias=False),
norm_layer(stem_chs_1),
nn.ReLU(inplace=True),
nn.Conv2d(stem_width, stem_width, 3, stride=1, padding=1, bias=False),
norm_layer(stem_width),
nn.Conv2d(stem_chs_1, stem_chs_2, 3, stride=1, padding=1, bias=False),
norm_layer(stem_chs_2),
nn.ReLU(inplace=True),
nn.Conv2d(stem_width, self.inplanes, 3, stride=1, padding=1, bias=False)])
nn.Conv2d(stem_chs_2, self.inplanes, 3, stride=1, padding=1, bias=False)])
else:
self.conv1 = nn.Conv2d(in_chans, stem_width, kernel_size=7, stride=2, padding=3, bias=False)
self.conv1 = nn.Conv2d(in_chans, self.inplanes, kernel_size=7, stride=2, padding=3, bias=False)
self.bn1 = norm_layer(self.inplanes)
self.relu = nn.ReLU(inplace=True)
self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
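A standalone sanity check of the tiered stem wiring above (plain PyTorch, mirroring the Sequential built in `__init__` for `stem_type='deep_tiered'`, `stem_width=32`):

```python
import torch
import torch.nn as nn

stem_chs_1, stem_chs_2, out_chs = 24, 48, 64  # 'deep_tiered' with stem_width=32
stem = nn.Sequential(
    nn.Conv2d(3, stem_chs_1, 3, stride=2, padding=1, bias=False),
    nn.BatchNorm2d(stem_chs_1),
    nn.ReLU(inplace=True),
    nn.Conv2d(stem_chs_1, stem_chs_2, 3, stride=1, padding=1, bias=False),
    nn.BatchNorm2d(stem_chs_2),
    nn.ReLU(inplace=True),
    nn.Conv2d(stem_chs_2, out_chs, 3, stride=1, padding=1, bias=False))

x = torch.randn(1, 3, 224, 224)
print(stem(x).shape)  # torch.Size([1, 64, 112, 112]) -- stride 2 before bn1/relu/maxpool
```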
@@ -324,7 +339,7 @@ class ResNet(nn.Module):
self.num_features = 512 * block.expansion
self.fc = nn.Linear(self.num_features * self.global_pool.feat_mult(), num_classes)
last_bn_name = 'bn3' if 'Bottleneck' in block.__name__ else 'bn2'
last_bn_name = 'bn3' if 'Bottle' in block.__name__ else 'bn2'
for n, m in self.named_modules():
if isinstance(m, nn.Conv2d):
nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')
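For context on `last_bn_name`: it feeds the zero-init-last-BN trick from the 'Bag of Tricks' paper, where the final BN gamma in each residual block starts at zero so every block begins as (near) identity. A simplified sketch of the idea, not the exact loop from this file:

```python
from torch import nn

def zero_init_last_bn(model: nn.Module, last_bn_name: str = 'bn3'):
    # Zero the affine scale of each residual block's final BN so the
    # residual branch contributes nothing at init (shortcut dominates).
    for name, m in model.named_modules():
        if isinstance(m, nn.BatchNorm2d) and name.endswith(last_bn_name):
            nn.init.zeros_(m.weight)
```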
@@ -440,7 +455,7 @@ def resnet26d(pretrained=False, num_classes=1000, in_chans=3, **kwargs):
"""
default_cfg = default_cfgs['resnet26d']
model = ResNet(
Bottleneck, [2, 2, 2, 2], stem_width=32, deep_stem=True, avg_down=True,
Bottleneck, [2, 2, 2, 2], stem_width=32, stem_type='deep', avg_down=True,
num_classes=num_classes, in_chans=in_chans, **kwargs)
model.default_cfg = default_cfg
if pretrained:
@@ -466,7 +481,7 @@ def resnet50d(pretrained=False, num_classes=1000, in_chans=3, **kwargs):
"""
default_cfg = default_cfgs['resnet50d']
model = ResNet(
Bottleneck, [3, 4, 6, 3], stem_width=32, deep_stem=True, avg_down=True,
Bottleneck, [3, 4, 6, 3], stem_width=32, stem_type='deep', avg_down=True,
num_classes=num_classes, in_chans=in_chans, **kwargs)
model.default_cfg = default_cfg
if pretrained:
@@ -574,7 +589,7 @@ def resnext50d_32x4d(pretrained=False, num_classes=1000, in_chans=3, **kwargs):
default_cfg = default_cfgs['resnext50d_32x4d']
model = ResNet(
Bottleneck, [3, 4, 6, 3], cardinality=32, base_width=4,
stem_width=32, deep_stem=True, avg_down=True,
stem_width=32, stem_type='deep', avg_down=True,
num_classes=num_classes, in_chans=in_chans, **kwargs)
model.default_cfg = default_cfg
if pretrained:
@@ -854,3 +869,34 @@ def swsl_resnext101_32x16d(pretrained=True, **kwargs):
if pretrained:
load_pretrained(model, num_classes=kwargs.get('num_classes', 0), in_chans=kwargs.get('in_chans', 3))
return model
@register_model
def seresnext26d_32x4d(pretrained=False, num_classes=1000, in_chans=3, **kwargs):
"""Constructs a ResNet-26 v1d model.
This is technically a 28 layer ResNet, sticking with 'd' modifier from Gluon for now.
"""
default_cfg = default_cfgs['seresnext26d_32x4d']
model = ResNet(
Bottleneck, [2, 2, 2, 2], cardinality=32, base_width=4,
stem_width=32, stem_type='deep', avg_down=True, use_se=True,
num_classes=num_classes, in_chans=in_chans, **kwargs)
model.default_cfg = default_cfg
if pretrained:
load_pretrained(model, default_cfg, num_classes, in_chans)
return model
@register_model
def seresnext26t_32x4d(pretrained=False, num_classes=1000, in_chans=3, **kwargs):
"""Constructs a ResNet-26 v1d model.
"""
default_cfg = default_cfgs['seresnext26t_32x4d']
model = ResNet(
Bottleneck, [2, 2, 2, 2], cardinality=32, base_width=4,
stem_width=32, stem_type='deep_tiered', avg_down=True, use_se=True,
num_classes=num_classes, in_chans=in_chans, **kwargs)
model.default_cfg = default_cfg
if pretrained:
load_pretrained(model, default_cfg, num_classes, in_chans)
return model
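A quick way to verify the two new registrations behave as described (tiered stem channels, and roughly the 16.8M params listed in the results table); assumes this commit's code is importable as `timm`:

```python
import torch
from timm.models.resnet import seresnext26t_32x4d

model = seresnext26t_32x4d(pretrained=False)

# Tiered deep stem: 3 -> 24 -> 48 -> 64 across the three 3x3 convs
convs = [m for m in model.conv1 if isinstance(m, torch.nn.Conv2d)]
print([(c.in_channels, c.out_channels) for c in convs])  # [(3, 24), (24, 48), (48, 64)]

n_params = sum(p.numel() for p in model.parameters())
print(f'{n_params / 1e6:.1f}M params')  # ~16.8M, matching the table above
```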
