Finish with HRNet, weights and models updated. Improve consistency in model classifier/global pool treatment.

pull/62/head
Ross Wightman 5 years ago
parent 3ceeedc441
commit 3bef524f9c

@@ -8,12 +8,15 @@ from collections import OrderedDict
parser = argparse.ArgumentParser(description='PyTorch Checkpoint Cleaner')
parser.add_argument('--checkpoint', default='', type=str, metavar='PATH',
help='path to latest checkpoint (default: none)')
parser.add_argument('--output', default='./cleaned.pth', type=str, metavar='PATH',
parser.add_argument('--output', default='', type=str, metavar='PATH',
help='output path')
parser.add_argument('--use-ema', dest='use_ema', action='store_true',
help='use ema version of weights if present')
_TEMP_NAME = './_checkpoint.pth'
def main():
args = parser.parse_args()
@@ -40,13 +43,18 @@ def main():
new_state_dict[name] = v
print("=> Loaded state_dict from '{}'".format(args.checkpoint))
torch.save(new_state_dict, args.output)
with open(args.output, 'rb') as f:
torch.save(new_state_dict, _TEMP_NAME)
with open(_TEMP_NAME, 'rb') as f:
sha_hash = hashlib.sha256(f.read()).hexdigest()
checkpoint_base = os.path.splitext(args.checkpoint)[0]
if args.output:
checkpoint_root, checkpoint_base = os.path.split(args.output)
checkpoint_base = os.path.splitext(checkpoint_base)[0]
else:
checkpoint_root = ''
checkpoint_base = os.path.splitext(args.checkpoint)[0]
final_filename = '-'.join([checkpoint_base, sha_hash[:8]]) + '.pth'
shutil.move(args.output, final_filename)
shutil.move(_TEMP_NAME, os.path.join(checkpoint_root, final_filename))
print("=> Saved state_dict to '{}, SHA256: {}'".format(final_filename, sha_hash))
else:
print("Error: Checkpoint ({}) doesn't exist".format(args.checkpoint))

@@ -1,97 +1,155 @@
model,top1,top1_err,top5,top5_err,param_count,img_size,crop_pct,interpolation
resnet18,57.18,42.82,80.19,19.81,11.69,224,0.875,bilinear
gluon_resnet18_v1b,58.32,41.68,80.96,19.04,11.69,224,0.875,bicubic
seresnet18,59.81,40.19,81.68,18.32,11.78,224,0.875,bicubic
tv_resnet34,61.2,38.8,82.72,17.28,21.8,224,0.875,bilinear
spnasnet_100,61.21,38.79,82.77,17.23,4.42,224,0.875,bilinear
mnasnet_100,61.91,38.09,83.71,16.29,4.38,224,0.875,bicubic
fbnetc_100,62.43,37.57,83.39,16.61,5.57,224,0.875,bilinear
gluon_resnet34_v1b,62.56,37.44,84,16,21.8,224,0.875,bicubic
resnet34,62.82,37.18,84.12,15.88,21.8,224,0.875,bilinear
seresnet34,62.89,37.11,84.22,15.78,21.96,224,0.875,bilinear
densenet121,62.94,37.06,84.26,15.74,7.98,224,0.875,bicubic
semnasnet_100,63.12,36.88,84.53,15.47,3.89,224,0.875,bicubic
mobilenetv3_100,63.23,36.77,84.52,15.48,5.48,224,0.875,bicubic
tv_resnet50,63.33,36.67,84.65,15.35,25.56,224,0.875,bilinear
mixnet_s,63.38,36.62,84.71,15.29,4.13,224,0.875,bicubic
resnet26,63.45,36.55,84.27,15.73,16,224,0.875,bicubic
tf_mixnet_s,63.59,36.41,84.27,15.73,4.13,224,0.875,bicubic
dpn68,64.22,35.78,85.18,14.82,12.61,224,0.875,bicubic
tf_mixnet_m,64.27,35.73,85.09,14.91,5.01,224,0.875,bicubic
tf_efficientnet_b0,64.29,35.71,85.25,14.75,5.29,224,0.875,bicubic
efficientnet_b0,64.58,35.42,85.89,14.11,5.29,224,0.875,bicubic
resnet26d,64.63,35.37,85.12,14.88,16.01,224,0.875,bicubic
mixnet_m,64.69,35.31,85.47,14.53,5.01,224,0.875,bicubic
densenet169,64.78,35.22,85.25,14.75,14.15,224,0.875,bicubic
seresnext26_32x4d,65.04,34.96,85.65,14.35,16.79,224,0.875,bicubic
tf_efficientnet_es,65.24,34.76,85.54,14.46,5.44,224,0.875,bicubic
densenet201,65.28,34.72,85.67,14.33,20.01,224,0.875,bicubic
dpn68b,65.6,34.4,85.94,14.06,12.61,224,0.875,bicubic
resnet101,65.68,34.32,85.98,14.02,44.55,224,0.875,bilinear
densenet161,65.85,34.15,86.46,13.54,28.68,224,0.875,bicubic
gluon_resnet50_v1b,66.04,33.96,86.27,13.73,25.56,224,0.875,bicubic
inception_v3,66.12,33.88,86.34,13.66,27.16,299,0.875,bicubic
tv_resnext50_32x4d,66.18,33.82,86.04,13.96,25.03,224,0.875,bilinear
seresnet50,66.24,33.76,86.33,13.67,28.09,224,0.875,bilinear
tf_inception_v3,66.41,33.59,86.68,13.32,23.83,299,0.875,bicubic
gluon_resnet50_v1c,66.54,33.46,86.16,13.84,25.58,224,0.875,bicubic
adv_inception_v3,66.6,33.4,86.56,13.44,23.83,299,0.875,bicubic
wide_resnet50_2,66.65,33.35,86.81,13.19,68.88,224,0.875,bilinear
wide_resnet101_2,66.68,33.32,87.04,12.96,126.89,224,0.875,bilinear
tf_mixnet_l,66.78,33.22,86.46,13.54,7.33,224,0.875,bicubic
resnet50,66.81,33.19,87,13,25.56,224,0.875,bicubic
tf_efficientnet_em,66.87,33.13,86.98,13.02,6.9,240,0.882,bicubic
resnext50_32x4d,66.88,33.12,86.36,13.64,25.03,224,0.875,bicubic
tf_efficientnet_b1,66.89,33.11,87.04,12.96,7.79,240,0.882,bicubic
mixnet_l,66.97,33.03,86.94,13.06,7.33,224,0.875,bicubic
resnet152,67.02,32.98,87.57,12.43,60.19,224,0.875,bilinear
gluon_resnet50_v1s,67.1,32.9,86.86,13.14,25.68,224,0.875,bicubic
seresnet101,67.15,32.85,87.05,12.95,49.33,224,0.875,bilinear
gluon_resnet101_v1b,67.45,32.55,87.23,12.77,44.55,224,0.875,bicubic
efficientnet_b1,67.55,32.45,87.29,12.71,7.79,240,0.882,bicubic
seresnet152,67.55,32.45,87.39,12.61,66.82,224,0.875,bilinear
gluon_resnet101_v1c,67.56,32.44,87.16,12.84,44.57,224,0.875,bicubic
gluon_inception_v3,67.59,32.41,87.46,12.54,23.83,299,0.875,bicubic
xception,67.67,32.33,87.57,12.43,22.86,299,0.8975,bicubic
efficientnet_b2,67.8,32.2,88.2,11.8,9.11,260,0.89,bicubic
resnext101_32x8d,67.85,32.15,87.48,12.52,88.79,224,0.875,bilinear
seresnext50_32x4d,67.87,32.13,87.62,12.38,27.56,224,0.875,bilinear
gluon_resnet50_v1d,67.91,32.09,87.12,12.88,25.58,224,0.875,bicubic
dpn92,68.01,31.99,87.59,12.41,37.67,224,0.875,bicubic
tf_efficientnet_el,68.18,31.82,88.35,11.65,10.59,300,0.904,bicubic
gluon_resnext50_32x4d,68.28,31.72,87.32,12.68,25.03,224,0.875,bicubic
dpn98,68.58,31.42,87.66,12.34,61.57,224,0.875,bicubic
gluon_seresnext50_32x4d,68.67,31.33,88.32,11.68,27.56,224,0.875,bicubic
dpn107,68.71,31.29,88.13,11.87,86.92,224,0.875,bicubic
gluon_resnet101_v1s,68.72,31.28,87.9,12.1,44.67,224,0.875,bicubic
ig_resnext101_32x48d,76.87,23.13,93.32,6.68,828.41,224,0.875,bilinear
ig_resnext101_32x32d,76.84,23.16,93.19,6.81,468.53,224,0.875,bilinear
tf_efficientnet_b7_ap,76.09,23.91,92.97,7.03,66.35,600,0.949,bicubic
tf_efficientnet_b8_ap,76.09,23.91,92.73,7.27,87.41,672,0.954,bicubic
ig_resnext101_32x16d,75.71,24.29,92.9,7.1,194.03,224,0.875,bilinear
swsl_resnext101_32x8d,75.45,24.55,92.75,7.25,88.79,224,0.875,bilinear
tf_efficientnet_b6_ap,75.38,24.62,92.44,7.56,43.04,528,0.942,bicubic
tf_efficientnet_b7,74.72,25.28,92.22,7.78,66.35,600,0.949,bicubic
tf_efficientnet_b5_ap,74.59,25.41,91.99,8.01,30.39,456,0.934,bicubic
swsl_resnext101_32x4d,74.15,25.85,91.99,8.01,44.18,224,0.875,bilinear
swsl_resnext101_32x16d,74.01,25.99,92.17,7.83,194.03,224,0.875,bilinear
tf_efficientnet_b6,73.9,26.1,91.75,8.25,43.04,528,0.942,bicubic
ig_resnext101_32x8d,73.66,26.34,92.15,7.85,88.79,224,0.875,bilinear
tf_efficientnet_b5,73.54,26.46,91.46,8.54,30.39,456,0.934,bicubic
tf_efficientnet_b4_ap,72.89,27.11,90.98,9.02,19.34,380,0.922,bicubic
swsl_resnext50_32x4d,72.58,27.42,90.84,9.16,25.03,224,0.875,bilinear
pnasnet5large,72.37,27.63,90.26,9.74,86.06,331,0.875,bicubic
nasnetalarge,72.31,27.69,90.51,9.49,88.75,331,0.875,bicubic
tf_efficientnet_b4,72.28,27.72,90.6,9.4,19.34,380,0.922,bicubic
swsl_resnet50,71.69,28.31,90.51,9.49,25.56,224,0.875,bilinear
ssl_resnext101_32x8d,71.49,28.51,90.47,9.53,88.79,224,0.875,bilinear
ssl_resnext101_32x16d,71.4,28.6,90.55,9.45,194.03,224,0.875,bilinear
tf_efficientnet_b3_ap,70.92,29.08,89.43,10.57,12.23,300,0.904,bicubic
tf_efficientnet_b3,70.62,29.38,89.44,10.56,12.23,300,0.904,bicubic
gluon_senet154,70.6,29.4,88.92,11.08,115.09,224,0.875,bicubic
ssl_resnext101_32x4d,70.5,29.5,89.76,10.24,44.18,224,0.875,bilinear
senet154,70.48,29.52,88.99,11.01,115.09,224,0.875,bilinear
gluon_seresnext101_64x4d,70.44,29.56,89.35,10.65,88.23,224,0.875,bicubic
gluon_resnet152_v1s,70.32,29.68,88.87,11.13,60.32,224,0.875,bicubic
inception_resnet_v2,70.12,29.88,88.68,11.32,55.84,299,0.8975,bicubic
gluon_seresnext101_32x4d,70.01,29.99,88.91,11.09,48.96,224,0.875,bicubic
gluon_resnet152_v1d,69.95,30.05,88.47,11.53,60.21,224,0.875,bicubic
gluon_resnext101_64x4d,69.69,30.31,88.26,11.74,83.46,224,0.875,bicubic
ssl_resnext50_32x4d,69.69,30.31,89.42,10.58,25.03,224,0.875,bilinear
ens_adv_inception_resnet_v2,69.52,30.48,88.5,11.5,55.84,299,0.8975,bicubic
inception_v4,69.35,30.65,88.78,11.22,42.68,299,0.875,bicubic
seresnext101_32x4d,69.34,30.66,88.05,11.95,48.96,224,0.875,bilinear
gluon_resnet152_v1c,69.13,30.87,87.89,12.11,60.21,224,0.875,bicubic
mixnet_xl,69,31,88.19,11.81,11.9,224,0.875,bicubic
gluon_resnet101_v1d,68.99,31.01,88.08,11.92,44.57,224,0.875,bicubic
gluon_xception65,68.98,31.02,88.32,11.68,39.92,299,0.875,bicubic
gluon_resnext101_32x4d,68.96,31.04,88.34,11.66,44.18,224,0.875,bicubic
tf_efficientnet_b2_ap,68.93,31.07,88.34,11.66,9.11,260,0.89,bicubic
gluon_resnet152_v1b,68.81,31.19,87.71,12.29,60.19,224,0.875,bicubic
dpn131,68.76,31.24,87.48,12.52,79.25,224,0.875,bicubic
resnext50d_32x4d,68.75,31.25,88.31,11.69,25.05,224,0.875,bicubic
tf_efficientnet_b2,68.75,31.25,87.95,12.05,9.11,260,0.89,bicubic
dpn131,68.76,31.24,87.48,12.52,79.25,224,0.875,bicubic
gluon_resnet152_v1b,68.81,31.19,87.71,12.29,60.19,224,0.875,bicubic
gluon_resnext101_32x4d,68.96,31.04,88.34,11.66,44.18,224,0.875,bicubic
gluon_xception65,68.98,31.02,88.32,11.68,39.92,299,0.875,bicubic
gluon_resnet101_v1d,68.99,31.01,88.08,11.92,44.57,224,0.875,bicubic
mixnet_xl,69,31,88.19,11.81,11.9,224,0.875,bicubic
gluon_resnet152_v1c,69.13,30.87,87.89,12.11,60.21,224,0.875,bicubic
seresnext101_32x4d,69.34,30.66,88.05,11.95,48.96,224,0.875,bilinear
inception_v4,69.35,30.65,88.78,11.22,42.68,299,0.875,bicubic
ens_adv_inception_resnet_v2,69.52,30.48,88.5,11.5,55.84,299,0.8975,bicubic
gluon_resnext101_64x4d,69.69,30.31,88.26,11.74,83.46,224,0.875,bicubic
gluon_resnet152_v1d,69.95,30.05,88.47,11.53,60.21,224,0.875,bicubic
gluon_seresnext101_32x4d,70.01,29.99,88.91,11.09,48.96,224,0.875,bicubic
inception_resnet_v2,70.12,29.88,88.68,11.32,55.84,299,0.8975,bicubic
gluon_resnet152_v1s,70.32,29.68,88.87,11.13,60.32,224,0.875,bicubic
gluon_seresnext101_64x4d,70.44,29.56,89.35,10.65,88.23,224,0.875,bicubic
senet154,70.48,29.52,88.99,11.01,115.09,224,0.875,bilinear
gluon_senet154,70.6,29.4,88.92,11.08,115.09,224,0.875,bicubic
tf_efficientnet_b3,70.62,29.38,89.44,10.56,12.23,300,0.904,bicubic
tf_efficientnet_b4,72.28,27.72,90.6,9.4,19.34,380,0.922,bicubic
nasnetalarge,72.31,27.69,90.51,9.49,88.75,331,0.875,bicubic
pnasnet5large,72.37,27.63,90.26,9.74,86.06,331,0.875,bicubic
tf_efficientnet_b5,73.37,26.63,91.21,8.79,30.39,456,0.934,bicubic
ig_resnext101_32x8d,73.66,26.34,92.15,7.85,88.79,224,0.875,bilinear
tf_efficientnet_b6,73.9,26.1,91.75,8.25,43.04,528,0.942,bicubic
tf_efficientnet_b7,74.04,25.96,91.86,8.14,66.35,600,0.949,bicubic
ig_resnext101_32x16d,75.71,24.29,92.9,7.1,194.03,224,0.875,bilinear
ig_resnext101_32x32d,76.84,23.16,93.19,6.81,468.53,224,0.875,bilinear
ig_resnext101_32x48d,76.87,23.13,93.32,6.68,828.41,224,0.875,bilinear
gluon_resnet101_v1s,68.72,31.28,87.9,12.1,44.67,224,0.875,bicubic
dpn107,68.71,31.29,88.13,11.87,86.92,224,0.875,bicubic
gluon_seresnext50_32x4d,68.67,31.33,88.32,11.68,27.56,224,0.875,bicubic
hrnet_w64,68.63,31.37,88.07,11.93,128.06,224,0.875,bilinear
dpn98,68.58,31.42,87.66,12.34,61.57,224,0.875,bicubic
ssl_resnet50,68.42,31.58,88.58,11.42,25.56,224,0.875,bilinear
dla102x2,68.34,31.66,87.87,12.13,41.75,224,0.875,bilinear
gluon_resnext50_32x4d,68.28,31.72,87.32,12.68,25.03,224,0.875,bicubic
tf_efficientnet_el,68.18,31.82,88.35,11.65,10.59,300,0.904,bicubic
dpn92,68.01,31.99,87.59,12.41,37.67,224,0.875,bicubic
gluon_resnet50_v1d,67.91,32.09,87.12,12.88,25.58,224,0.875,bicubic
seresnext50_32x4d,67.87,32.13,87.62,12.38,27.56,224,0.875,bilinear
resnext101_32x8d,67.85,32.15,87.48,12.52,88.79,224,0.875,bilinear
efficientnet_b2,67.8,32.2,88.2,11.8,9.11,260,0.89,bicubic
hrnet_w44,67.77,32.23,87.53,12.47,67.06,224,0.875,bilinear
hrnet_w48,67.77,32.23,87.42,12.58,77.47,224,0.875,bilinear
xception,67.67,32.33,87.57,12.43,22.86,299,0.8975,bicubic
dla169,67.61,32.39,87.56,12.44,53.99,224,0.875,bilinear
gluon_inception_v3,67.59,32.41,87.46,12.54,23.83,299,0.875,bicubic
hrnet_w40,67.59,32.41,87.13,12.87,57.56,224,0.875,bilinear
gluon_resnet101_v1c,67.56,32.44,87.16,12.84,44.57,224,0.875,bicubic
efficientnet_b1,67.55,32.45,87.29,12.71,7.79,240,0.882,bicubic
seresnet152,67.55,32.45,87.39,12.61,66.82,224,0.875,bilinear
res2net50_26w_8s,67.53,32.47,87.27,12.73,48.4,224,0.875,bilinear
tf_efficientnet_b1_ap,67.52,32.48,87.77,12.23,7.79,240,0.882,bicubic
tf_efficientnet_cc_b1_8e,67.48,32.52,87.31,12.69,39.72,240,0.882,bicubic
gluon_resnet101_v1b,67.45,32.55,87.23,12.77,44.55,224,0.875,bicubic
res2net101_26w_4s,67.45,32.55,87.01,12.99,45.21,224,0.875,bilinear
seresnet101,67.15,32.85,87.05,12.95,49.33,224,0.875,bilinear
gluon_resnet50_v1s,67.1,32.9,86.86,13.14,25.68,224,0.875,bicubic
dla60x,67.08,32.92,87.17,12.83,17.65,224,0.875,bilinear
dla60_res2net,67.03,32.97,87.14,12.86,21.15,224,0.875,bilinear
resnet152,67.02,32.98,87.57,12.43,60.19,224,0.875,bilinear
dla102x,67,33,86.77,13.23,26.77,224,0.875,bilinear
mixnet_l,66.97,33.03,86.94,13.06,7.33,224,0.875,bicubic
res2net50_26w_6s,66.91,33.09,86.9,13.1,37.05,224,0.875,bilinear
tf_efficientnet_b1,66.89,33.11,87.04,12.96,7.79,240,0.882,bicubic
resnext50_32x4d,66.88,33.12,86.36,13.64,25.03,224,0.875,bicubic
tf_efficientnet_em,66.87,33.13,86.98,13.02,6.9,240,0.882,bicubic
resnet50,66.81,33.19,87,13,25.56,224,0.875,bicubic
hrnet_w32,66.79,33.21,87.29,12.71,41.23,224,0.875,bilinear
tf_mixnet_l,66.78,33.22,86.46,13.54,7.33,224,0.875,bicubic
hrnet_w30,66.76,33.24,86.79,13.21,37.71,224,0.875,bilinear
wide_resnet101_2,66.68,33.32,87.04,12.96,126.89,224,0.875,bilinear
wide_resnet50_2,66.65,33.35,86.81,13.19,68.88,224,0.875,bilinear
dla60_res2next,66.64,33.36,87.02,12.98,17.33,224,0.875,bilinear
adv_inception_v3,66.6,33.4,86.56,13.44,23.83,299,0.875,bicubic
dla102,66.55,33.45,86.91,13.09,33.73,224,0.875,bilinear
gluon_resnet50_v1c,66.54,33.46,86.16,13.84,25.58,224,0.875,bicubic
tf_inception_v3,66.42,33.58,86.68,13.32,23.83,299,0.875,bicubic
seresnet50,66.24,33.76,86.33,13.67,28.09,224,0.875,bilinear
tf_efficientnet_cc_b0_8e,66.21,33.79,86.22,13.78,24.01,224,0.875,bicubic
tv_resnext50_32x4d,66.18,33.82,86.04,13.96,25.03,224,0.875,bilinear
res2net50_26w_4s,66.17,33.83,86.6,13.4,25.7,224,0.875,bilinear
inception_v3,66.12,33.88,86.34,13.66,27.16,299,0.875,bicubic
gluon_resnet50_v1b,66.04,33.96,86.27,13.73,25.56,224,0.875,bicubic
res2net50_14w_8s,66.02,33.98,86.24,13.76,25.06,224,0.875,bilinear
densenet161,65.85,34.15,86.46,13.54,28.68,224,0.875,bicubic
res2next50,65.85,34.15,85.83,14.17,24.67,224,0.875,bilinear
resnet101,65.68,34.32,85.98,14.02,44.55,224,0.875,bilinear
dpn68b,65.6,34.4,85.94,14.06,12.61,224,0.875,bicubic
tf_efficientnet_b0_ap,65.49,34.51,85.55,14.45,5.29,224,0.875,bicubic
res2net50_48w_2s,65.32,34.68,85.96,14.04,25.29,224,0.875,bilinear
densenet201,65.28,34.72,85.67,14.33,20.01,224,0.875,bicubic
tf_efficientnet_es,65.24,34.76,85.54,14.46,5.44,224,0.875,bicubic
dla60,65.22,34.78,85.75,14.25,22.33,224,0.875,bilinear
tf_efficientnet_cc_b0_4e,65.13,34.87,85.13,14.87,13.31,224,0.875,bicubic
seresnext26_32x4d,65.04,34.96,85.65,14.35,16.79,224,0.875,bicubic
hrnet_w18,64.91,35.09,85.75,14.25,21.3,224,0.875,bilinear
densenet169,64.78,35.22,85.25,14.75,14.15,224,0.875,bicubic
mixnet_m,64.69,35.31,85.47,14.53,5.01,224,0.875,bicubic
resnet26d,64.63,35.37,85.12,14.88,16.01,224,0.875,bicubic
efficientnet_b0,64.58,35.42,85.89,14.11,5.29,224,0.875,bicubic
tf_efficientnet_b0,64.29,35.71,85.25,14.75,5.29,224,0.875,bicubic
tf_mixnet_m,64.27,35.73,85.09,14.91,5.01,224,0.875,bicubic
dpn68,64.22,35.78,85.18,14.82,12.61,224,0.875,bicubic
tf_mixnet_s,63.59,36.41,84.27,15.73,4.13,224,0.875,bicubic
resnet26,63.45,36.55,84.27,15.73,16,224,0.875,bicubic
mixnet_s,63.38,36.62,84.71,15.29,4.13,224,0.875,bicubic
tv_resnet50,63.33,36.67,84.65,15.35,25.56,224,0.875,bilinear
mobilenetv3_rw,63.23,36.77,84.52,15.48,5.48,224,0.875,bicubic
semnasnet_100,63.12,36.88,84.53,15.47,3.89,224,0.875,bicubic
densenet121,62.94,37.06,84.26,15.74,7.98,224,0.875,bicubic
seresnet34,62.89,37.11,84.22,15.78,21.96,224,0.875,bilinear
hrnet_w18_small_v2,62.83,37.17,83.97,16.03,15.6,224,0.875,bilinear
resnet34,62.82,37.18,84.12,15.88,21.8,224,0.875,bilinear
swsl_resnet18,62.73,37.27,84.3,15.7,11.69,224,0.875,bilinear
gluon_resnet34_v1b,62.56,37.44,84,16,21.8,224,0.875,bicubic
dla34,62.51,37.49,83.92,16.08,15.78,224,0.875,bilinear
tf_mobilenetv3_large_100,62.47,37.53,83.96,16.04,5.48,224,0.875,bilinear
fbnetc_100,62.43,37.57,83.39,16.61,5.57,224,0.875,bilinear
mnasnet_100,61.91,38.09,83.71,16.29,4.38,224,0.875,bicubic
ssl_resnet18,61.49,38.51,83.33,16.67,11.69,224,0.875,bilinear
spnasnet_100,61.21,38.79,82.77,17.23,4.42,224,0.875,bilinear
tv_resnet34,61.2,38.8,82.72,17.28,21.8,224,0.875,bilinear
tf_mobilenetv3_large_075,60.38,39.62,81.96,18.04,3.99,224,0.875,bilinear
seresnet18,59.81,40.19,81.68,18.32,11.78,224,0.875,bicubic
tf_mobilenetv3_large_minimal_100,59.07,40.93,81.14,18.86,3.92,224,0.875,bilinear
hrnet_w18_small,58.97,41.03,81.34,18.66,13.19,224,0.875,bilinear
gluon_resnet18_v1b,58.32,41.68,80.96,19.04,11.69,224,0.875,bicubic
resnet18,57.18,42.82,80.19,19.81,11.69,224,0.875,bilinear
dla60x_c,56.02,43.98,78.96,21.04,1.34,224,0.875,bilinear
tf_mobilenetv3_small_100,54.51,45.49,77.08,22.92,2.54,224,0.875,bilinear
dla46x_c,53.08,46.92,76.84,23.16,1.08,224,0.875,bilinear
dla46_c,52.2,47.8,75.68,24.32,1.31,224,0.875,bilinear
tf_mobilenetv3_small_075,52.15,47.85,75.46,24.54,2.04,224,0.875,bilinear
tf_mobilenetv3_small_minimal_100,49.53,50.47,73.05,26.95,2.04,224,0.875,bilinear


@@ -294,6 +294,17 @@ model_list = [
_entry('res2next50', 'Res2NeXt-50', '1904.01169'),
_entry('dla60_res2net', 'Res2Net-DLA-60', '1904.01169'),
_entry('dla60_res2next', 'Res2NeXt-DLA-60', '1904.01169'),
## HRNet official impl weights
_entry('hrnet_w18_small', 'HRNet-W18-C-Small-V1', '1908.07919'),
_entry('hrnet_w18_small_v2', 'HRNet-W18-C-Small-V2', '1908.07919'),
_entry('hrnet_w18', 'HRNet-W18-C', '1908.07919'),
_entry('hrnet_w30', 'HRNet-W30-C', '1908.07919'),
_entry('hrnet_w32', 'HRNet-W32-C', '1908.07919'),
_entry('hrnet_w40', 'HRNet-W40-C', '1908.07919'),
_entry('hrnet_w44', 'HRNet-W44-C', '1908.07919'),
_entry('hrnet_w48', 'HRNet-W48-C', '1908.07919'),
_entry('hrnet_w64', 'HRNet-W64-C', '1908.07919'),
]
for m in model_list:

@@ -10,7 +10,7 @@ import torch.nn.functional as F
from .registry import register_model
from .helpers import load_pretrained
from .adaptive_avgmax_pool import select_adaptive_pool2d
from .adaptive_avgmax_pool import SelectAdaptivePool2d
from timm.data import IMAGENET_DEFAULT_MEAN, IMAGENET_DEFAULT_STD
import re
@@ -88,8 +88,8 @@ class DenseNet(nn.Module):
def __init__(self, growth_rate=32, block_config=(6, 12, 24, 16),
num_init_features=64, bn_size=4, drop_rate=0,
num_classes=1000, in_chans=3, global_pool='avg'):
self.global_pool = global_pool
self.num_classes = num_classes
self.drop_rate = drop_rate
super(DenseNet, self).__init__()
# First convolution
@@ -117,32 +117,31 @@ class DenseNet(nn.Module):
self.features.add_module('norm5', nn.BatchNorm2d(num_features))
# Linear layer
self.classifier = nn.Linear(num_features, num_classes)
self.num_features = num_features
self.global_pool = SelectAdaptivePool2d(pool_type=global_pool)
self.classifier = nn.Linear(self.num_features * self.global_pool.feat_mult(), num_classes)
def get_classifier(self):
return self.classifier
def reset_classifier(self, num_classes, global_pool='avg'):
self.global_pool = global_pool
self.num_classes = num_classes
del self.classifier
if num_classes:
self.classifier = nn.Linear(self.num_features, num_classes)
else:
self.classifier = None
self.global_pool = SelectAdaptivePool2d(pool_type=global_pool)
self.classifier = nn.Linear(
self.num_features * self.global_pool.feat_mult(), num_classes) if num_classes else None
def forward_features(self, x, pool=True):
def forward_features(self, x):
x = self.features(x)
x = F.relu(x, inplace=True)
if pool:
x = select_adaptive_pool2d(x, self.global_pool)
x = x.view(x.size(0), -1)
return x
def forward(self, x):
return self.classifier(self.forward_features(x, pool=True))
x = self.forward_features(x)
x = self.global_pool(x).flatten(1)
if self.drop_rate > 0.:
x = F.dropout(x, p=self.drop_rate, training=self.training)
x = self.classifier(x)
return x
def _filter_pretrained(state_dict):
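
The DenseNet changes above are the template applied across every model in this commit: global_pool becomes a SelectAdaptivePool2d module instead of a string, the classifier is sized by num_features * feat_mult() (feat_mult() is 2 for 'catavgmax' pooling, which concatenates avg and max, else 1), forward_features returns unpooled feature maps, and pooling, dropout, and classification all happen in forward. A condensed sketch of that contract, with a stand-in backbone rather than any real timm model:

import torch.nn as nn
import torch.nn.functional as F

from timm.models.adaptive_avgmax_pool import SelectAdaptivePool2d  # module path as in the diffs above

class Net(nn.Module):
    def __init__(self, num_features=512, num_classes=1000, drop_rate=0., global_pool='avg'):
        super(Net, self).__init__()
        self.num_classes = num_classes
        self.num_features = num_features
        self.drop_rate = drop_rate
        self.body = nn.Conv2d(3, num_features, 3)  # stand-in for a real backbone
        self.global_pool = SelectAdaptivePool2d(pool_type=global_pool)
        # feat_mult() widens the classifier input for concatenating pool types
        self.classifier = nn.Linear(num_features * self.global_pool.feat_mult(), num_classes)

    def reset_classifier(self, num_classes, global_pool='avg'):
        self.num_classes = num_classes
        self.global_pool = SelectAdaptivePool2d(pool_type=global_pool)
        del self.classifier
        self.classifier = nn.Linear(
            self.num_features * self.global_pool.feat_mult(), num_classes) if num_classes else None

    def forward_features(self, x):
        return self.body(x)  # unpooled maps; feature extraction and TestTimePoolHead rely on this

    def forward(self, x):
        x = self.forward_features(x)
        x = self.global_pool(x).flatten(1)
        if self.drop_rate > 0.:
            x = F.dropout(x, p=self.drop_rate, training=self.training)
        return self.classifier(x)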

@@ -276,8 +276,7 @@ class DLA(nn.Module):
self.num_features = channels[-1]
self.global_pool = SelectAdaptivePool2d(pool_type=global_pool)
self.fc = nn.Conv2d(self.num_features * self.global_pool.feat_mult(), num_classes,
kernel_size=1, stride=1, padding=0, bias=True)
self.fc = nn.Conv2d(self.num_features * self.global_pool.feat_mult(), num_classes, 1, bias=True)
for m in self.modules():
if isinstance(m, nn.Conv2d):
@@ -302,15 +301,14 @@ class DLA(nn.Module):
return self.fc
def reset_classifier(self, num_classes, global_pool='avg'):
self.global_pool = SelectAdaptivePool2d(pool_type=global_pool)
self.num_classes = num_classes
del self.fc
self.global_pool = SelectAdaptivePool2d(pool_type=global_pool)
if num_classes:
self.fc = nn.Linear(self.num_features * self.global_pool.feat_mult(), num_classes)
self.fc = nn.Conv2d(self.num_features * self.global_pool.feat_mult(), num_classes, 1, bias=True)
else:
self.fc = None
def forward_features(self, x, pool=True):
def forward_features(self, x):
x = self.base_layer(x)
x = self.level0(x)
x = self.level1(x)
@@ -318,17 +316,15 @@ class DLA(nn.Module):
x = self.level3(x)
x = self.level4(x)
x = self.level5(x)
if pool:
x = self.global_pool(x)
return x
def forward(self, x):
x = self.forward_features(x)
x = self.global_pool(x)
if self.drop_rate > 0.:
x = F.dropout(x, p=self.drop_rate, training=self.training)
x = self.fc(x)
x = x.flatten(1)
return x
return x.flatten(1)
@register_model

@@ -16,7 +16,7 @@ from collections import OrderedDict
from .registry import register_model
from .helpers import load_pretrained
from .adaptive_avgmax_pool import select_adaptive_pool2d
from .adaptive_avgmax_pool import SelectAdaptivePool2d
from timm.data import IMAGENET_DPN_MEAN, IMAGENET_DPN_STD
@@ -160,7 +160,6 @@ class DPN(nn.Module):
super(DPN, self).__init__()
self.num_classes = num_classes
self.drop_rate = drop_rate
self.global_pool = global_pool
self.b = b
bw_factor = 1 if small else 4
@@ -218,32 +217,32 @@ class DPN(nn.Module):
self.features = nn.Sequential(blocks)
# Using 1x1 conv for the FC layer to allow the extra pooling scheme
self.classifier = nn.Conv2d(in_chs, num_classes, kernel_size=1, bias=True)
self.global_pool = SelectAdaptivePool2d(pool_type=global_pool)
self.classifier = nn.Conv2d(
self.num_features * self.global_pool.feat_mult(), num_classes, kernel_size=1, bias=True)
def get_classifier(self):
return self.classifier
def reset_classifier(self, num_classes, global_pool='avg'):
self.num_classes = num_classes
self.global_pool = global_pool
del self.classifier
self.global_pool = SelectAdaptivePool2d(pool_type=global_pool)
if num_classes:
self.classifier = nn.Conv2d(self.num_features, num_classes, kernel_size=1, bias=True)
self.classifier = nn.Conv2d(
self.num_features * self.global_pool.feat_mult(), num_classes, kernel_size=1, bias=True)
else:
self.classifier = None
def forward_features(self, x, pool=True):
x = self.features(x)
if pool:
x = select_adaptive_pool2d(x, pool_type=self.global_pool)
return x
def forward_features(self, x):
return self.features(x)
def forward(self, x):
x = self.forward_features(x)
x = self.global_pool(x)
if self.drop_rate > 0.:
x = F.dropout(x, p=self.drop_rate, training=self.training)
out = self.classifier(x)
return out.view(out.size(0), -1)
return out.flatten(1)
@register_model
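
DPN keeps its classifier as a 1x1 nn.Conv2d rather than nn.Linear so that, per the comment above, the same layer works both after pooling (where it is equivalent to a Linear) and directly on spatial feature maps, producing per-location class logits that can be pooled afterwards. A small sketch of that equivalence; shapes are illustrative only:

import torch
import torch.nn as nn

feats = torch.randn(2, 1024, 7, 7)              # unpooled feature maps
classifier = nn.Conv2d(1024, 1000, kernel_size=1, bias=True)

# After global pooling, the 1x1 conv behaves exactly like nn.Linear:
pooled = feats.mean(dim=(2, 3), keepdim=True)   # -> (2, 1024, 1, 1)
logits = classifier(pooled).flatten(1)          # -> (2, 1000)

# But it can also run on the full maps first, giving per-location logits
# that are pooled afterwards -- the "extra pooling scheme" noted above:
logits_map = classifier(feats)                  # -> (2, 1000, 7, 7)
logits_tta = logits_map.mean(dim=(2, 3))        # -> (2, 1000)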

@@ -211,8 +211,7 @@ class EfficientNet(nn.Module):
def __init__(self, block_args, num_classes=1000, num_features=1280, in_chans=3, stem_size=32,
channel_multiplier=1.0, channel_divisor=8, channel_min=None,
pad_type='', act_layer=nn.ReLU, drop_rate=0., drop_connect_rate=0.,
se_kwargs=None, norm_layer=nn.BatchNorm2d, norm_kwargs=None,
global_pool='avg', weight_init='goog'):
se_kwargs=None, norm_layer=nn.BatchNorm2d, norm_kwargs=None, global_pool='avg'):
super(EfficientNet, self).__init__()
norm_kwargs = norm_kwargs or {}
@@ -245,11 +244,7 @@ class EfficientNet(nn.Module):
# Classifier
self.classifier = nn.Linear(self.num_features * self.global_pool.feat_mult(), self.num_classes)
for m in self.modules():
if weight_init == 'goog':
efficientnet_init_goog(m)
else:
efficientnet_init_default(m)
efficientnet_init_weights(self)
def as_sequential(self):
layers = [self.conv_stem, self.bn1, self.act1]
@@ -262,14 +257,10 @@ class EfficientNet(nn.Module):
return self.classifier
def reset_classifier(self, num_classes, global_pool='avg'):
self.global_pool = SelectAdaptivePool2d(pool_type=global_pool)
self.num_classes = num_classes
del self.classifier
if num_classes:
self.classifier = nn.Linear(
self.num_features * self.global_pool.feat_mult(), num_classes)
else:
self.classifier = None
self.global_pool = SelectAdaptivePool2d(pool_type=global_pool)
self.classifier = nn.Linear(
self.num_features * self.global_pool.feat_mult(), num_classes) if num_classes else None
def forward_features(self, x):
x = self.conv_stem(x)
@@ -300,7 +291,7 @@ class EfficientNetFeatures(nn.Module):
def __init__(self, block_args, out_indices=(0, 1, 2, 3, 4), feature_location='pre_pwl',
in_chans=3, stem_size=32, channel_multiplier=1.0, channel_divisor=8, channel_min=None,
output_stride=32, pad_type='', act_layer=nn.ReLU, drop_rate=0., drop_connect_rate=0.,
se_kwargs=None, norm_layer=nn.BatchNorm2d, norm_kwargs=None, weight_init='goog'):
se_kwargs=None, norm_layer=nn.BatchNorm2d, norm_kwargs=None):
super(EfficientNetFeatures, self).__init__()
norm_kwargs = norm_kwargs or {}
@@ -326,12 +317,7 @@ class EfficientNetFeatures(nn.Module):
self.feature_info = builder.features # builder provides info about feature channels for each block
self._in_chs = builder.in_chs
for m in self.modules():
if weight_init == 'goog':
efficientnet_init_goog(m)
else:
efficientnet_init_default(m)
efficientnet_init_weights(self)
if _DEBUG:
for k, v in self.feature_info.items():
print('Feature idx: {}: Name: {}, Channels: {}'.format(k, v['name'], v['num_chs']))

@@ -358,9 +358,13 @@ class EfficientNetBuilder:
return stages
def efficientnet_init_goog(m, n=''):
# weight init as per Tensorflow Official impl
# https://github.com/tensorflow/tpu/blob/master/models/official/mnasnet/mnasnet_model.py
def _init_weight_goog(m, n=''):
""" Weight initialization as per Tensorflow official implementations.
Handles layers in EfficientNet, EfficientNet-CondConv, MixNet, MnasNet, MobileNetV3, etc:
* https://github.com/tensorflow/tpu/blob/master/models/official/mnasnet/mnasnet_model.py
* https://github.com/tensorflow/tpu/blob/master/models/official/efficientnet/efficientnet_model.py
"""
if isinstance(m, CondConv2d):
fan_out = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
init_weight_fn = get_condconv_initializer(
@@ -386,7 +390,8 @@ def efficientnet_init_goog(m, n=''):
m.bias.data.zero_()
def efficientnet_init_default(m, n=''):
def _init_weight_default(m, n=''):
""" Basic ResNet (Kaiming) style weight init"""
if isinstance(m, CondConv2d):
init_fn = get_condconv_initializer(partial(
nn.init.kaiming_normal_, mode='fan_out', nonlinearity='relu'), m.num_experts, m.weight_shape)
@@ -400,3 +405,8 @@ def efficientnet_init_default(m, n=''):
nn.init.kaiming_uniform_(m.weight, mode='fan_in', nonlinearity='linear')
def efficientnet_init_weights(model: nn.Module, init_fn=None):
init_fn = init_fn or _init_weight_goog
for n, m in model.named_modules():
init_fn(m, n)
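
Callers now initialize weights with a single call instead of looping over modules themselves. A brief usage sketch, where model stands for any EfficientNet-family nn.Module assembled above:

# Walks model.named_modules() once, applying the chosen per-module init.
efficientnet_init_weights(model)                                # default: _init_weight_goog (TF-style)
efficientnet_init_weights(model, init_fn=_init_weight_default)  # or Kaiming-style instead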

@@ -13,7 +13,7 @@ from collections import OrderedDict
from .registry import register_model
from .helpers import load_pretrained
from .adaptive_avgmax_pool import select_adaptive_pool2d
from .adaptive_avgmax_pool import SelectAdaptivePool2d
from timm.data import IMAGENET_DEFAULT_MEAN, IMAGENET_DEFAULT_STD
__all__ = ['Xception65', 'Xception71']
@@ -185,7 +185,6 @@ class Xception65(nn.Module):
super(Xception65, self).__init__()
self.num_classes = num_classes
self.drop_rate = drop_rate
self.global_pool = global_pool
norm_kwargs = norm_kwargs if norm_kwargs is not None else {}
if output_stride == 32:
entry_block3_stride = 2
@@ -249,21 +248,18 @@
1536, self.num_features, 3, stride=1, dilation=exit_block_dilations[1],
norm_layer=norm_layer, norm_kwargs=norm_kwargs)
self.bn5 = norm_layer(num_features=self.num_features, **norm_kwargs)
self.fc = nn.Linear(in_features=self.num_features, out_features=num_classes)
self.global_pool = SelectAdaptivePool2d(pool_type=global_pool)
self.fc = nn.Linear(self.num_features * self.global_pool.feat_mult(), num_classes)
def get_classifier(self):
return self.fc
def reset_classifier(self, num_classes, global_pool='avg'):
self.num_classes = num_classes
self.global_pool = global_pool
del self.fc
if num_classes:
self.fc = nn.Linear(self.num_features, num_classes)
else:
self.fc = None
self.global_pool = SelectAdaptivePool2d(pool_type=global_pool)
self.fc = nn.Linear(self.num_features * self.global_pool.feat_mult(), num_classes) if num_classes else None
def forward_features(self, x, pool=True):
def forward_features(self, x):
# Entry flow
x = self.conv1(x)
x = self.bn1(x)
@@ -299,14 +295,11 @@ class Xception65(nn.Module):
x = self.conv5(x)
x = self.bn5(x)
x = self.relu(x)
if pool:
x = select_adaptive_pool2d(x, pool_type=self.global_pool)
x = x.view(x.size(0), -1)
return x
def forward(self, x):
x = self.forward_features(x)
x = self.global_pool(x).flatten(1)
if self.drop_rate:
x = F.dropout(x, self.drop_rate, training=self.training)
x = self.fc(x)
@@ -322,7 +315,6 @@ class Xception71(nn.Module):
super(Xception71, self).__init__()
self.num_classes = num_classes
self.drop_rate = drop_rate
self.global_pool = global_pool
norm_kwargs = norm_kwargs if norm_kwargs is not None else {}
if output_stride == 32:
entry_block3_stride = 2
@@ -393,21 +385,18 @@
1536, self.num_features, 3, stride=1, dilation=exit_block_dilations[1],
norm_layer=norm_layer, norm_kwargs=norm_kwargs)
self.bn5 = norm_layer(num_features=self.num_features, **norm_kwargs)
self.fc = nn.Linear(in_features=self.num_features, out_features=num_classes)
self.global_pool = SelectAdaptivePool2d(pool_type=global_pool)
self.fc = nn.Linear(self.num_features * self.global_pool.feat_mult(), num_classes)
def get_classifier(self):
return self.fc
def reset_classifier(self, num_classes, global_pool='avg'):
self.num_classes = num_classes
self.global_pool = global_pool
del self.fc
if num_classes:
self.fc = nn.Linear(self.num_features, num_classes)
else:
self.fc = None
self.global_pool = SelectAdaptivePool2d(pool_type=global_pool)
self.fc = nn.Linear(self.num_features * self.global_pool.feat_mult(), num_classes) if num_classes else None
def forward_features(self, x, pool=True):
def forward_features(self, x):
# Entry flow
x = self.conv1(x)
x = self.bn1(x)
@@ -443,14 +432,11 @@ class Xception71(nn.Module):
x = self.conv5(x)
x = self.bn5(x)
x = self.relu(x)
if pool:
x = select_adaptive_pool2d(x, pool_type=self.global_pool)
x = x.view(x.size(0), -1)
return x
def forward(self, x):
x = self.forward_features(x)
x = self.global_pool(x).flatten(1)
if self.drop_rate:
x = F.dropout(x, self.drop_rate, training=self.training)
x = self.fc(x)

File diff suppressed because it is too large

@@ -8,7 +8,7 @@ import torch.nn.functional as F
from .registry import register_model
from .helpers import load_pretrained
from .adaptive_avgmax_pool import select_adaptive_pool2d
from .adaptive_avgmax_pool import SelectAdaptivePool2d
from timm.data import IMAGENET_INCEPTION_MEAN, IMAGENET_INCEPTION_STD
__all__ = ['InceptionResnetV2']
@@ -226,7 +226,6 @@ class InceptionResnetV2(nn.Module):
def __init__(self, num_classes=1001, in_chans=3, drop_rate=0., global_pool='avg'):
super(InceptionResnetV2, self).__init__()
self.drop_rate = drop_rate
self.global_pool = global_pool
self.num_classes = num_classes
self.num_features = 1536
@@ -287,22 +286,20 @@ class InceptionResnetV2(nn.Module):
)
self.block8 = Block8(noReLU=True)
self.conv2d_7b = BasicConv2d(2080, self.num_features, kernel_size=1, stride=1)
self.global_pool = SelectAdaptivePool2d(pool_type=global_pool)
# NOTE some variants/checkpoints for this model may have 'last_linear' as the name for the FC
self.classif = nn.Linear(self.num_features, num_classes)
self.classif = nn.Linear(self.num_features * self.global_pool.feat_mult(), num_classes)
def get_classifier(self):
return self.classif
def reset_classifier(self, num_classes, global_pool='avg'):
self.global_pool = global_pool
self.global_pool = SelectAdaptivePool2d(pool_type=global_pool)
self.num_classes = num_classes
del self.classif
if num_classes:
self.classif = torch.nn.Linear(self.num_features, num_classes)
else:
self.classif = None
self.classif = nn.Linear(
self.num_features * self.global_pool.feat_mult(), num_classes) if num_classes else None
def forward_features(self, x, pool=True):
def forward_features(self, x):
x = self.conv2d_1a(x)
x = self.conv2d_2a(x)
x = self.conv2d_2b(x)
@@ -318,14 +315,11 @@ class InceptionResnetV2(nn.Module):
x = self.repeat_2(x)
x = self.block8(x)
x = self.conv2d_7b(x)
if pool:
x = select_adaptive_pool2d(x, self.global_pool)
#x = F.avg_pool2d(x, 8, count_include_pad=False)
x = x.view(x.size(0), -1)
return x
def forward(self, x):
x = self.forward_features(x, pool=True)
x = self.forward_features(x)
x = self.global_pool(x).flatten(1)
if self.drop_rate > 0:
x = F.dropout(x, p=self.drop_rate, training=self.training)
x = self.classif(x)

@@ -8,7 +8,7 @@ import torch.nn.functional as F
from .registry import register_model
from .helpers import load_pretrained
from .adaptive_avgmax_pool import select_adaptive_pool2d
from .adaptive_avgmax_pool import SelectAdaptivePool2d
from timm.data import IMAGENET_INCEPTION_MEAN, IMAGENET_INCEPTION_STD
__all__ = ['InceptionV4']
@@ -244,7 +244,6 @@ class InceptionV4(nn.Module):
def __init__(self, num_classes=1001, in_chans=3, drop_rate=0., global_pool='avg'):
super(InceptionV4, self).__init__()
self.drop_rate = drop_rate
self.global_pool = global_pool
self.num_classes = num_classes
self.num_features = 1536
@@ -272,25 +271,24 @@ class InceptionV4(nn.Module):
Inception_C(),
Inception_C(),
)
self.last_linear = nn.Linear(self.num_features, num_classes)
self.global_pool = SelectAdaptivePool2d(pool_type=global_pool)
self.last_linear = nn.Linear(self.num_features * self.global_pool.feat_mult(), num_classes)
def get_classifier(self):
return self.classif
return self.last_linear
def reset_classifier(self, num_classes, global_pool='avg'):
self.global_pool = global_pool
self.global_pool = SelectAdaptivePool2d(pool_type=global_pool)
self.num_classes = num_classes
self.classif = nn.Linear(self.num_features, num_classes)
self.last_linear = nn.Linear(
self.num_features * self.global_pool.feat_mult(), num_classes) if num_classes else None
def forward_features(self, x, pool=True):
x = self.features(x)
if pool:
x = select_adaptive_pool2d(x, self.global_pool)
x = x.view(x.size(0), -1)
return x
def forward_features(self, x):
return self.features(x)
def forward(self, x):
x = self.forward_features(x)
x = self.global_pool(x).flatten(1)
if self.drop_rate > 0:
x = F.dropout(x, p=self.drop_rate, training=self.training)
x = self.last_linear(x)

@@ -75,8 +75,7 @@ class MobileNetV3(nn.Module):
def __init__(self, block_args, num_classes=1000, in_chans=3, stem_size=16, num_features=1280, head_bias=True,
channel_multiplier=1.0, pad_type='', act_layer=nn.ReLU, drop_rate=0., drop_connect_rate=0.,
se_kwargs=None, norm_layer=nn.BatchNorm2d, norm_kwargs=None,
global_pool='avg', weight_init='goog'):
se_kwargs=None, norm_layer=nn.BatchNorm2d, norm_kwargs=None, global_pool='avg'):
super(MobileNetV3, self).__init__()
self.num_classes = num_classes
@@ -107,11 +106,7 @@ class MobileNetV3(nn.Module):
# Classifier
self.classifier = nn.Linear(self.num_features * self.global_pool.feat_mult(), self.num_classes)
for m in self.modules():
if weight_init == 'goog':
efficientnet_init_goog(m)
else:
efficientnet_init_default(m)
efficientnet_init_weights(self)
def as_sequential(self):
layers = [self.conv_stem, self.bn1, self.act1]
@@ -126,12 +121,8 @@ class MobileNetV3(nn.Module):
def reset_classifier(self, num_classes, global_pool='avg'):
self.global_pool = SelectAdaptivePool2d(pool_type=global_pool)
self.num_classes = num_classes
del self.classifier
if num_classes:
self.classifier = nn.Linear(
self.num_features * self.global_pool.feat_mult(), num_classes)
else:
self.classifier = None
self.classifier = nn.Linear(
self.num_features * self.global_pool.feat_mult(), num_classes) if self.num_classes else None
def forward_features(self, x):
x = self.conv_stem(x)
@@ -161,7 +152,7 @@ class MobileNetV3Features(nn.Module):
def __init__(self, block_args, out_indices=(0, 1, 2, 3, 4), feature_location='pre_pwl',
in_chans=3, stem_size=16, channel_multiplier=1.0, output_stride=32, pad_type='',
act_layer=nn.ReLU, drop_rate=0., drop_connect_rate=0., se_kwargs=None,
norm_layer=nn.BatchNorm2d, norm_kwargs=None, weight_init='goog'):
norm_layer=nn.BatchNorm2d, norm_kwargs=None):
super(MobileNetV3Features, self).__init__()
norm_kwargs = norm_kwargs or {}
@@ -187,12 +178,7 @@ class MobileNetV3Features(nn.Module):
self.feature_info = builder.features # builder provides info about feature channels for each block
self._in_chs = builder.in_chs
for m in self.modules():
if weight_init == 'goog':
efficientnet_init_goog(m)
else:
efficientnet_init_default(m)
efficientnet_init_weights(self)
if _DEBUG:
for k, v in self.feature_info.items():
print('Feature idx: {}: Name: {}, Channels: {}'.format(k, v['name'], v['num_chs']))

@@ -556,8 +556,18 @@ class NASNetALarge(nn.Module):
self.global_pool = SelectAdaptivePool2d(pool_type=global_pool)
self.last_linear = nn.Linear(self.num_features * self.global_pool.feat_mult(), num_classes)
def forward_features(self, input, pool=True):
x_conv0 = self.conv0(input)
def get_classifier(self):
return self.last_linear
def reset_classifier(self, num_classes, global_pool='avg'):
self.num_classes = num_classes
self.global_pool = SelectAdaptivePool2d(pool_type=global_pool)
del self.last_linear
self.last_linear = nn.Linear(
self.num_features * self.global_pool.feat_mult(), num_classes) if num_classes else None
def forward_features(self, x):
x_conv0 = self.conv0(x)
x_stem_0 = self.cell_stem_0(x_conv0)
x_stem_1 = self.cell_stem_1(x_conv0, x_stem_0)
@@ -586,13 +596,11 @@ class NASNetALarge(nn.Module):
x_cell_16 = self.cell_16(x_cell_15, x_cell_14)
x_cell_17 = self.cell_17(x_cell_16, x_cell_15)
x = self.relu(x_cell_17)
if pool:
x = self.global_pool(x)
x = x.view(x.size(0), -1)
return x
def forward(self, input):
x = self.forward_features(input)
def forward(self, x):
x = self.forward_features(x)
x = self.global_pool(x).flatten(1)
if self.drop_rate > 0:
x = F.dropout(x, self.drop_rate, training=self.training)
x = self.last_linear(x)

@@ -355,7 +355,7 @@ class PNASNet5Large(nn.Module):
else:
self.last_linear = None
def forward_features(self, x, pool=True):
def forward_features(self, x):
x_conv_0 = self.conv_0(x)
x_stem_0 = self.cell_stem_0(x_conv_0)
x_stem_1 = self.cell_stem_1(x_conv_0, x_stem_0)
@@ -372,13 +372,11 @@ class PNASNet5Large(nn.Module):
x_cell_10 = self.cell_10(x_cell_8, x_cell_9)
x_cell_11 = self.cell_11(x_cell_9, x_cell_10)
x = self.relu(x_cell_11)
if pool:
x = self.global_pool(x)
x = x.view(x.size(0), -1)
return x
def forward(self, input):
x = self.forward_features(input)
def forward(self, x):
x = self.forward_features(x)
x = self.global_pool(x).flatten(1)
if self.drop_rate > 0:
x = F.dropout(x, self.drop_rate, training=self.training)
x = self.last_linear(x)

@@ -17,7 +17,7 @@ from .adaptive_avgmax_pool import SelectAdaptivePool2d
from timm.data import IMAGENET_DEFAULT_MEAN, IMAGENET_DEFAULT_STD
__all__ = ['ResNet'] # model_registry will add each entrypoint fn to this
__all__ = ['ResNet', 'BasicBlock', 'Bottleneck'] # model_registry will add each entrypoint fn to this
def _cfg(url='', **kwargs):
@@ -374,12 +374,9 @@ class ResNet(nn.Module):
self.global_pool = SelectAdaptivePool2d(pool_type=global_pool)
self.num_classes = num_classes
del self.fc
if num_classes:
self.fc = nn.Linear(self.num_features * self.global_pool.feat_mult(), num_classes)
else:
self.fc = None
self.fc = nn.Linear(self.num_features * self.global_pool.feat_mult(), num_classes) if num_classes else None
def forward_features(self, x, pool=True):
def forward_features(self, x):
x = self.conv1(x)
x = self.bn1(x)
x = self.relu(x)
@@ -389,14 +386,11 @@ class ResNet(nn.Module):
x = self.layer2(x)
x = self.layer3(x)
x = self.layer4(x)
if pool:
x = self.global_pool(x)
x = x.view(x.size(0), -1)
return x
def forward(self, x):
x = self.forward_features(x)
x = self.global_pool(x).flatten(1)
if self.drop_rate > 0.:
x = F.dropout(x, p=self.drop_rate, training=self.training)
x = self.fc(x)

@@ -274,6 +274,7 @@ class SENet(nn.Module):
super(SENet, self).__init__()
self.inplanes = inplanes
self.num_classes = num_classes
self.drop_rate = drop_rate
if input_3x3:
layer0_modules = [
('conv1', nn.Conv2d(in_chans, 64, 3, stride=2, padding=1, bias=False)),
@@ -337,7 +338,6 @@ class SENet(nn.Module):
downsample_padding=downsample_padding
)
self.avg_pool = SelectAdaptivePool2d(pool_type=global_pool)
self.drop_rate = drop_rate
self.num_features = 512 * block.expansion
self.last_linear = nn.Linear(self.num_features, num_classes)
@@ -366,26 +366,25 @@ class SENet(nn.Module):
def get_classifier(self):
return self.last_linear
def reset_classifier(self, num_classes):
def reset_classifier(self, num_classes, global_pool='avg'):
self.num_classes = num_classes
self.avg_pool = SelectAdaptivePool2d(pool_type=global_pool)
del self.last_linear
if num_classes:
self.last_linear = nn.Linear(self.num_features, num_classes)
self.last_linear = nn.Linear(self.num_features * self.avg_pool.feat_mult(), num_classes)
else:
self.last_linear = None
def forward_features(self, x, pool=True):
def forward_features(self, x):
x = self.layer0(x)
x = self.layer1(x)
x = self.layer2(x)
x = self.layer3(x)
x = self.layer4(x)
if pool:
x = self.avg_pool(x)
x = x.view(x.size(0), -1)
return x
def logits(self, x):
x = self.avg_pool(x).flatten(1)
if self.drop_rate > 0.:
x = F.dropout(x, p=self.drop_rate, training=self.training)
x = self.last_linear(x)

@@ -20,7 +20,7 @@ class TestTimePoolHead(nn.Module):
self.base.reset_classifier(0) # delete original fc layer
def forward(self, x):
x = self.base.forward_features(x, pool=False)
x = self.base.forward_features(x)
x = F.avg_pool2d(x, kernel_size=self.original_pool, stride=1)
x = self.fc(x)
x = adaptive_avgmax_pool2d(x, 1)
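
With forward_features now always returning unpooled maps, the pool=False flag disappears from the wrapper. A rough usage sketch; the import location and the original_pool default are assumptions based on this era of the codebase:

from timm import create_model
from timm.models import TestTimePoolHead  # import location assumed

model = create_model('dpn92', pretrained=True)
# Wrap for inference at larger-than-train input sizes; original_pool is
# the train-time pooling window that the wrapper slides over the features.
model = TestTimePoolHead(model, original_pool=7)
model.eval()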

@@ -29,7 +29,7 @@ import torch.nn.functional as F
from .registry import register_model
from .helpers import load_pretrained
from .adaptive_avgmax_pool import select_adaptive_pool2d
from .adaptive_avgmax_pool import SelectAdaptivePool2d
__all__ = ['Xception']
@@ -163,7 +163,8 @@ class Xception(nn.Module):
self.conv4 = SeparableConv2d(1536, self.num_features, 3, 1, 1)
self.bn4 = nn.BatchNorm2d(self.num_features)
self.fc = nn.Linear(self.num_features, num_classes)
self.global_pool = SelectAdaptivePool2d(pool_type=global_pool)
self.fc = nn.Linear(self.num_features * self.global_pool.feat_mult(), num_classes)
# #------- init weights --------
for m in self.modules():
@@ -178,15 +179,12 @@ class Xception(nn.Module):
def reset_classifier(self, num_classes, global_pool='avg'):
self.num_classes = num_classes
self.global_pool = global_pool
self.global_pool = SelectAdaptivePool2d(pool_type=global_pool)
del self.fc
if num_classes:
self.fc = nn.Linear(self.num_features, num_classes)
else:
self.fc = None
self.fc = nn.Linear(self.num_features * self.global_pool.feat_mult(), num_classes) if num_classes else None
def forward_features(self, input, pool=True):
x = self.conv1(input)
def forward_features(self, x):
x = self.conv1(x)
x = self.bn1(x)
x = self.relu(x)
@@ -214,14 +212,11 @@ class Xception(nn.Module):
x = self.conv4(x)
x = self.bn4(x)
x = self.relu(x)
if pool:
x = select_adaptive_pool2d(x, pool_type=self.global_pool)
x = x.view(x.size(0), -1)
return x
def forward(self, input):
x = self.forward_features(input)
def forward(self, x):
x = self.forward_features(x)
x = self.global_pool(x).flatten(1)
if self.drop_rate:
x = F.dropout(x, self.drop_rate, training=self.training)
x = self.fc(x)
