diff --git a/tests/test_models.py b/tests/test_models.py index b6a61727..d0d15951 100644 --- a/tests/test_models.py +++ b/tests/test_models.py @@ -129,7 +129,7 @@ def test_model_backward(model_name, batch_size): @pytest.mark.timeout(300) -@pytest.mark.parametrize('model_name', list_models(exclude_filters=NON_STD_FILTERS)) +@pytest.mark.parametrize('model_name', list_models(exclude_filters=NON_STD_FILTERS, include_tags=True)) @pytest.mark.parametrize('batch_size', [1]) def test_model_default_cfgs(model_name, batch_size): """Run a single forward pass with each model""" @@ -191,7 +191,7 @@ def test_model_default_cfgs(model_name, batch_size): @pytest.mark.timeout(300) -@pytest.mark.parametrize('model_name', list_models(filter=NON_STD_FILTERS, exclude_filters=NON_STD_EXCLUDE_FILTERS)) +@pytest.mark.parametrize('model_name', list_models(filter=NON_STD_FILTERS, exclude_filters=NON_STD_EXCLUDE_FILTERS, include_tags=True)) @pytest.mark.parametrize('batch_size', [1]) def test_model_default_cfgs_non_std(model_name, batch_size): """Run a single forward pass with each model""" @@ -304,7 +304,7 @@ if 'GITHUB_ACTIONS' in os.environ: # and 'Linux' in platform.system(): @pytest.mark.timeout(120) -@pytest.mark.parametrize('model_name', list_models(exclude_filters=EXCLUDE_FILTERS + EXCLUDE_FEAT_FILTERS)) +@pytest.mark.parametrize('model_name', list_models(exclude_filters=EXCLUDE_FILTERS + EXCLUDE_FEAT_FILTERS, include_tags=True)) @pytest.mark.parametrize('batch_size', [1]) def test_model_forward_features(model_name, batch_size): """Run a single forward pass with each model in feature extraction mode""" diff --git a/timm/models/convnext.py b/timm/models/convnext.py index 69a53c51..e9214429 100644 --- a/timm/models/convnext.py +++ b/timm/models/convnext.py @@ -2,7 +2,20 @@ Papers: * `A ConvNet for the 2020s` - https://arxiv.org/pdf/2201.03545.pdf +@Article{liu2022convnet, + author = {Zhuang Liu and Hanzi Mao and Chao-Yuan Wu and Christoph Feichtenhofer and Trevor Darrell and Saining 
Xie}, + title = {A ConvNet for the 2020s}, + journal = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, + year = {2022}, +} + * `ConvNeXt-V2 - Co-designing and Scaling ConvNets with Masked Autoencoders` - https://arxiv.org/abs/2301.00808 +@article{Woo2023ConvNeXtV2, + title={ConvNeXt V2: Co-designing and Scaling ConvNets with Masked Autoencoders}, + author={Sanghyun Woo and Shoubhik Debnath and Ronghang Hu and Xinlei Chen and Zhuang Liu and In So Kweon and Saining Xie}, + year={2023}, + journal={arXiv preprint arXiv:2301.00808}, +} Original code and weights from: * https://github.com/facebookresearch/ConvNeXt, original copyright below @@ -401,6 +414,20 @@ def _cfg(url='', **kwargs): } +def _cfgv2(url='', **kwargs): + return { + 'url': url, + 'num_classes': 1000, 'input_size': (3, 224, 224), 'pool_size': (7, 7), + 'crop_pct': 0.875, 'interpolation': 'bicubic', + 'mean': IMAGENET_DEFAULT_MEAN, 'std': IMAGENET_DEFAULT_STD, + 'first_conv': 'stem.0', 'classifier': 'head.fc', + 'license': 'cc-by-nc-4.0', 'paper_ids': 'arXiv:2301.00808', + 'paper_name': 'ConvNeXt-V2: Co-designing and Scaling ConvNets with Masked Autoencoders', + 'origin_url': 'https://github.com/facebookresearch/ConvNeXt-V2', + **kwargs + } + + default_cfgs = generate_default_cfgs({ # timm specific variants 'convnext_atto.d2_in1k': _cfg( @@ -529,146 +556,112 @@ default_cfgs = generate_default_cfgs({ hf_hub_id='timm/', num_classes=21841), - 'convnextv2_nano.fcmae_ft_in22k_in1k': _cfg( + 'convnextv2_nano.fcmae_ft_in22k_in1k': _cfgv2( url='https://dl.fbaipublicfiles.com/convnext/convnextv2/im22k/convnextv2_nano_22k_224_ema.pt', - #hf_hub_id='timm/', - license='cc-by-nc-4.0', + hf_hub_id='timm/', test_input_size=(3, 288, 288), test_crop_pct=1.0), - 'convnextv2_nano.fcmae_ft_in22k_in1k_384': _cfg( + 'convnextv2_nano.fcmae_ft_in22k_in1k_384': _cfgv2( url='https://dl.fbaipublicfiles.com/convnext/convnextv2/im22k/convnextv2_nano_22k_384_ema.pt', - #hf_hub_id='timm/', - 
license='cc-by-nc-4.0', - crop_pct=0.95, input_size=(3, 384, 384), test_input_size=(3, 416, 416), test_crop_pct=1.0, crop_mode='squash'), - 'convnextv2_tiny.fcmae_ft_in22k_in1k': _cfg( + hf_hub_id='timm/', + input_size=(3, 384, 384), pool_size=(12, 12), crop_pct=1.0, crop_mode='squash'), + 'convnextv2_tiny.fcmae_ft_in22k_in1k': _cfgv2( url="https://dl.fbaipublicfiles.com/convnext/convnextv2/im22k/convnextv2_tiny_22k_224_ema.pt", - #hf_hub_id='timm/', - license='cc-by-nc-4.0', + hf_hub_id='timm/', test_input_size=(3, 288, 288), test_crop_pct=1.0), - 'convnextv2_tiny.fcmae_ft_in22k_in1k_384': _cfg( + 'convnextv2_tiny.fcmae_ft_in22k_in1k_384': _cfgv2( url="https://dl.fbaipublicfiles.com/convnext/convnextv2/im22k/convnextv2_tiny_22k_384_ema.pt", - # hf_hub_id='timm/', - license='cc-by-nc-4.0', - crop_pct=0.95, input_size=(3, 384, 384), test_input_size=(3, 416, 416), test_crop_pct=1.0, crop_mode='squash'), - 'convnextv2_base.fcmae_ft_in22k_in1k': _cfg( + hf_hub_id='timm/', + input_size=(3, 384, 384), pool_size=(12, 12), crop_pct=1.0, crop_mode='squash'), + 'convnextv2_base.fcmae_ft_in22k_in1k': _cfgv2( url="https://dl.fbaipublicfiles.com/convnext/convnextv2/im22k/convnextv2_base_22k_224_ema.pt", - #hf_hub_id='timm/' - license='cc-by-nc-4.0', + hf_hub_id='timm/', test_input_size=(3, 288, 288), test_crop_pct=1.0), - 'convnextv2_base.fcmae_ft_in22k_in1k_384': _cfg( + 'convnextv2_base.fcmae_ft_in22k_in1k_384': _cfgv2( url="https://dl.fbaipublicfiles.com/convnext/convnextv2/im22k/convnextv2_base_22k_384_ema.pt", - #hf_hub_id='timm/' - license='cc-by-nc-4.0', - crop_pct=0.95, input_size=(3, 384, 384), test_input_size=(3, 416, 416), test_crop_pct=1.0, crop_mode='squash'), - 'convnextv2_large.fcmae_ft_in22k_in1k': _cfg( + hf_hub_id='timm/', + input_size=(3, 384, 384), pool_size=(12, 12), crop_pct=1.0, crop_mode='squash'), + 'convnextv2_large.fcmae_ft_in22k_in1k': _cfgv2( url="https://dl.fbaipublicfiles.com/convnext/convnextv2/im22k/convnextv2_large_22k_224_ema.pt", - 
#hf_hub_id='timm/' - license='cc-by-nc-4.0', + hf_hub_id='timm/', test_input_size=(3, 288, 288), test_crop_pct=1.0), - 'convnextv2_large.fcmae_ft_in22k_in1k_384': _cfg( + 'convnextv2_large.fcmae_ft_in22k_in1k_384': _cfgv2( url="https://dl.fbaipublicfiles.com/convnext/convnextv2/im22k/convnextv2_large_22k_384_ema.pt", - #hf_hub_id='timm/' - license='cc-by-nc-4.0', - crop_pct=0.95, input_size=(3, 384, 384), test_input_size=(3, 416, 416), test_crop_pct=1.0, crop_mode='squash'), - 'convnextv2_huge.fcmae_ft_in22k_in1k_384': _cfg( + hf_hub_id='timm/', + input_size=(3, 384, 384), pool_size=(12, 12), crop_pct=1.0, crop_mode='squash'), + 'convnextv2_huge.fcmae_ft_in22k_in1k_384': _cfgv2( url="https://dl.fbaipublicfiles.com/convnext/convnextv2/im22k/convnextv2_huge_22k_384_ema.pt", - #hf_hub_id='timm/' - license='cc-by-nc-4.0', - crop_pct=0.95, input_size=(3, 384, 384), test_input_size=(3, 416, 416), test_crop_pct=1.0, crop_mode='squash'), - 'convnextv2_huge.fcmae_ft_in22k_in1k_512': _cfg( + hf_hub_id='timm/', + input_size=(3, 384, 384), pool_size=(12, 12), crop_pct=1.0, crop_mode='squash'), + 'convnextv2_huge.fcmae_ft_in22k_in1k_512': _cfgv2( url="https://dl.fbaipublicfiles.com/convnext/convnextv2/im22k/convnextv2_huge_22k_512_ema.pt", - #hf_hub_id='timm/' - license='cc-by-nc-4.0', - crop_pct=0.95, input_size=(3, 512, 512), test_input_size=(3, 576, 576), test_crop_pct=1.0, crop_mode='squash'), + hf_hub_id='timm/', + input_size=(3, 512, 512), pool_size=(15, 15), crop_pct=1.0, crop_mode='squash'), - 'convnextv2_atto.fcmae_ft_in1k': _cfg( + 'convnextv2_atto.fcmae_ft_in1k': _cfgv2( url='https://dl.fbaipublicfiles.com/convnext/convnextv2/im1k/convnextv2_atto_1k_224_ema.pt', - #hf_hub_id='timm/', - license='cc-by-nc-4.0', + hf_hub_id='timm/', test_input_size=(3, 288, 288), test_crop_pct=0.95), - 'convnextv2_femto.fcmae_ft_in1k': _cfg( + 'convnextv2_femto.fcmae_ft_in1k': _cfgv2( url='https://dl.fbaipublicfiles.com/convnext/convnextv2/im1k/convnextv2_femto_1k_224_ema.pt', - 
#hf_hub_id='timm/', - license='cc-by-nc-4.0', + hf_hub_id='timm/', test_input_size=(3, 288, 288), test_crop_pct=0.95), - 'convnextv2_pico.fcmae_ft_in1k': _cfg( + 'convnextv2_pico.fcmae_ft_in1k': _cfgv2( url='https://dl.fbaipublicfiles.com/convnext/convnextv2/im1k/convnextv2_pico_1k_224_ema.pt', - #hf_hub_id='timm/', - license='cc-by-nc-4.0', + hf_hub_id='timm/', test_input_size=(3, 288, 288), test_crop_pct=0.95), - 'convnextv2_nano.fcmae_ft_in1k': _cfg( + 'convnextv2_nano.fcmae_ft_in1k': _cfgv2( url='https://dl.fbaipublicfiles.com/convnext/convnextv2/im1k/convnextv2_nano_1k_224_ema.pt', - #hf_hub_id='timm/', - license='cc-by-nc-4.0', + hf_hub_id='timm/', test_input_size=(3, 288, 288), test_crop_pct=1.0), - 'convnextv2_tiny.fcmae_ft_in1k': _cfg( + 'convnextv2_tiny.fcmae_ft_in1k': _cfgv2( url="https://dl.fbaipublicfiles.com/convnext/convnextv2/im1k/convnextv2_tiny_1k_224_ema.pt", - #hf_hub_id='timm/', - license='cc-by-nc-4.0', + hf_hub_id='timm/', test_input_size=(3, 288, 288), test_crop_pct=1.0), - 'convnextv2_base.fcmae_ft_in1k': _cfg( + 'convnextv2_base.fcmae_ft_in1k': _cfgv2( url="https://dl.fbaipublicfiles.com/convnext/convnextv2/im1k/convnextv2_base_1k_224_ema.pt", - #hf_hub_id='timm/' - license='cc-by-nc-4.0', + hf_hub_id='timm/', test_input_size=(3, 288, 288), test_crop_pct=1.0), - 'convnextv2_large.fcmae_ft_in1k': _cfg( + 'convnextv2_large.fcmae_ft_in1k': _cfgv2( url="https://dl.fbaipublicfiles.com/convnext/convnextv2/im1k/convnextv2_large_1k_224_ema.pt", - #hf_hub_id='timm/', - license='cc-by-nc-4.0', + hf_hub_id='timm/', test_input_size=(3, 288, 288), test_crop_pct=1.0), - 'convnextv2_huge.fcmae_ft_in1k': _cfg( + 'convnextv2_huge.fcmae_ft_in1k': _cfgv2( url="https://dl.fbaipublicfiles.com/convnext/convnextv2/im1k/convnextv2_huge_1k_224_ema.pt", - # hf_hub_id='timm/', - license='cc-by-nc-4.0', + hf_hub_id='timm/', test_input_size=(3, 288, 288), test_crop_pct=1.0), - 'convnextv2_atto.fcmae': _cfg( + 'convnextv2_atto.fcmae': _cfgv2( 
url='https://dl.fbaipublicfiles.com/convnext/convnextv2/pt_only/convnextv2_atto_1k_224_fcmae.pt', - #hf_hub_id='timm/', - license='cc-by-nc-4.0', - num_classes=0, - ), - 'convnextv2_femto.fcmae': _cfg( + hf_hub_id='timm/', + num_classes=0), + 'convnextv2_femto.fcmae': _cfgv2( url='https://dl.fbaipublicfiles.com/convnext/convnextv2/pt_only/convnextv2_femto_1k_224_fcmae.pt', - #hf_hub_id='timm/', - license='cc-by-nc-4.0', - num_classes=0, - ), - 'convnextv2_pico.fcmae': _cfg( + hf_hub_id='timm/', + num_classes=0), + 'convnextv2_pico.fcmae': _cfgv2( url='https://dl.fbaipublicfiles.com/convnext/convnextv2/pt_only/convnextv2_pico_1k_224_fcmae.pt', - #hf_hub_id='timm/', - license='cc-by-nc-4.0', - num_classes=0, - ), - 'convnextv2_nano.fcmae': _cfg( + hf_hub_id='timm/', + num_classes=0), + 'convnextv2_nano.fcmae': _cfgv2( url='https://dl.fbaipublicfiles.com/convnext/convnextv2/pt_only/convnextv2_nano_1k_224_fcmae.pt', - #hf_hub_id='timm/', - license='cc-by-nc-4.0', - num_classes=0, - ), - 'convnextv2_tiny.fcmae': _cfg( + hf_hub_id='timm/', + num_classes=0), + 'convnextv2_tiny.fcmae': _cfgv2( url="https://dl.fbaipublicfiles.com/convnext/convnextv2/pt_only/convnextv2_tiny_1k_224_fcmae.pt", - #hf_hub_id='timm/', - license='cc-by-nc-4.0', - num_classes=0, - ), - 'convnextv2_base.fcmae': _cfg( + hf_hub_id='timm/', + num_classes=0), + 'convnextv2_base.fcmae': _cfgv2( url="https://dl.fbaipublicfiles.com/convnext/convnextv2/pt_only/convnextv2_base_1k_224_fcmae.pt", - #hf_hub_id='timm/' - license='cc-by-nc-4.0', - num_classes=0, - ), - 'convnextv2_large.fcmae': _cfg( + hf_hub_id='timm/', + num_classes=0), + 'convnextv2_large.fcmae': _cfgv2( url="https://dl.fbaipublicfiles.com/convnext/convnextv2/pt_only/convnextv2_large_1k_224_fcmae.pt", - #hf_hub_id='timm/' - license='cc-by-nc-4.0', - num_classes=0, - ), - 'convnextv2_huge.fcmae': _cfg( + hf_hub_id='timm/', + num_classes=0), + 'convnextv2_huge.fcmae': _cfgv2( 
url="https://dl.fbaipublicfiles.com/convnext/convnextv2/pt_only/convnextv2_huge_1k_224_fcmae.pt", - #hf_hub_id='timm/' - license='cc-by-nc-4.0', - num_classes=0, - ), + hf_hub_id='timm/', + num_classes=0), 'convnextv2_small.untrained': _cfg(), })