Compare commits

...

147 Commits

Author SHA1 Message Date
Benjamin Bossan a5b01ec04e Add type annotations to _registry.py
1 year ago
Benjamin Bossan c9406ce608
Some additions to the CONTRIBUTING guide (#1685)
1 year ago
Ross Wightman a32c4eff69
Create CONTRIBUTING.md
1 year ago
Ross Wightman a0772f03e0
Update README.md
1 year ago
Ross Wightman 47f1de9bec Version bump
1 year ago
Ross Wightman 11f7b589e5 Update setup.py for huggingface changes.
1 year ago
Ross Wightman 4d9c3ae2fb Add laion2b 320x320 ConvNeXt-Large CLIP weights
1 year ago
Ross Wightman d0b45c9b4d Make safetensors import optional for now. Improve avg/clean checkpoints ext handling a bit (more consistent).
1 year ago
Ross Wightman 7d9e321b76 Improve tracing of window attn models with simpler reshape logic
1 year ago
Ross Wightman a3c6685e20
Delete requirements-modelindex.txt
1 year ago
Ross Wightman 022403ce0a Update README
1 year ago
Ross Wightman 2e38d53dca Remove dead line
1 year ago
Ross Wightman f77c04ff36 Torchscript fixes/hacks for rms_norm, refactor ParallelScalingBlock with manual combination of input projections, closer paper match
1 year ago
Ross Wightman 122621daef Add Final annotation to attn_fas to avoid symbol lookup of new scaled_dot_product_attn fn on old PyTorch in jit
1 year ago
Ross Wightman 621e1b2182 Add ideas from 'Scaling ViT to 22-B Params', testing PyTorch 2.0 fused F.scaled_dot_product_attention impl in vit, vit_relpos, maxxvit / coatnet.
1 year ago
Ross Wightman a3d528524a Version 0.8.12dev0
1 year ago
testbot a09d403c24 changed warning to info
1 year ago
testbot 8470e29541 Add support to load safetensors weights
1 year ago
Ross Wightman f35d6ea57b Add multi-tensor (foreach) version of Lion in style of upcoming PyTorch 2.0 optimizers
1 year ago
Ross Wightman 709d5e0d9d Add Lion optimizer
1 year ago
Ross Wightman 624266148d Remove unused imports from _hub helpers
1 year ago
Ross Wightman 2cfff0581b Add grad_checkpointing support to features_only, test in EfficientDet.
1 year ago
Ross Wightman 45af496197 Version 0.8.11dev0
1 year ago
Ross Wightman 9c14654a0d Improve support for custom dataset label name/description through HF hub export, via pretrained_cfg
1 year ago
Ross Wightman 1e0b347227 Fix README
1 year ago
Ross Wightman 497be8343c Update README and version
1 year ago
Ross Wightman 0d33127df2 Add 384x384 convnext_large_mlp laion2b fine-tune on in1k
1 year ago
Ross Wightman 88a5b8491d
Merge pull request #1662 from rwightman/dataset_info
1 year ago
Ross Wightman 7a0bd095cb Update model prune loader to use pkgutil
1 year ago
Ross Wightman 0f2803de7a Move ImageNet metadata (aka info) files to timm/data/_info. Add helper classes to make info available for labelling. Update inference.py for first use.
1 year ago
Ross Wightman 89b0452171 Add PyTorch 1.13 inference benchmark numbers
1 year ago
Ross Wightman 7a13be67a5
Update version.py
1 year ago
Ross Wightman 4b383e8ffe
Merge pull request #1655 from rwightman/levit_efficientformer_redux
1 year ago
Ross Wightman 13acac8c5e Update head metadata for effformerv2
1 year ago
Ross Wightman 8682528096 Add first conv metadata for efficientformer_v2
1 year ago
Ross Wightman 72fba669a8 is_scripting() guard on checkpoint_seq
1 year ago
Ross Wightman 95ec255f7f Finish timm mode api for efficientformer_v2, add grad checkpointing support to both efficientformers
1 year ago
Ross Wightman 9d03c6f526 Merge remote-tracking branch 'origin/main' into levit_efficientformer_redux
1 year ago
Ross Wightman 086bd55a94 Add EfficientFormer-V2, refactor EfficientFormer and Levit for more uniformity across the 3 related arch. Add features_out support to levit conv models and efficientformer_v2. All weights on hub.
1 year ago
Ross Wightman 2cb2699dc8 Apply fix from #1649 to main
1 year ago
Ross Wightman e0a5911072
Merge pull request #1645 from rwightman/norm_mlp_classifier
1 year ago
Ross Wightman b3042081b4 Add laion -> in1k fine-tuned base and large_mlp weights for convnext
1 year ago
Ross Wightman 316bdf8955 Add mlp head support for convnext_large, add laion2b CLIP weights, prep fine-tuned weight tags
1 year ago
Ross Wightman 6f28b562c6 Factor NormMlpClassifierHead from MaxxViT and use across MaxxViT / ConvNeXt / DaViT, refactor some type hints & comments
1 year ago
Ross Wightman 29fda20e6d Merge branch 'fffffgggg54-main'
1 year ago
Ross Wightman 9a53c3f727 Finalize DaViT, some formatting and modelling simplifications (separate PatchEmbed to Stem + Downsample), weights on HF hub.
1 year ago
Fredo Guan fb717056da Merge remote-tracking branch 'upstream/main'
1 year ago
Ross Wightman 2bbc26dd82 version 0.8.8dev0
1 year ago
Ross Wightman 64667bfa0e Add 'gigantic' vit clip variant for feature extraction and future fine-tuning
1 year ago
Ross Wightman 3aa31f537d
Merge pull request #1641 from rwightman/maxxvit_hub
1 year ago
Ross Wightman 9983ed7721 xlarge maxvit killing the tests
1 year ago
Ross Wightman c2822568ec Update version to 0.8.7dev0
1 year ago
Ross Wightman 0417a9dd81 Update README
1 year ago
Ross Wightman 36989cfae4 Factor out readme generation in hub helper, add more readme fields
1 year ago
Ross Wightman 32f252381d Change order of checkpoint filtering fn application in builder, try dict, model variant first
1 year ago
Ross Wightman e9f1376cde Cleanup resolve data config fns, add 'model' variant that takes model as first arg, make 'args' arg optional in original fn
1 year ago
Ross Wightman bed350f5e5 Push all MaxxViT weights to HF hub, cleanup impl, add feature map extraction support and promote to 'std' architecture. Fix norm head for proper embedding / feat map output. Add new in12k + ft 1k weights.
1 year ago
Ross Wightman ca38e1e73f Update ClassifierHead module, add reset() method, update in_chs -> in_features for consistency
1 year ago
Ross Wightman 8ab573cd26 Add convnext_tiny and convnext_small 384x384 fine-tunes of in12k weights, fix pool size for laion CLIP convnext weights
1 year ago
Fredo Guan e58a884c1c Merge remote-tracking branch 'upstream/main'
1 year ago
Fredo Guan 81ca323751
Davit update formatting and fix grad checkpointing (#7)
1 year ago
Ross Wightman e9aac412de Correct mean/std for CLIP convnexts
1 year ago
Ross Wightman 42bd8f7bcb Add convnext_base CLIP image tower weights for fine-tuning / features
1 year ago
Ross Wightman 65aea97067
Update tests.yml
1 year ago
Ross Wightman dd60c45044
Merge pull request #1633 from rwightman/freeze_norm_revisit
1 year ago
Ross Wightman e520553e3e Update batchnorm freezing to handle NormAct variants, Add GroupNorm1Act, update BatchNormAct2d tracing change from PyTorch
1 year ago
Ross Wightman a2c14c2064 Add tiny/small in12k pretrained and fine-tuned ConvNeXt models
1 year ago
Ross Wightman 01aea8c1bf Version 0.8.6dev0
1 year ago
Ross Wightman 2e83bba142 Revert head norm changes to ConvNeXt as it broke some downstream use, alternate workaround for fcmae weights
1 year ago
Ikko Eltociear Ashimine 2c24cb98f1 Fix typo in results/README.md
1 year ago
Ross Wightman 1825b5e314 maxxvit type
1 year ago
Ross Wightman 5078b28f8a More kwarg handling tweaks, maxvit_base_rw def added
1 year ago
Ross Wightman c0d7388a1b Improving kwarg merging in more models
1 year ago
Ross Wightman 94a91598c3
Update README.md
1 year ago
Ross Wightman d2ef5a3a94
Update README.md
1 year ago
Ross Wightman ae9153052f
Update version.py
1 year ago
Ross Wightman 60ebb6cefa Re-order vit pretrained entries for more sensible default weights (no .tag specified)
1 year ago
Ross Wightman e861b74cf8 Pass through --model-kwargs (and --opt-kwargs for train) from command line through to model __init__. Update some models to improve arg overlay. Cleanup along the way.
1 year ago
Ross Wightman add3fb864e Working on improved model card template for push_to_hf_hub
1 year ago
Xa9aX ツ 13c7183c52 Update installation.mdx
1 year ago
Ross Wightman eb83eb3bd1 Rotate changelogs, add redirects to mkdocs -> equivalent HF docs pages
1 year ago
Ross Wightman dd0bb327e9
Update version.py
1 year ago
Ross Wightman 6e5553da5f
Add ConvNeXt-V2 support (model additions and weights) (#1614)
1 year ago
nateraw 3698e79ac5 🐛 fix github source links in hf docs
1 year ago
Nathan Raw 9f5bba9ef9
Structure Hugging Face Docs (#1575)
1 year ago
Ross Wightman 960f5f92e6 Update results csv with latest val/test set runs
1 year ago
Ross Wightman 6902c48a5f Fix ResNet based models to work w/ norm layers w/o affine params. Reformat long arg lists into vertical form.
1 year ago
Ross Wightman d5aa17e415 Remove print from auto_augment
1 year ago
Ross Wightman 7c846d9970 Better vmap compat across recent torch versions
1 year ago
Ross Wightman 130458988a
Update README.md
1 year ago
Ross Wightman d96538f1d2 Update README
1 year ago
Ross Wightman 4e24f75289
Merge pull request #1593 from rwightman/multi-weight_effnet_convnext
1 year ago
Ross Wightman 8ece53e194 Switch BEiT to HF hub weights
1 year ago
Ross Wightman d1bfa9a000 Support HF datasets and TFDS w/ a sub-path by fixing split, fix #1598 ... add class mapping support to HF datasets in case class label isn't in info.
1 year ago
Ross Wightman 35fb00c779 Add flexivit to non-std tests list
1 year ago
Ross Wightman e2fc43bc63 Version 0.8.2dev0
1 year ago
Ross Wightman 9a51e4ea2e Add FlexiViT models and weights, refactoring, push more weights
1 year ago
Fredo Guan 10b3f696b4
Davit std (#6)
1 year ago
Ross Wightman 656e1776de Convert mobilenetv3 to multi-weight, tweak PretrainedCfg metadata
1 year ago
Fredo Guan 546590c5f5
Merge branch 'rwightman:main' into main
1 year ago
Ross Wightman 18ec173f95
Merge pull request #1592 from twmht/add_adan_to_init
1 year ago
Ross Wightman 6a01101905 Update efficientnet.py and convnext.py to multi-weight, add ImageNet-12k pretrained EfficientNet-B5 and ConvNeXt-Nano.
1 year ago
alec.tu 74d6afb4cd Add Adan to __init__.py
1 year ago
Fredo Guan 84178fca60
Merge branch 'rwightman:main' into main
1 year ago
Fredo Guan c43340ddd4
Davit std (#5)
1 year ago
Ross Wightman e7da205345 Fix aa min_max level clamp
1 year ago
Ross Wightman e3b2f5be0a Add 3-Augment support to auto_augment.py, clean up weighted choice handling, and allow adjust per op prob via arg string
1 year ago
Ross Wightman e98c93264c
Merge pull request #1581 from rwightman/refactor-imports
1 year ago
Ross Wightman d5e7d6b27e Merge remote-tracking branch 'origin/main' into refactor-imports
1 year ago
Ross Wightman cda39b35bd Add a deprecation phase to module re-org
1 year ago
Ross Wightman 0fe90449e5
Update README.md
1 year ago
Ross Wightman 1733177c75
Update README.md
1 year ago
Ross Wightman 9e47d8ad59
Update README.md
1 year ago
Ross Wightman f266f841a0
Merge pull request #1586 from lorenzbaraldi/eval_loss
1 year ago
Lorenzo Baraldi 3d6bc42aa1 Put validation loss under amp_autocast
1 year ago
Fredo Guan edea013dd1
Davit std (#3)
1 year ago
Ross Wightman 7c4ed4d5a4 Add EVA-large models
1 year ago
Fredo Guan 434a03937d
Merge branch 'rwightman:main' into main
1 year ago
Ross Wightman 6a92587e0d
Update README.md
1 year ago
Ross Wightman 98047ef5e3 Add EVA FT results, hopefully fix BEiT test failures
1 year ago
Ross Wightman 3cc4d7a894 Fix missing register for 224 eva model
1 year ago
Ross Wightman eba07b0de7 Add eva models to beit.py
1 year ago
Fredo Guan 3bd96609c8
Davit (#1)
1 year ago
Ross Wightman 927f031293 Major module / path restructure, timm.models.layers -> timm.layers, add _ prefix to all non model modules in timm.models
1 year ago
Ross Wightman da6644b6ba Update README.md
1 year ago
Ross Wightman 61531f091e Update multi-weight release README
1 year ago
Ross Wightman 3785c234d7 Remove clip vit models that won't be ft and comment two that aren't uploaded yet
1 year ago
Ross Wightman f82239b30e multi-weight branch version -> 0.8.0dev
1 year ago
Ross Wightman 755570e2d6 Rename _pretrained.py -> pretrained.py, not feasible to change the other files to same scheme without breaking uses
1 year ago
Ross Wightman 72cfa57761 Add ported Tensorflow MaxVit weights. Add a few more CLIP ViT fine-tunes. Tweak some model tag names. Improve model tag name sorting. Update HF hub push config layout.
1 year ago
Ross Wightman dbe7531aa3 Update scripts to support torch.compile(). Make --results_file arg more consistent across benchmark/validate/inference. Fix #1570
1 year ago
Ross Wightman 05637a4bb0 More inference script changes, arg naming, multiple output fmts at once
1 year ago
Ross Wightman eceeb9409a Significant upgrade to inference.py, support for different formats, formatting, etc.
1 year ago
Ross Wightman 4d5c395160 MaxVit, ViT, ConvNeXt, and EfficientNet-v2 updates
1 year ago
Ross Wightman 3db4e346e0 Switch TFDS dataset to use INTEGER_ACCURATE jpeg decode by default
1 year ago
Ross Wightman 9da7e3a799 Add crop_mode for pretrained config / image transforms. Add support for dynamo compilation to benchmark/train/validate
1 year ago
Ross Wightman 8fca002c06 Add ImageNet22k and 12k subset synset/index maps
1 year ago
Ross Wightman b2b6285af7 Add two more FT clip weights
1 year ago
Ross Wightman 5895056dc4 Add openai b32 ft
1 year ago
Ross Wightman 9dea5143d5 Adding more clip ft variants
1 year ago
Ross Wightman 444dcba4ad CLIP B16 12k weights added
1 year ago
Ross Wightman dff4717cbf Add clip b16 384x384 finetunes
1 year ago
Ross Wightman 883fa2eeaa Add fine-tuned B/16 224x224 in1k clip models
1 year ago
Ross Wightman 9a3d2ac2d5 Add latest CLIP ViT fine-tune pretrained configs / model entrypt updates
1 year ago
Ross Wightman 42bbbddee9 Add missing model config
1 year ago
Ross Wightman def68befa7 Updating vit model defs for multi-weight support trial (vit first). Prepping for CLIP (laion2b and openai) fine-tuned weights.
1 year ago
Ross Wightman 0dadb4a6e9 Initial multi-weight support, handled so old pretrained config handling co-exists with new tags.
1 year ago

@@ -16,5 +16,6 @@ jobs:
package_name: timm
repo_owner: rwightman
path_to_docs: pytorch-image-models/hfdocs/source
version_tag_suffix: ""
secrets:
token: ${{ secrets.HUGGINGFACE_PUSH }}

@@ -17,3 +17,4 @@ jobs:
package_name: timm
repo_owner: rwightman
path_to_docs: pytorch-image-models/hfdocs/source
version_tag_suffix: ""

@@ -40,9 +40,10 @@ jobs:
- name: Install torch on ubuntu
if: startsWith(matrix.os, 'ubuntu')
run: |
pip install --no-cache-dir torch==${{ matrix.torch }}+cpu torchvision==${{ matrix.torchvision }}+cpu -f https://download.pytorch.org/whl/torch_stable.html
sudo sed -i 's/azure\.//' /etc/apt/sources.list
sudo apt update
sudo apt install -y google-perftools
pip install --no-cache-dir torch==${{ matrix.torch }}+cpu torchvision==${{ matrix.torchvision }}+cpu -f https://download.pytorch.org/whl/torch_stable.html
- name: Install requirements
run: |
pip install -r requirements.txt

.gitignore

@@ -106,6 +106,16 @@ output/
*.tar
*.pth
*.pt
*.torch
*.gz
Untitled.ipynb
Testing notebook.ipynb
# Root dir exclusions
/*.csv
/*.yaml
/*.json
/*.jpg
/*.png
/*.zip
/*.tar.*

@@ -0,0 +1,112 @@
*This guideline is very much a work-in-progress.*
Contributions to `timm` of code, documentation, and tests are more than welcome!
There haven't been any formal guidelines to date, so please bear with me, and feel free to add to this guide.
# Coding style
Code linting and auto-format (black) are not currently in place but open to consideration. In the meantime, the style to follow is (mostly) aligned with Google's guide: https://google.github.io/styleguide/pyguide.html.
A few specific differences from Google style (or black):
1. Line length is 120 chars. Going over is okay in some cases (e.g. I prefer not to break URLs across lines).
2. Hanging indents are always preferred, please avoid aligning arguments with closing brackets or braces.
Example from the Google guide, but this is a NO here:
```
# Aligned with opening delimiter.
foo = long_function_name(var_one, var_two,
                         var_three, var_four)
meal = (spam,
        beans)

# Aligned with opening delimiter in a dictionary.
foo = {
    'long_dictionary_key': value1 +
                           value2,
    ...
}
```
This is YES:
```
# 4-space hanging indent; nothing on first line,
# closing parenthesis on a new line.
foo = long_function_name(
    var_one, var_two, var_three,
    var_four
)
meal = (
    spam,
    beans,
)

# 4-space hanging indent in a dictionary.
foo = {
    'long_dictionary_key':
        long_dictionary_value,
    ...
}
```
When there is a discrepancy in a given source file (there are many origins for various bits of code and not all have been updated to what I consider the current goal), please follow the style in that file.
In general, if you add new code, formatting it with black using the following options should result in a style that is compatible with the rest of the code base:
```
black --skip-string-normalization --line-length 120 <path-to-file>
```
Avoid formatting code that is unrelated to your PR though.
PRs with pure formatting / style fixes will be accepted, but only in isolation from functional changes; best to ask before starting such a change.
# Documentation
As with code style, docstring style is based on the Google guide: https://google.github.io/styleguide/pyguide.html
The goal is to eventually have all major functions and `__init__` methods use PEP 484 type annotations.
When type annotations are used for a function, per the Google pyguide they should **NOT** be duplicated in the docstring; please leave the annotations as the single source of truth for typing.
There are a LOT of gaps in current documentation relative to the functionality in `timm`, so please, document away!
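For example (an illustrative sketch, not code taken from `timm`), a function written to this target carries the types in annotations while the docstring describes each argument without repeating them:
```
import torch


def count_params(model: torch.nn.Module, trainable_only: bool = False) -> int:
    """Count the parameters of a model.

    Args:
        model: Model to inspect.
        trainable_only: If True, count only parameters with requires_grad set.

    Returns:
        Total number of (optionally trainable) parameters.
    """
    params = model.parameters()
    if trainable_only:
        params = (p for p in params if p.requires_grad)
    return sum(p.numel() for p in params)
```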
# Installation
Create a Python virtual environment using Python 3.10. Inside the environment, install the following test dependencies:
```
python -m pip install pytest pytest-timeout pytest-xdist pytest-forked expecttest
```
Install `torch` and `torchvision` using the instructions matching your system as listed on the [PyTorch website](https://pytorch.org/).
Then install the remaining dependencies:
```
python -m pip install -r requirements.txt
python -m pip install --no-cache-dir git+https://github.com/mapillary/inplace_abn.git
python -m pip install -e .
```
## Unit tests
Run the tests using:
```
pytest tests/
```
Since the whole test suite takes a lot of time to run locally (a few hours), you may want to select a subset of tests relating to the changes you made by using the `-k` option of [`pytest`](https://docs.pytest.org/en/7.1.x/example/markers.html#using-k-expr-to-select-tests-based-on-their-name). Moreover, running tests in parallel (in this example 4 processes) with the `-n` option may help:
```
pytest -k "substring-to-match" -n 4 tests/
```
## Building documentation
Please refer to [this document](https://github.com/huggingface/pytorch-image-models/tree/main/hfdocs).
# Questions
If you have any questions about contribution, where / how to contribute, please ask in the [Discussions](https://github.com/huggingface/pytorch-image-models/discussions/categories/contributing) (there is a `Contributing` topic).

@@ -1,2 +1,3 @@
include timm/models/pruned/*.txt
include timm/models/_pruned/*.txt
include timm/data/_info/*.txt
include timm/data/_info/*.json

@@ -21,6 +21,204 @@ And a big thanks to all GitHub sponsors who helped with some of my costs before
## What's New
* ❗Updates after Oct 10, 2022 are available in 0.8.x pre-releases (`pip install --pre timm`) or cloning main❗
* Stable releases are 0.6.x and available by normal pip install or clone from [0.6.x](https://github.com/rwightman/pytorch-image-models/tree/0.6.x) branch.
### Feb 20, 2023
* Add 320x320 `convnext_large_mlp.clip_laion2b_ft_320` and `convnext_large_mlp.clip_laion2b_ft_soup_320` CLIP image tower weights for features & fine-tune
* 0.8.13dev0 pypi release for latest changes w/ move to huggingface org
### Feb 16, 2023
* `safetensors` checkpoint support added (a combined sketch of this and the items below follows this list)
* Add ideas from 'Scaling Vision Transformers to 22 Billion Parameters' (https://arxiv.org/abs/2302.05442) -- qk norm, RmsNorm, parallel block
* Add F.scaled_dot_product_attention support (PyTorch 2.0 only) to `vit_*`, `vit_relpos*`, `coatnet` / `maxxvit` (to start)
* Lion optimizer (w/ multi-tensor option) added (https://arxiv.org/abs/2302.06675)
* gradient checkpointing works with `features_only=True`
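A minimal sketch combining these pieces (assuming a recent 0.8.x pre-release with `safetensors` installed; the model, hyper-parameters, and file name are illustrative only):
```
import safetensors.torch
import timm
import torch
from timm.optim import create_optimizer_v2

# Feature-extraction model; gradient checkpointing now also works in this mode.
model = timm.create_model('convnext_small', features_only=True)
model.set_grad_checkpointing(True)

# Lion optimizer; foreach=True selects the multi-tensor implementation.
optimizer = create_optimizer_v2(model, opt='lion', lr=1e-4, weight_decay=0.01, foreach=True)

x = torch.randn(2, 3, 224, 224, requires_grad=True)
loss = sum(f.mean() for f in model(x))  # one feature map per stage
loss.backward()
optimizer.step()

# Save / reload the weights as safetensors instead of torch pickle.
safetensors.torch.save_file(model.state_dict(), 'convnext_small_features.safetensors')
state = safetensors.torch.load_file('convnext_small_features.safetensors')
```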
### Feb 7, 2023
* New inference benchmark numbers added in [results](results/) folder.
* Add convnext LAION CLIP trained weights and initial set of in1k fine-tunes
* `convnext_base.clip_laion2b_augreg_ft_in1k` - 86.2% @ 256x256
* `convnext_base.clip_laiona_augreg_ft_in1k_384` - 86.5% @ 384x384
* `convnext_large_mlp.clip_laion2b_augreg_ft_in1k` - 87.3% @ 256x256
* `convnext_large_mlp.clip_laion2b_augreg_ft_in1k_384` - 87.9% @ 384x384
* Add DaViT models. Supports `features_only=True`. Adapted from https://github.com/dingmyu/davit by [Fredo](https://github.com/fffffgggg54).
* Use a common NormMlpClassifierHead across MaxViT, ConvNeXt, DaViT
* Add EfficientFormer-V2 model, update EfficientFormer, and refactor LeViT (closely related architectures). Weights on HF hub.
* New EfficientFormer-V2 arch, significant refactor from original at (https://github.com/snap-research/EfficientFormer). Supports `features_only=True`.
* Minor updates to EfficientFormer.
* Refactor LeViT models to stages, add `features_only=True` support to new `conv` variants, weight remap required.
* Move ImageNet meta-data (synsets, indices) from `/results` to [`timm/data/_info`](timm/data/_info/).
* Add ImageNetInfo / DatasetInfo classes to provide labelling for various ImageNet classifier layouts in `timm` (see the sketch after this list)
* Update `inference.py` to use them; try: `python inference.py /folder/to/images --model convnext_small.in12k --label-type detail --topk 5`
* Ready for 0.8.10 pypi pre-release (final testing).
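A sketch of using the new info classes directly for labelling; the `ImageNetInfo` constructor argument and method names below are assumptions based on the description above, so check [`timm/data/_info`](timm/data/_info/) for the exact API:
```
import timm
import torch
from timm.data import ImageNetInfo

model = timm.create_model('convnext_small.in12k', pretrained=True).eval()
info = ImageNetInfo('imagenet-12k')  # subset matching the classifier layout (assumed name)

probs = model(torch.randn(1, 3, 224, 224)).softmax(dim=-1)
top5 = probs.topk(5)
for p, i in zip(top5.values[0], top5.indices[0]):
    # index_to_label_name / index_to_description are assumed method names
    print(info.index_to_label_name(i.item()), f'{p:.3f}')
```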
### Jan 20, 2023
* Add two convnext 12k -> 1k fine-tunes at 384x384
* `convnext_tiny.in12k_ft_in1k_384` - 85.1 @ 384
* `convnext_small.in12k_ft_in1k_384` - 86.2 @ 384
* Push all MaxxViT weights to HF hub, and add new ImageNet-12k -> 1k fine-tunes for `rw` base MaxViT and CoAtNet 1/2 models
|model |top1 |top5 |samples / sec |Params (M) |GMAC |Act (M)|
|------------------------------------------------------------------------------------------------------------------------|----:|----:|--------------:|--------------:|-----:|------:|
|[maxvit_xlarge_tf_512.in21k_ft_in1k](https://huggingface.co/timm/maxvit_xlarge_tf_512.in21k_ft_in1k) |88.53|98.64| 21.76| 475.77|534.14|1413.22|
|[maxvit_xlarge_tf_384.in21k_ft_in1k](https://huggingface.co/timm/maxvit_xlarge_tf_384.in21k_ft_in1k) |88.32|98.54| 42.53| 475.32|292.78| 668.76|
|[maxvit_base_tf_512.in21k_ft_in1k](https://huggingface.co/timm/maxvit_base_tf_512.in21k_ft_in1k) |88.20|98.53| 50.87| 119.88|138.02| 703.99|
|[maxvit_large_tf_512.in21k_ft_in1k](https://huggingface.co/timm/maxvit_large_tf_512.in21k_ft_in1k) |88.04|98.40| 36.42| 212.33|244.75| 942.15|
|[maxvit_large_tf_384.in21k_ft_in1k](https://huggingface.co/timm/maxvit_large_tf_384.in21k_ft_in1k) |87.98|98.56| 71.75| 212.03|132.55| 445.84|
|[maxvit_base_tf_384.in21k_ft_in1k](https://huggingface.co/timm/maxvit_base_tf_384.in21k_ft_in1k) |87.92|98.54| 104.71| 119.65| 73.80| 332.90|
|[maxvit_rmlp_base_rw_384.sw_in12k_ft_in1k](https://huggingface.co/timm/maxvit_rmlp_base_rw_384.sw_in12k_ft_in1k) |87.81|98.37| 106.55| 116.14| 70.97| 318.95|
|[maxxvitv2_rmlp_base_rw_384.sw_in12k_ft_in1k](https://huggingface.co/timm/maxxvitv2_rmlp_base_rw_384.sw_in12k_ft_in1k) |87.47|98.37| 149.49| 116.09| 72.98| 213.74|
|[coatnet_rmlp_2_rw_384.sw_in12k_ft_in1k](https://huggingface.co/timm/coatnet_rmlp_2_rw_384.sw_in12k_ft_in1k) |87.39|98.31| 160.80| 73.88| 47.69| 209.43|
|[maxvit_rmlp_base_rw_224.sw_in12k_ft_in1k](https://huggingface.co/timm/maxvit_rmlp_base_rw_224.sw_in12k_ft_in1k) |86.89|98.02| 375.86| 116.14| 23.15| 92.64|
|[maxxvitv2_rmlp_base_rw_224.sw_in12k_ft_in1k](https://huggingface.co/timm/maxxvitv2_rmlp_base_rw_224.sw_in12k_ft_in1k) |86.64|98.02| 501.03| 116.09| 24.20| 62.77|
|[maxvit_base_tf_512.in1k](https://huggingface.co/timm/maxvit_base_tf_512.in1k) |86.60|97.92| 50.75| 119.88|138.02| 703.99|
|[coatnet_2_rw_224.sw_in12k_ft_in1k](https://huggingface.co/timm/coatnet_2_rw_224.sw_in12k_ft_in1k) |86.57|97.89| 631.88| 73.87| 15.09| 49.22|
|[maxvit_large_tf_512.in1k](https://huggingface.co/timm/maxvit_large_tf_512.in1k) |86.52|97.88| 36.04| 212.33|244.75| 942.15|
|[coatnet_rmlp_2_rw_224.sw_in12k_ft_in1k](https://huggingface.co/timm/coatnet_rmlp_2_rw_224.sw_in12k_ft_in1k) |86.49|97.90| 620.58| 73.88| 15.18| 54.78|
|[maxvit_base_tf_384.in1k](https://huggingface.co/timm/maxvit_base_tf_384.in1k) |86.29|97.80| 101.09| 119.65| 73.80| 332.90|
|[maxvit_large_tf_384.in1k](https://huggingface.co/timm/maxvit_large_tf_384.in1k) |86.23|97.69| 70.56| 212.03|132.55| 445.84|
|[maxvit_small_tf_512.in1k](https://huggingface.co/timm/maxvit_small_tf_512.in1k) |86.10|97.76| 88.63| 69.13| 67.26| 383.77|
|[maxvit_tiny_tf_512.in1k](https://huggingface.co/timm/maxvit_tiny_tf_512.in1k) |85.67|97.58| 144.25| 31.05| 33.49| 257.59|
|[maxvit_small_tf_384.in1k](https://huggingface.co/timm/maxvit_small_tf_384.in1k) |85.54|97.46| 188.35| 69.02| 35.87| 183.65|
|[maxvit_tiny_tf_384.in1k](https://huggingface.co/timm/maxvit_tiny_tf_384.in1k) |85.11|97.38| 293.46| 30.98| 17.53| 123.42|
|[maxvit_large_tf_224.in1k](https://huggingface.co/timm/maxvit_large_tf_224.in1k) |84.93|96.97| 247.71| 211.79| 43.68| 127.35|
|[coatnet_rmlp_1_rw2_224.sw_in12k_ft_in1k](https://huggingface.co/timm/coatnet_rmlp_1_rw2_224.sw_in12k_ft_in1k) |84.90|96.96| 1025.45| 41.72| 8.11| 40.13|
|[maxvit_base_tf_224.in1k](https://huggingface.co/timm/maxvit_base_tf_224.in1k) |84.85|96.99| 358.25| 119.47| 24.04| 95.01|
|[maxxvit_rmlp_small_rw_256.sw_in1k](https://huggingface.co/timm/maxxvit_rmlp_small_rw_256.sw_in1k) |84.63|97.06| 575.53| 66.01| 14.67| 58.38|
|[coatnet_rmlp_2_rw_224.sw_in1k](https://huggingface.co/timm/coatnet_rmlp_2_rw_224.sw_in1k) |84.61|96.74| 625.81| 73.88| 15.18| 54.78|
|[maxvit_rmlp_small_rw_224.sw_in1k](https://huggingface.co/timm/maxvit_rmlp_small_rw_224.sw_in1k) |84.49|96.76| 693.82| 64.90| 10.75| 49.30|
|[maxvit_small_tf_224.in1k](https://huggingface.co/timm/maxvit_small_tf_224.in1k) |84.43|96.83| 647.96| 68.93| 11.66| 53.17|
|[maxvit_rmlp_tiny_rw_256.sw_in1k](https://huggingface.co/timm/maxvit_rmlp_tiny_rw_256.sw_in1k) |84.23|96.78| 807.21| 29.15| 6.77| 46.92|
|[coatnet_1_rw_224.sw_in1k](https://huggingface.co/timm/coatnet_1_rw_224.sw_in1k) |83.62|96.38| 989.59| 41.72| 8.04| 34.60|
|[maxvit_tiny_rw_224.sw_in1k](https://huggingface.co/timm/maxvit_tiny_rw_224.sw_in1k) |83.50|96.50| 1100.53| 29.06| 5.11| 33.11|
|[maxvit_tiny_tf_224.in1k](https://huggingface.co/timm/maxvit_tiny_tf_224.in1k) |83.41|96.59| 1004.94| 30.92| 5.60| 35.78|
|[coatnet_rmlp_1_rw_224.sw_in1k](https://huggingface.co/timm/coatnet_rmlp_1_rw_224.sw_in1k) |83.36|96.45| 1093.03| 41.69| 7.85| 35.47|
|[maxxvitv2_nano_rw_256.sw_in1k](https://huggingface.co/timm/maxxvitv2_nano_rw_256.sw_in1k) |83.11|96.33| 1276.88| 23.70| 6.26| 23.05|
|[maxxvit_rmlp_nano_rw_256.sw_in1k](https://huggingface.co/timm/maxxvit_rmlp_nano_rw_256.sw_in1k) |83.03|96.34| 1341.24| 16.78| 4.37| 26.05|
|[maxvit_rmlp_nano_rw_256.sw_in1k](https://huggingface.co/timm/maxvit_rmlp_nano_rw_256.sw_in1k) |82.96|96.26| 1283.24| 15.50| 4.47| 31.92|
|[maxvit_nano_rw_256.sw_in1k](https://huggingface.co/timm/maxvit_nano_rw_256.sw_in1k) |82.93|96.23| 1218.17| 15.45| 4.46| 30.28|
|[coatnet_bn_0_rw_224.sw_in1k](https://huggingface.co/timm/coatnet_bn_0_rw_224.sw_in1k) |82.39|96.19| 1600.14| 27.44| 4.67| 22.04|
|[coatnet_0_rw_224.sw_in1k](https://huggingface.co/timm/coatnet_0_rw_224.sw_in1k) |82.39|95.84| 1831.21| 27.44| 4.43| 18.73|
|[coatnet_rmlp_nano_rw_224.sw_in1k](https://huggingface.co/timm/coatnet_rmlp_nano_rw_224.sw_in1k) |82.05|95.87| 2109.09| 15.15| 2.62| 20.34|
|[coatnext_nano_rw_224.sw_in1k](https://huggingface.co/timm/coatnext_nano_rw_224.sw_in1k) |81.95|95.92| 2525.52| 14.70| 2.47| 12.80|
|[coatnet_nano_rw_224.sw_in1k](https://huggingface.co/timm/coatnet_nano_rw_224.sw_in1k) |81.70|95.64| 2344.52| 15.14| 2.41| 15.41|
|[maxvit_rmlp_pico_rw_256.sw_in1k](https://huggingface.co/timm/maxvit_rmlp_pico_rw_256.sw_in1k) |80.53|95.21| 1594.71| 7.52| 1.85| 24.86|
### Jan 11, 2023
* Update ConvNeXt ImageNet-12k pretrain series w/ two new fine-tuned weights (and pre FT `.in12k` tags)
* `convnext_nano.in12k_ft_in1k` - 82.3 @ 224, 82.9 @ 288 (previously released)
* `convnext_tiny.in12k_ft_in1k` - 84.2 @ 224, 84.5 @ 288
* `convnext_small.in12k_ft_in1k` - 85.2 @ 224, 85.3 @ 288
### Jan 6, 2023
* Finally got around to adding `--model-kwargs` and `--opt-kwargs` to scripts to pass through rare args directly to model classes from cmd line
* `train.py /imagenet --model resnet50 --amp --model-kwargs output_stride=16 act_layer=silu`
* `train.py /imagenet --model vit_base_patch16_clip_224 --img-size 240 --amp --model-kwargs img_size=240 patch_size=12`
* Cleanup some popular models to better support arg passthrough / merge with model configs, more to go.
### Jan 5, 2023
* ConvNeXt-V2 models and weights added to existing `convnext.py`
* Paper: [ConvNeXt V2: Co-designing and Scaling ConvNets with Masked Autoencoders](http://arxiv.org/abs/2301.00808)
* Reference impl: https://github.com/facebookresearch/ConvNeXt-V2 (NOTE: weights currently CC-BY-NC)
### Dec 23, 2022 🎄☃
* Add FlexiViT models and weights from https://github.com/google-research/big_vision (check out paper at https://arxiv.org/abs/2212.08013)
* NOTE: resizing is currently static on model creation; on-the-fly dynamic / train patch size sampling is a WIP (see the sketch after this list)
* Many more models updated to multi-weight and downloadable via HF hub now (convnext, efficientnet, mobilenet, vision_transformer*, beit)
* More model pretrained tags and adjustments, some model names changed (working on deprecation translations; consider the main branch a DEV branch right now, use 0.6.x for stable use)
* More ImageNet-12k (subset of 22k) pretrain models popping up:
* `efficientnet_b5.in12k_ft_in1k` - 85.9 @ 448x448
* `vit_medium_patch16_gap_384.in12k_ft_in1k` - 85.5 @ 384x384
* `vit_medium_patch16_gap_256.in12k_ft_in1k` - 84.5 @ 256x256
* `convnext_nano.in12k_ft_in1k` - 82.9 @ 288x288
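For instance, picking an alternate FlexiViT patch size currently happens at creation time (a sketch; passing `patch_size` through `create_model` kwargs is assumed to work as with other `vit_*` entrypoints, with the patch embedding resampled on weight load):
```
import timm

# Static resize: the patch grid is fixed when the model is built.
model = timm.create_model('flexivit_small', patch_size=32)  # assumed kwarg passthrough
print(model.patch_embed.proj.kernel_size)
```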
### Dec 8, 2022
* Add 'EVA l' to `vision_transformer.py`, MAE style ViT-L/14 MIM pretrain w/ EVA-CLIP targets, FT on ImageNet-1k (w/ ImageNet-22k intermediate for some)
* original source: https://github.com/baaivision/EVA
| model | top1 | param_count | gmac | macts | hub |
|:------------------------------------------|-----:|------------:|------:|------:|:----------------------------------------|
| eva_large_patch14_336.in22k_ft_in22k_in1k | 89.2 | 304.5 | 191.1 | 270.2 | [link](https://huggingface.co/BAAI/EVA) |
| eva_large_patch14_336.in22k_ft_in1k | 88.7 | 304.5 | 191.1 | 270.2 | [link](https://huggingface.co/BAAI/EVA) |
| eva_large_patch14_196.in22k_ft_in22k_in1k | 88.6 | 304.1 | 61.6 | 63.5 | [link](https://huggingface.co/BAAI/EVA) |
| eva_large_patch14_196.in22k_ft_in1k | 87.9 | 304.1 | 61.6 | 63.5 | [link](https://huggingface.co/BAAI/EVA) |
### Dec 6, 2022
* Add 'EVA g', BEiT style ViT-g/14 model weights w/ both MIM pretrain and CLIP pretrain to `beit.py`.
* original source: https://github.com/baaivision/EVA
* paper: https://arxiv.org/abs/2211.07636
| model | top1 | param_count | gmac | macts | hub |
|:-----------------------------------------|-------:|--------------:|-------:|--------:|:----------------------------------------|
| eva_giant_patch14_560.m30m_ft_in22k_in1k | 89.8 | 1014.4 | 1906.8 | 2577.2 | [link](https://huggingface.co/BAAI/EVA) |
| eva_giant_patch14_336.m30m_ft_in22k_in1k | 89.6 | 1013 | 620.6 | 550.7 | [link](https://huggingface.co/BAAI/EVA) |
| eva_giant_patch14_336.clip_ft_in1k | 89.4 | 1013 | 620.6 | 550.7 | [link](https://huggingface.co/BAAI/EVA) |
| eva_giant_patch14_224.clip_ft_in1k | 89.1 | 1012.6 | 267.2 | 192.6 | [link](https://huggingface.co/BAAI/EVA) |
### Dec 5, 2022
* Pre-release (`0.8.0dev0`) of multi-weight support (`model_arch.pretrained_tag`). Install with `pip install --pre timm` (see the usage sketch after the tables below)
* vision_transformer, maxvit, convnext are the first three model impl w/ support
* model names are changing with this (previous _21k, etc. fn will merge), still sorting out deprecation handling
* bugs are likely, but I need feedback so please try it out
* if stability is needed, please use 0.6.x pypi releases or clone from [0.6.x branch](https://github.com/rwightman/pytorch-image-models/tree/0.6.x)
* Support for PyTorch 2.0 compile is added in train/validate/inference/benchmark, use `--torchcompile` argument
* Inference script allows more control over output, select k for top-class index + prob json, csv or parquet output
* Add a full set of fine-tuned CLIP image tower weights from both LAION-2B and original OpenAI CLIP models
| model | top1 | param_count | gmac | macts | hub |
|:-------------------------------------------------|-------:|--------------:|-------:|--------:|:-------------------------------------------------------------------------------------|
| vit_huge_patch14_clip_336.laion2b_ft_in12k_in1k | 88.6 | 632.5 | 391 | 407.5 | [link](https://huggingface.co/timm/vit_huge_patch14_clip_336.laion2b_ft_in12k_in1k) |
| vit_large_patch14_clip_336.openai_ft_in12k_in1k | 88.3 | 304.5 | 191.1 | 270.2 | [link](https://huggingface.co/timm/vit_large_patch14_clip_336.openai_ft_in12k_in1k) |
| vit_huge_patch14_clip_224.laion2b_ft_in12k_in1k | 88.2 | 632 | 167.4 | 139.4 | [link](https://huggingface.co/timm/vit_huge_patch14_clip_224.laion2b_ft_in12k_in1k) |
| vit_large_patch14_clip_336.laion2b_ft_in12k_in1k | 88.2 | 304.5 | 191.1 | 270.2 | [link](https://huggingface.co/timm/vit_large_patch14_clip_336.laion2b_ft_in12k_in1k) |
| vit_large_patch14_clip_224.openai_ft_in12k_in1k | 88.2 | 304.2 | 81.1 | 88.8 | [link](https://huggingface.co/timm/vit_large_patch14_clip_224.openai_ft_in12k_in1k) |
| vit_large_patch14_clip_224.laion2b_ft_in12k_in1k | 87.9 | 304.2 | 81.1 | 88.8 | [link](https://huggingface.co/timm/vit_large_patch14_clip_224.laion2b_ft_in12k_in1k) |
| vit_large_patch14_clip_224.openai_ft_in1k | 87.9 | 304.2 | 81.1 | 88.8 | [link](https://huggingface.co/timm/vit_large_patch14_clip_224.openai_ft_in1k) |
| vit_large_patch14_clip_336.laion2b_ft_in1k | 87.9 | 304.5 | 191.1 | 270.2 | [link](https://huggingface.co/timm/vit_large_patch14_clip_336.laion2b_ft_in1k) |
| vit_huge_patch14_clip_224.laion2b_ft_in1k | 87.6 | 632 | 167.4 | 139.4 | [link](https://huggingface.co/timm/vit_huge_patch14_clip_224.laion2b_ft_in1k) |
| vit_large_patch14_clip_224.laion2b_ft_in1k | 87.3 | 304.2 | 81.1 | 88.8 | [link](https://huggingface.co/timm/vit_large_patch14_clip_224.laion2b_ft_in1k) |
| vit_base_patch16_clip_384.laion2b_ft_in12k_in1k | 87.2 | 86.9 | 55.5 | 101.6 | [link](https://huggingface.co/timm/vit_base_patch16_clip_384.laion2b_ft_in12k_in1k) |
| vit_base_patch16_clip_384.openai_ft_in12k_in1k | 87 | 86.9 | 55.5 | 101.6 | [link](https://huggingface.co/timm/vit_base_patch16_clip_384.openai_ft_in12k_in1k) |
| vit_base_patch16_clip_384.laion2b_ft_in1k | 86.6 | 86.9 | 55.5 | 101.6 | [link](https://huggingface.co/timm/vit_base_patch16_clip_384.laion2b_ft_in1k) |
| vit_base_patch16_clip_384.openai_ft_in1k | 86.2 | 86.9 | 55.5 | 101.6 | [link](https://huggingface.co/timm/vit_base_patch16_clip_384.openai_ft_in1k) |
| vit_base_patch16_clip_224.laion2b_ft_in12k_in1k | 86.2 | 86.6 | 17.6 | 23.9 | [link](https://huggingface.co/timm/vit_base_patch16_clip_224.laion2b_ft_in12k_in1k) |
| vit_base_patch16_clip_224.openai_ft_in12k_in1k | 85.9 | 86.6 | 17.6 | 23.9 | [link](https://huggingface.co/timm/vit_base_patch16_clip_224.openai_ft_in12k_in1k) |
| vit_base_patch32_clip_448.laion2b_ft_in12k_in1k | 85.8 | 88.3 | 17.9 | 23.9 | [link](https://huggingface.co/timm/vit_base_patch32_clip_448.laion2b_ft_in12k_in1k) |
| vit_base_patch16_clip_224.laion2b_ft_in1k | 85.5 | 86.6 | 17.6 | 23.9 | [link](https://huggingface.co/timm/vit_base_patch16_clip_224.laion2b_ft_in1k) |
| vit_base_patch32_clip_384.laion2b_ft_in12k_in1k | 85.4 | 88.3 | 13.1 | 16.5 | [link](https://huggingface.co/timm/vit_base_patch32_clip_384.laion2b_ft_in12k_in1k) |
| vit_base_patch16_clip_224.openai_ft_in1k | 85.3 | 86.6 | 17.6 | 23.9 | [link](https://huggingface.co/timm/vit_base_patch16_clip_224.openai_ft_in1k) |
| vit_base_patch32_clip_384.openai_ft_in12k_in1k | 85.2 | 88.3 | 13.1 | 16.5 | [link](https://huggingface.co/timm/vit_base_patch32_clip_384.openai_ft_in12k_in1k) |
| vit_base_patch32_clip_224.laion2b_ft_in12k_in1k | 83.3 | 88.2 | 4.4 | 5 | [link](https://huggingface.co/timm/vit_base_patch32_clip_224.laion2b_ft_in12k_in1k) |
| vit_base_patch32_clip_224.laion2b_ft_in1k | 82.6 | 88.2 | 4.4 | 5 | [link](https://huggingface.co/timm/vit_base_patch32_clip_224.laion2b_ft_in1k) |
| vit_base_patch32_clip_224.openai_ft_in1k | 81.9 | 88.2 | 4.4 | 5 | [link](https://huggingface.co/timm/vit_base_patch32_clip_224.openai_ft_in1k) |
* Port of MaxViT Tensorflow Weights from official impl at https://github.com/google-research/maxvit
* There were larger than expected drops for the upscaled 384/512 in21k fine-tune weights, possibly a missing detail, but the 21k FT weights did seem sensitive to small preprocessing differences
| model | top1 | param_count | gmac | macts | hub |
|:-----------------------------------|-------:|--------------:|-------:|--------:|:-----------------------------------------------------------------------|
| maxvit_xlarge_tf_512.in21k_ft_in1k | 88.5 | 475.8 | 534.1 | 1413.2 | [link](https://huggingface.co/timm/maxvit_xlarge_tf_512.in21k_ft_in1k) |
| maxvit_xlarge_tf_384.in21k_ft_in1k | 88.3 | 475.3 | 292.8 | 668.8 | [link](https://huggingface.co/timm/maxvit_xlarge_tf_384.in21k_ft_in1k) |
| maxvit_base_tf_512.in21k_ft_in1k | 88.2 | 119.9 | 138 | 704 | [link](https://huggingface.co/timm/maxvit_base_tf_512.in21k_ft_in1k) |
| maxvit_large_tf_512.in21k_ft_in1k | 88 | 212.3 | 244.8 | 942.2 | [link](https://huggingface.co/timm/maxvit_large_tf_512.in21k_ft_in1k) |
| maxvit_large_tf_384.in21k_ft_in1k | 88 | 212 | 132.6 | 445.8 | [link](https://huggingface.co/timm/maxvit_large_tf_384.in21k_ft_in1k) |
| maxvit_base_tf_384.in21k_ft_in1k | 87.9 | 119.6 | 73.8 | 332.9 | [link](https://huggingface.co/timm/maxvit_base_tf_384.in21k_ft_in1k) |
| maxvit_base_tf_512.in1k | 86.6 | 119.9 | 138 | 704 | [link](https://huggingface.co/timm/maxvit_base_tf_512.in1k) |
| maxvit_large_tf_512.in1k | 86.5 | 212.3 | 244.8 | 942.2 | [link](https://huggingface.co/timm/maxvit_large_tf_512.in1k) |
| maxvit_base_tf_384.in1k | 86.3 | 119.6 | 73.8 | 332.9 | [link](https://huggingface.co/timm/maxvit_base_tf_384.in1k) |
| maxvit_large_tf_384.in1k | 86.2 | 212 | 132.6 | 445.8 | [link](https://huggingface.co/timm/maxvit_large_tf_384.in1k) |
| maxvit_small_tf_512.in1k | 86.1 | 69.1 | 67.3 | 383.8 | [link](https://huggingface.co/timm/maxvit_small_tf_512.in1k) |
| maxvit_tiny_tf_512.in1k | 85.7 | 31 | 33.5 | 257.6 | [link](https://huggingface.co/timm/maxvit_tiny_tf_512.in1k) |
| maxvit_small_tf_384.in1k | 85.5 | 69 | 35.9 | 183.6 | [link](https://huggingface.co/timm/maxvit_small_tf_384.in1k) |
| maxvit_tiny_tf_384.in1k | 85.1 | 31 | 17.5 | 123.4 | [link](https://huggingface.co/timm/maxvit_tiny_tf_384.in1k) |
| maxvit_large_tf_224.in1k | 84.9 | 211.8 | 43.7 | 127.4 | [link](https://huggingface.co/timm/maxvit_large_tf_224.in1k) |
| maxvit_base_tf_224.in1k | 84.9 | 119.5 | 24 | 95 | [link](https://huggingface.co/timm/maxvit_base_tf_224.in1k) |
| maxvit_small_tf_224.in1k | 84.4 | 68.9 | 11.7 | 53.2 | [link](https://huggingface.co/timm/maxvit_small_tf_224.in1k) |
| maxvit_tiny_tf_224.in1k | 83.4 | 30.9 | 5.6 | 35.8 | [link](https://huggingface.co/timm/maxvit_tiny_tf_224.in1k) |
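A short usage sketch for the multi-weight naming and the new compile path (the weight tag below is taken from the CLIP table above; `torch.compile` requires a PyTorch 2.0 build):
```
import timm
import torch

# Multi-weight: architecture name plus a pretrained tag selects specific weights.
model = timm.create_model('vit_base_patch16_clip_224.laion2b_ft_in1k', pretrained=True).eval()

# Optional PyTorch 2.0 compilation, mirroring the scripts' --torchcompile flag.
if hasattr(torch, 'compile'):
    model = torch.compile(model)

with torch.no_grad():
    out = model(torch.randn(1, 3, 224, 224))
print(out.shape)  # torch.Size([1, 1000])
```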
### Oct 15, 2022
* Train and validation script enhancements
* Non-GPU (i.e. CPU) device support
@@ -233,46 +431,6 @@ More models, more fixes
* TinyNet models added by [rsomani95](https://github.com/rsomani95)
* LCNet added via MobileNetV3 architecture
### Nov 22, 2021
* A number of updated weights and new model defs
* `eca_halonext26ts` - 79.5 @ 256
* `resnet50_gn` (new) - 80.1 @ 224, 81.3 @ 288
* `resnet50` - 80.7 @ 224, 80.9 @ 288 (trained at 176, not replacing current a1 weights as default since these don't scale as well to higher res, [weights](https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-rsb-weights/resnet50_a1h2_176-001a1197.pth))
* `resnext50_32x4d` - 81.1 @ 224, 82.0 @ 288
* `sebotnet33ts_256` (new) - 81.2 @ 224
* `lamhalobotnet50ts_256` - 81.5 @ 256
* `halonet50ts` - 81.7 @ 256
* `halo2botnet50ts_256` - 82.0 @ 256
* `resnet101` - 82.0 @ 224, 82.8 @ 288
* `resnetv2_101` (new) - 82.1 @ 224, 83.0 @ 288
* `resnet152` - 82.8 @ 224, 83.5 @ 288
* `regnetz_d8` (new) - 83.5 @ 256, 84.0 @ 320
* `regnetz_e8` (new) - 84.5 @ 256, 85.0 @ 320
* `vit_base_patch8_224` (85.8 top-1) & `in21k` variant weights added thanks [Martins Bruveris](https://github.com/martinsbruveris)
* Groundwork in for FX feature extraction thanks to [Alexander Soare](https://github.com/alexander-soare)
* models updated for tracing compatibility (almost full support with some distilled transformer exceptions)
### Oct 19, 2021
* ResNet strikes back (https://arxiv.org/abs/2110.00476) weights added, plus any extra training components used. Model weights and some more details here (https://github.com/rwightman/pytorch-image-models/releases/tag/v0.1-rsb-weights)
* BCE loss and Repeated Augmentation support for RSB paper
* 4 series of ResNet based attention model experiments being added (implemented across byobnet.py/byoanet.py). These include all sorts of attention, from channel attn like SE, ECA to 2D QKV self-attention layers such as Halo, Bottleneck, Lambda. Details here (https://github.com/rwightman/pytorch-image-models/releases/tag/v0.1-attn-weights)
* Working implementations of the following 2D self-attention modules (likely to have differences from paper or eventual official impl):
* Halo (https://arxiv.org/abs/2103.12731)
* Bottleneck Transformer (https://arxiv.org/abs/2101.11605)
* LambdaNetworks (https://arxiv.org/abs/2102.08602)
* A RegNetZ series of models with some attention experiments (being added to). These do not follow the paper (https://arxiv.org/abs/2103.06877) in any way other than block architecture, details of official models are not available. See more here (https://github.com/rwightman/pytorch-image-models/releases/tag/v0.1-attn-weights)
* ConvMixer (https://openreview.net/forum?id=TVHS5Y4dNvM), CrossVit (https://arxiv.org/abs/2103.14899), and BeiT (https://arxiv.org/abs/2106.08254) architectures + weights added
* freeze/unfreeze helpers by [Alexander Soare](https://github.com/alexander-soare)
### Aug 18, 2021
* Optimizer bonanza!
* Add LAMB and LARS optimizers, incl trust ratio clipping options. Tweaked to work properly in PyTorch XLA (tested on TPUs w/ `timm bits` [branch](https://github.com/rwightman/pytorch-image-models/tree/bits_and_tpu/timm/bits))
* Add MADGRAD from FB research w/ a few tweaks (decoupled decay option, step handling that works with PyTorch XLA)
* Some cleanup on all optimizers and factory. No more `.data`, a bit more consistency, unit tests for all!
* SGDP and AdamP still won't work with PyTorch XLA but others should (have yet to test Adabelief, Adafactor, Adahessian myself).
* EfficientNet-V2 XL TF ported weights added, but they don't validate well in PyTorch (L is better). The pre-processing for the V2 TF training is a bit different and the fine-tuned 21k -> 1k weights are very sensitive and less robust than the 1k weights.
* Added PyTorch trained EfficientNet-V2 'Tiny' w/ GlobalContext attn weights. Only .1-.2 top-1 better than the SE version, so more of a curiosity for those interested.
## Introduction
Py**T**orch **Im**age **M**odels (`timm`) is a collection of image models, layers, utilities, optimizers, schedulers, data-loaders / augmentations, and reference training / validation scripts that aim to pull together a wide variety of SOTA models with ability to reproduce ImageNet training results.
@@ -293,6 +451,7 @@ A full version of the list below with source links can be found in the [document
* CoaT (Co-Scale Conv-Attentional Image Transformers) - https://arxiv.org/abs/2104.06399
* CoAtNet (Convolution and Attention) - https://arxiv.org/abs/2106.04803
* ConvNeXt - https://arxiv.org/abs/2201.03545
* ConvNeXt-V2 - http://arxiv.org/abs/2301.00808
* ConViT (Soft Convolutional Inductive Biases Vision Transformers) - https://arxiv.org/abs/2103.10697
* CspNet (Cross-Stage Partial Networks) - https://arxiv.org/abs/1911.11929
* DeiT - https://arxiv.org/abs/2012.12877
@@ -314,6 +473,8 @@ A full version of the list below with source links can be found in the [document
* MobileNet-V2 - https://arxiv.org/abs/1801.04381
* Single-Path NAS - https://arxiv.org/abs/1904.02877
* TinyNet - https://arxiv.org/abs/2010.14819
* EVA - https://arxiv.org/abs/2211.07636
* FlexiViT - https://arxiv.org/abs/2212.08013
* GCViT (Global Context Vision Transformer) - https://arxiv.org/abs/2206.09959
* GhostNet - https://arxiv.org/abs/1911.11907
* gMLP - https://arxiv.org/abs/2105.08050
@@ -439,7 +600,7 @@ Several (less common) features that I often utilize in my projects are included.
## Results
Model validation results can be found in the [documentation](https://rwightman.github.io/pytorch-image-models/results/) and in the [results tables](results/README.md)
Model validation results can be found in the [results tables](results/README.md)
## Getting Started (Documentation)

@@ -16,21 +16,31 @@ import argparse
import os
import glob
import hashlib
from timm.models.helpers import load_state_dict
from timm.models import load_state_dict
try:
import safetensors.torch
_has_safetensors = True
except ImportError:
_has_safetensors = False
DEFAULT_OUTPUT = "./averaged.pth"
DEFAULT_SAFE_OUTPUT = "./averaged.safetensors"
parser = argparse.ArgumentParser(description='PyTorch Checkpoint Averager')
parser.add_argument('--input', default='', type=str, metavar='PATH',
help='path to base input folder containing checkpoints')
parser.add_argument('--filter', default='*.pth.tar', type=str, metavar='WILDCARD',
help='checkpoint filter (path wildcard)')
parser.add_argument('--output', default='./averaged.pth', type=str, metavar='PATH',
help='output filename')
parser.add_argument('--output', default=DEFAULT_OUTPUT, type=str, metavar='PATH',
help=f'Output filename. Defaults to {DEFAULT_SAFE_OUTPUT} when passing --safetensors.')
parser.add_argument('--no-use-ema', dest='no_use_ema', action='store_true',
help='Force not using ema version of weights (if present)')
parser.add_argument('--no-sort', dest='no_sort', action='store_true',
help='Do not sort and select by checkpoint metric, also makes "n" argument irrelevant')
parser.add_argument('-n', type=int, default=10, metavar='N',
help='Number of checkpoints to average')
parser.add_argument('--safetensors', action='store_true',
help='Save weights using safetensors instead of the default torch way (pickle).')
def checkpoint_metric(checkpoint_path):
@@ -55,8 +65,23 @@ def main():
# by default sort by checkpoint metric (if present) and avg top n checkpoints
args.sort = not args.no_sort
if os.path.exists(args.output):
print("Error: Output filename ({}) already exists.".format(args.output))
if args.safetensors and args.output == DEFAULT_OUTPUT:
# Default path changes if using safetensors
args.output = DEFAULT_SAFE_OUTPUT
output, output_ext = os.path.splitext(args.output)
if not output_ext:
output_ext = ('.safetensors' if args.safetensors else '.pth')
output = output + output_ext
if args.safetensors and not output_ext == ".safetensors":
print(
"Warning: saving weights as safetensors but output file extension is not "
f"set to '.safetensors': {args.output}"
)
if os.path.exists(output):
print("Error: Output filename ({}) already exists.".format(output))
exit(1)
pattern = args.input
@@ -73,22 +98,27 @@ def main():
checkpoint_metrics.append((metric, c))
checkpoint_metrics = list(sorted(checkpoint_metrics))
checkpoint_metrics = checkpoint_metrics[-args.n:]
print("Selected checkpoints:")
[print(m, c) for m, c in checkpoint_metrics]
if checkpoint_metrics:
print("Selected checkpoints:")
[print(m, c) for m, c in checkpoint_metrics]
avg_checkpoints = [c for m, c in checkpoint_metrics]
else:
avg_checkpoints = checkpoints
print("Selected checkpoints:")
[print(c) for c in checkpoints]
if avg_checkpoints:
print("Selected checkpoints:")
[print(c) for c in checkpoints]
if not avg_checkpoints:
print('Error: No checkpoints found to average.')
exit(1)
avg_state_dict = {}
avg_counts = {}
for c in avg_checkpoints:
new_state_dict = load_state_dict(c, args.use_ema)
if not new_state_dict:
print("Error: Checkpoint ({}) doesn't exist".format(args.checkpoint))
print(f"Error: Checkpoint ({c}) doesn't exist")
continue
for k, v in new_state_dict.items():
if k not in avg_state_dict:
avg_state_dict[k] = v.clone().to(dtype=torch.float64)
@@ -107,14 +137,15 @@ def main():
v = v.clamp(float32_info.min, float32_info.max)
final_state_dict[k] = v.to(dtype=torch.float32)
try:
torch.save(final_state_dict, args.output, _use_new_zipfile_serialization=False)
except:
torch.save(final_state_dict, args.output)
if args.safetensors:
assert _has_safetensors, "`pip install safetensors` to use .safetensors"
safetensors.torch.save_file(final_state_dict, output)
else:
torch.save(final_state_dict, output)
with open(args.output, 'rb') as f:
with open(output, 'rb') as f:
sha_hash = hashlib.sha256(f.read()).hexdigest()
print("=> Saved state_dict to '{}, SHA256: {}'".format(args.output, sha_hash))
print(f"=> Saved state_dict to '{output}, SHA256: {sha_hash}'")
if __name__ == '__main__':

@@ -19,9 +19,10 @@ import torch.nn as nn
import torch.nn.parallel
from timm.data import resolve_data_config
from timm.models import create_model, is_model, list_models, set_fast_norm
from timm.layers import set_fast_norm
from timm.models import create_model, is_model, list_models
from timm.optim import create_optimizer_v2
from timm.utils import setup_default_logging, set_jit_fuser, decay_batch_step, check_batch_size_retry
from timm.utils import setup_default_logging, set_jit_fuser, decay_batch_step, check_batch_size_retry, ParseKwargs
has_apex = False
try:
@@ -56,6 +57,7 @@ try:
except ImportError as e:
has_functorch = False
has_compile = hasattr(torch, 'compile')
if torch.cuda.is_available():
torch.backends.cuda.matmul.allow_tf32 = True
@@ -74,12 +76,16 @@ parser.add_argument('--detail', action='store_true', default=False,
help='Provide train fwd/bwd/opt breakdown detail if True. Defaults to False')
parser.add_argument('--no-retry', action='store_true', default=False,
help='Do not decay batch size and retry on error.')
parser.add_argument('--results-file', default='', type=str, metavar='FILENAME',
parser.add_argument('--results-file', default='', type=str,
help='Output csv file for validation results (summary)')
parser.add_argument('--results-format', default='csv', type=str,
help='Format for results file one of (csv, json) (default: csv).')
parser.add_argument('--num-warm-iter', default=10, type=int,
metavar='N', help='Number of warmup iterations (default: 10)')
help='Number of warmup iterations (default: 10)')
parser.add_argument('--num-bench-iter', default=40, type=int,
metavar='N', help='Number of benchmark iterations (default: 40)')
help='Number of benchmark iterations (default: 40)')
parser.add_argument('--device', default='cuda', type=str,
help="device to run benchmark on")
# common inference / train args
parser.add_argument('--model', '-m', metavar='NAME', default='resnet50',
@@ -102,17 +108,24 @@ parser.add_argument('--grad-checkpointing', action='store_true', default=False,
help='Enable gradient checkpointing through model blocks/stages')
parser.add_argument('--amp', action='store_true', default=False,
help='use PyTorch Native AMP for mixed precision training. Overrides --precision arg.')
parser.add_argument('--amp-dtype', default='float16', type=str,
help='lower precision AMP dtype (default: float16). Overrides --precision arg if args.amp True.')
parser.add_argument('--precision', default='float32', type=str,
help='Numeric precision. One of (amp, float32, float16, bfloat16, tf32)')
parser.add_argument('--fuser', default='', type=str,
help="Select jit fuser. One of ('', 'te', 'old', 'nvfuser')")
parser.add_argument('--fast-norm', default=False, action='store_true',
help='enable experimental fast-norm')
parser.add_argument('--model-kwargs', nargs='*', default={}, action=ParseKwargs)
# codegen (model compilation) options
scripting_group = parser.add_mutually_exclusive_group()
scripting_group.add_argument('--torchscript', dest='torchscript', action='store_true',
help='convert model torchscript for inference')
help='convert model torchscript for inference')
scripting_group.add_argument('--torchcompile', nargs='?', type=str, default=None, const='inductor',
help="Enable compilation w/ specified backend (default: inductor).")
scripting_group.add_argument('--aot-autograd', default=False, action='store_true',
help="Enable AOT Autograd support. (It's recommended to use this option with `--fuser nvfuser` together)")
scripting_group.add_argument('--fast-norm', default=False, action='store_true',
help='enable experimental fast-norm')
help="Enable AOT Autograd optimization.")
# train optimizer parameters
parser.add_argument('--opt', default='sgd', type=str, metavar='OPTIMIZER',
@@ -157,19 +170,21 @@ def count_params(model: nn.Module):
def resolve_precision(precision: str):
assert precision in ('amp', 'float16', 'bfloat16', 'float32')
use_amp = False
assert precision in ('amp', 'amp_bfloat16', 'float16', 'bfloat16', 'float32')
amp_dtype = None # amp disabled
model_dtype = torch.float32
data_dtype = torch.float32
if precision == 'amp':
use_amp = True
amp_dtype = torch.float16
elif precision == 'amp_bfloat16':
amp_dtype = torch.bfloat16
elif precision == 'float16':
model_dtype = torch.float16
data_dtype = torch.float16
elif precision == 'bfloat16':
model_dtype = torch.bfloat16
data_dtype = torch.bfloat16
return use_amp, model_dtype, data_dtype
return amp_dtype, model_dtype, data_dtype
def profile_deepspeed(model, input_size=(3, 224, 224), batch_size=1, detailed=False):
@@ -205,6 +220,7 @@ class BenchmarkRunner:
detail=False,
device='cuda',
torchscript=False,
torchcompile=None,
aot_autograd=False,
precision='float32',
fuser='',
@@ -216,9 +232,12 @@
self.model_name = model_name
self.detail = detail
self.device = device
self.use_amp, self.model_dtype, self.data_dtype = resolve_precision(precision)
self.amp_dtype, self.model_dtype, self.data_dtype = resolve_precision(precision)
self.channels_last = kwargs.pop('channels_last', False)
self.amp_autocast = partial(torch.cuda.amp.autocast, dtype=torch.float16) if self.use_amp else suppress
if self.amp_dtype is not None:
self.amp_autocast = partial(torch.cuda.amp.autocast, dtype=self.amp_dtype)
else:
self.amp_autocast = suppress
if fuser:
set_jit_fuser(fuser)
@@ -231,6 +250,7 @@
drop_rate=kwargs.pop('drop', 0.),
drop_path_rate=kwargs.pop('drop_path', None),
drop_block_rate=kwargs.pop('drop_block', None),
**kwargs.pop('model_kwargs', {}),
)
self.model.to(
device=self.device,
@@ -241,16 +261,22 @@
_logger.info('Model %s created, param count: %d' % (model_name, self.param_count))
data_config = resolve_data_config(kwargs, model=self.model, use_test_size=not use_train_size)
self.scripted = False
if torchscript:
self.model = torch.jit.script(self.model)
self.scripted = True
self.input_size = data_config['input_size']
self.batch_size = kwargs.pop('batch_size', 256)
if aot_autograd:
self.compiled = False
if torchscript:
self.model = torch.jit.script(self.model)
self.compiled = True
elif torchcompile:
assert has_compile, 'A version of torch w/ torch.compile() is required, possibly a nightly.'
torch._dynamo.reset()
self.model = torch.compile(self.model, backend=torchcompile)
self.compiled = True
elif aot_autograd:
assert has_functorch, "functorch is needed for --aot-autograd"
self.model = memory_efficient_fusion(self.model)
self.compiled = True
self.example_inputs = None
self.num_warm_iter = num_warm_iter
@@ -322,7 +348,7 @@ class InferenceBenchmarkRunner(BenchmarkRunner):
param_count=round(self.param_count / 1e6, 2),
)
retries = 0 if self.scripted else 2 # skip profiling if model is scripted
retries = 0 if self.compiled else 2 # skip profiling if model is scripted
while retries:
retries -= 1
try:
@@ -542,7 +568,7 @@
def benchmark(args):
if args.amp:
_logger.warning("Overriding precision to 'amp' since --amp flag set.")
args.precision = 'amp'
args.precision = 'amp' if args.amp_dtype == 'float16' else '_'.join(['amp', args.amp_dtype])
_logger.info(f'Benchmarking in {args.precision} precision. '
f'{"NHWC" if args.channels_last else "NCHW"} layout. '
f'torchscript {"enabled" if args.torchscript else "disabled"}')
@@ -620,7 +646,6 @@ def main():
model_cfgs = [(n, None) for n in model_names]
if len(model_cfgs):
results_file = args.results_file or './benchmark.csv'
_logger.info('Running bulk validation on these pretrained models: {}'.format(', '.join(model_names)))
results = []
try:
@@ -641,22 +666,30 @@
sort_key = 'infer_gmacs'
results = filter(lambda x: sort_key in x, results)
results = sorted(results, key=lambda x: x[sort_key], reverse=True)
if len(results):
write_results(results_file, results)
else:
results = benchmark(args)
if args.results_file:
write_results(args.results_file, results, format=args.results_format)
# output results in JSON to stdout w/ delimiter for runner script
print(f'--result\n{json.dumps(results, indent=4)}')
def write_results(results_file, results):
def write_results(results_file, results, format='csv'):
with open(results_file, mode='w') as cf:
dw = csv.DictWriter(cf, fieldnames=results[0].keys())
dw.writeheader()
for r in results:
dw.writerow(r)
cf.flush()
if format == 'json':
json.dump(results, cf, indent=4)
else:
if not isinstance(results, (list, tuple)):
results = [results]
if not results:
return
dw = csv.DictWriter(cf, fieldnames=results[0].keys())
dw.writeheader()
for r in results:
dw.writerow(r)
cf.flush()
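A brief usage sketch for the updated signature (file names illustrative):
```
results = benchmark(args)  # a dict for one model, or a list of dicts for bulk runs
write_results('./benchmark.csv', results)                  # default CSV output
write_results('./benchmark.json', results, format='json')  # JSON output
```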
if __name__ == '__main__':

@@ -12,8 +12,13 @@ import argparse
import os
import hashlib
import shutil
from collections import OrderedDict
from timm.models.helpers import load_state_dict
import tempfile
from timm.models import load_state_dict
try:
import safetensors.torch
_has_safetensors = True
except ImportError:
_has_safetensors = False
parser = argparse.ArgumentParser(description='PyTorch Checkpoint Cleaner')
parser.add_argument('--checkpoint', default='', type=str, metavar='PATH',
@@ -22,10 +27,12 @@ parser.add_argument('--output', default='', type=str, metavar='PATH',
help='output path')
parser.add_argument('--no-use-ema', dest='no_use_ema', action='store_true',
help='do not use ema version of weights if present')
parser.add_argument('--no-hash', dest='no_hash', action='store_true',
help='no hash in output filename')
parser.add_argument('--clean-aux-bn', dest='clean_aux_bn', action='store_true',
help='remove auxiliary batch norm layers (from SplitBN training) from checkpoint')
_TEMP_NAME = './_checkpoint.pth'
parser.add_argument('--safetensors', action='store_true',
help='Save weights using safetensors instead of the default torch way (pickle).')
def main():
@@ -35,10 +42,24 @@ def main():
print("Error: Output filename ({}) already exists.".format(args.output))
exit(1)
clean_checkpoint(args.checkpoint, args.output, not args.no_use_ema, args.clean_aux_bn)
clean_checkpoint(
args.checkpoint,
args.output,
not args.no_use_ema,
args.no_hash,
args.clean_aux_bn,
safe_serialization=args.safetensors,
)
def clean_checkpoint(checkpoint, output='', use_ema=True, clean_aux_bn=False):
def clean_checkpoint(
checkpoint,
output,
use_ema=True,
no_hash=False,
clean_aux_bn=False,
safe_serialization: bool = False,
):
# Load an existing checkpoint to CPU, strip everything but the state_dict and re-save
if checkpoint and os.path.isfile(checkpoint):
print("=> Loading checkpoint '{}'".format(checkpoint))
@@ -53,22 +74,36 @@ def clean_checkpoint(checkpoint, output='', use_ema=True, clean_aux_bn=False):
new_state_dict[name] = v
print("=> Loaded state_dict from '{}'".format(checkpoint))
try:
torch.save(new_state_dict, _TEMP_NAME, _use_new_zipfile_serialization=False)
except:
torch.save(new_state_dict, _TEMP_NAME)
with open(_TEMP_NAME, 'rb') as f:
sha_hash = hashlib.sha256(f.read()).hexdigest()
ext = ''
if output:
checkpoint_root, checkpoint_base = os.path.split(output)
checkpoint_base = os.path.splitext(checkpoint_base)[0]
checkpoint_base, ext = os.path.splitext(checkpoint_base)
else:
checkpoint_root = ''
checkpoint_base = os.path.splitext(checkpoint)[0]
final_filename = '-'.join([checkpoint_base, sha_hash[:8]]) + '.pth'
shutil.move(_TEMP_NAME, os.path.join(checkpoint_root, final_filename))
checkpoint_base = os.path.split(checkpoint)[1]
checkpoint_base = os.path.splitext(checkpoint_base)[0]
temp_filename = '__' + checkpoint_base
if safe_serialization:
assert _has_safetensors, "`pip install safetensors` to use .safetensors"
safetensors.torch.save_file(new_state_dict, temp_filename)
else:
torch.save(new_state_dict, temp_filename)
with open(temp_filename, 'rb') as f:
sha_hash = hashlib.sha256(f.read()).hexdigest()
if ext:
final_ext = ext
else:
final_ext = ('.safetensors' if safe_serialization else '.pth')
if no_hash:
final_filename = checkpoint_base + final_ext
else:
final_filename = '-'.join([checkpoint_base, sha_hash[:8]]) + final_ext
shutil.move(temp_filename, os.path.join(checkpoint_root, final_filename))
print("=> Saved state_dict to '{}, SHA256: {}'".format(final_filename, sha_hash))
return final_filename
else:
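A hedged usage sketch of the updated function; the input path is hypothetical, and the returned name carries the SHA256 prefix unless `no_hash` is set:
```
final = clean_checkpoint(
    'output/train/model_best.pth.tar',  # hypothetical input checkpoint
    output='clean/model.safetensors',
    use_ema=True,
    safe_serialization=True,
)
print(final)  # e.g. 'model-1a2b3c4d.safetensors'
```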

@@ -1,5 +1,45 @@
# Archived Changes
### Nov 22, 2021
* A number of updated weights and new model defs
* `eca_halonext26ts` - 79.5 @ 256
* `resnet50_gn` (new) - 80.1 @ 224, 81.3 @ 288
* `resnet50` - 80.7 @ 224, 80.9 @ 288 (trained at 176, not replacing current a1 weights as default since these don't scale as well to higher res, [weights](https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-rsb-weights/resnet50_a1h2_176-001a1197.pth))
* `resnext50_32x4d` - 81.1 @ 224, 82.0 @ 288
* `sebotnet33ts_256` (new) - 81.2 @ 224
* `lamhalobotnet50ts_256` - 81.5 @ 256
* `halonet50ts` - 81.7 @ 256
* `halo2botnet50ts_256` - 82.0 @ 256
* `resnet101` - 82.0 @ 224, 82.8 @ 288
* `resnetv2_101` (new) - 82.1 @ 224, 83.0 @ 288
* `resnet152` - 82.8 @ 224, 83.5 @ 288
* `regnetz_d8` (new) - 83.5 @ 256, 84.0 @ 320
* `regnetz_e8` (new) - 84.5 @ 256, 85.0 @ 320
* `vit_base_patch8_224` (85.8 top-1) & `in21k` variant weights added thanks to [Martins Bruveris](https://github.com/martinsbruveris)
* Groundwork in place for FX feature extraction thanks to [Alexander Soare](https://github.com/alexander-soare)
* models updated for tracing compatibility (almost full support with some distilled transformer exceptions)
### Oct 19, 2021
* ResNet strikes back (https://arxiv.org/abs/2110.00476) weights added, plus any extra training components used. Model weights and some more details here (https://github.com/rwightman/pytorch-image-models/releases/tag/v0.1-rsb-weights)
* BCE loss and Repeated Augmentation support for RSB paper
* 4 series of ResNet based attention model experiments being added (implemented across byobnet.py/byoanet.py). These include all sorts of attention, from channel attn like SE, ECA to 2D QKV self-attention layers such as Halo, Bottleneck, Lambda. Details here (https://github.com/rwightman/pytorch-image-models/releases/tag/v0.1-attn-weights)
* Working implementations of the following 2D self-attention modules (likely to be differences from paper or eventual official impl):
* Halo (https://arxiv.org/abs/2103.12731)
* Bottleneck Transformer (https://arxiv.org/abs/2101.11605)
* LambdaNetworks (https://arxiv.org/abs/2102.08602)
* A RegNetZ series of models with some attention experiments (being added to). These do not follow the paper (https://arxiv.org/abs/2103.06877) in any way other than block architecture, details of official models are not available. See more here (https://github.com/rwightman/pytorch-image-models/releases/tag/v0.1-attn-weights)
* ConvMixer (https://openreview.net/forum?id=TVHS5Y4dNvM), CrossVit (https://arxiv.org/abs/2103.14899), and BeiT (https://arxiv.org/abs/2106.08254) architectures + weights added
* freeze/unfreeze helpers by [Alexander Soare](https://github.com/alexander-soare)
### Aug 18, 2021
* Optimizer bonanza!
* Add LAMB and LARS optimizers, incl trust ratio clipping options. Tweaked to work properly in PyTorch XLA (tested on TPUs w/ `timm bits` [branch](https://github.com/rwightman/pytorch-image-models/tree/bits_and_tpu/timm/bits))
* Add MADGRAD from FB research w/ a few tweaks (decoupled decay option, step handling that works with PyTorch XLA)
* Some cleanup on all optimizers and factory. No more `.data`, a bit more consistency, unit tests for all!
* SGDP and AdamP still won't work with PyTorch XLA but others should (have yet to test Adabelief, Adafactor, Adahessian myself).
* EfficientNet-V2 XL TF ported weights added, but they don't validate well in PyTorch (L is better). The pre-processing for the V2 TF training is a bit different and the fine-tuned 21k -> 1k weights are very sensitive and less robust than the 1k weights.
* Added PyTorch trained EfficientNet-V2 'Tiny' w/ GlobalContext attn weights. Only .1-.2 top-1 better than the SE so more of a curiosity for those interested.
### July 12, 2021
* Add XCiT models from [official facebook impl](https://github.com/facebookresearch/xcit). Contributed by [Alexander Soare](https://github.com/alexander-soare)

@@ -1,4 +1,183 @@
# Recent Changes
### Jan 5, 2023
* ConvNeXt-V2 models and weights added to existing `convnext.py`
* Paper: [ConvNeXt V2: Co-designing and Scaling ConvNets with Masked Autoencoders](http://arxiv.org/abs/2301.00808)
* Reference impl: https://github.com/facebookresearch/ConvNeXt-V2 (NOTE: weights currently CC-BY-NC)
### Dec 23, 2022 🎄☃
* Add FlexiViT models and weights from https://github.com/google-research/big_vision (check out paper at https://arxiv.org/abs/2212.08013)
* NOTE currently resizing is static on model creation, on-the-fly dynamic / train patch size sampling is a WIP
* Many more models updated to multi-weight and downloadable via HF hub now (convnext, efficientnet, mobilenet, vision_transformer*, beit)
* More model pretrained tags and adjustments, some model names changed (working on deprecation translations, consider the main branch a DEV branch right now, use 0.6.x for stable use)
* More ImageNet-12k (subset of 22k) pretrain models popping up:
* `efficientnet_b5.in12k_ft_in1k` - 85.9 @ 448x448
* `vit_medium_patch16_gap_384.in12k_ft_in1k` - 85.5 @ 384x384
* `vit_medium_patch16_gap_256.in12k_ft_in1k` - 84.5 @ 256x256
* `convnext_nano.in12k_ft_in1k` - 82.9 @ 288x288
### Dec 8, 2022
* Add 'EVA l' to `vision_transformer.py`, MAE style ViT-L/14 MIM pretrain w/ EVA-CLIP targets, FT on ImageNet-1k (w/ ImageNet-22k intermediate for some)
* original source: https://github.com/baaivision/EVA
| model | top1 | param_count | gmac | macts | hub |
|:------------------------------------------|-----:|------------:|------:|------:|:----------------------------------------|
| eva_large_patch14_336.in22k_ft_in22k_in1k | 89.2 | 304.5 | 191.1 | 270.2 | [link](https://huggingface.co/BAAI/EVA) |
| eva_large_patch14_336.in22k_ft_in1k | 88.7 | 304.5 | 191.1 | 270.2 | [link](https://huggingface.co/BAAI/EVA) |
| eva_large_patch14_196.in22k_ft_in22k_in1k | 88.6 | 304.1 | 61.6 | 63.5 | [link](https://huggingface.co/BAAI/EVA) |
| eva_large_patch14_196.in22k_ft_in1k | 87.9 | 304.1 | 61.6 | 63.5 | [link](https://huggingface.co/BAAI/EVA) |
### Dec 6, 2022
* Add 'EVA g', BEiT style ViT-g/14 model weights w/ both MIM pretrain and CLIP pretrain to `beit.py`.
* original source: https://github.com/baaivision/EVA
* paper: https://arxiv.org/abs/2211.07636
| model | top1 | param_count | gmac | macts | hub |
|:-----------------------------------------|-------:|--------------:|-------:|--------:|:----------------------------------------|
| eva_giant_patch14_560.m30m_ft_in22k_in1k | 89.8 | 1014.4 | 1906.8 | 2577.2 | [link](https://huggingface.co/BAAI/EVA) |
| eva_giant_patch14_336.m30m_ft_in22k_in1k | 89.6 | 1013 | 620.6 | 550.7 | [link](https://huggingface.co/BAAI/EVA) |
| eva_giant_patch14_336.clip_ft_in1k | 89.4 | 1013 | 620.6 | 550.7 | [link](https://huggingface.co/BAAI/EVA) |
| eva_giant_patch14_224.clip_ft_in1k | 89.1 | 1012.6 | 267.2 | 192.6 | [link](https://huggingface.co/BAAI/EVA) |
### Dec 5, 2022
* Pre-release (`0.8.0dev0`) of multi-weight support (`model_arch.pretrained_tag`). Install with `pip install --pre timm` (usage sketch at the end of this section)
* vision_transformer, maxvit, convnext are the first three model impl w/ support
* model names are changing with this (previous _21k, etc. fn will merge), still sorting out deprecation handling
* bugs are likely, but I need feedback so please try it out
* if stability is needed, please use 0.6.x pypi releases or clone from [0.6.x branch](https://github.com/rwightman/pytorch-image-models/tree/0.6.x)
* Support for PyTorch 2.0 compile is added in train/validate/inference/benchmark, use `--torchcompile` argument
* Inference script allows more control over output: select top-k for class index + prob, with json, csv, or parquet output
* Add a full set of fine-tuned CLIP image tower weights from both LAION-2B and original OpenAI CLIP models
| model | top1 | param_count | gmac | macts | hub |
|:-------------------------------------------------|-------:|--------------:|-------:|--------:|:-------------------------------------------------------------------------------------|
| vit_huge_patch14_clip_336.laion2b_ft_in12k_in1k | 88.6 | 632.5 | 391 | 407.5 | [link](https://huggingface.co/timm/vit_huge_patch14_clip_336.laion2b_ft_in12k_in1k) |
| vit_large_patch14_clip_336.openai_ft_in12k_in1k | 88.3 | 304.5 | 191.1 | 270.2 | [link](https://huggingface.co/timm/vit_large_patch14_clip_336.openai_ft_in12k_in1k) |
| vit_huge_patch14_clip_224.laion2b_ft_in12k_in1k | 88.2 | 632 | 167.4 | 139.4 | [link](https://huggingface.co/timm/vit_huge_patch14_clip_224.laion2b_ft_in12k_in1k) |
| vit_large_patch14_clip_336.laion2b_ft_in12k_in1k | 88.2 | 304.5 | 191.1 | 270.2 | [link](https://huggingface.co/timm/vit_large_patch14_clip_336.laion2b_ft_in12k_in1k) |
| vit_large_patch14_clip_224.openai_ft_in12k_in1k | 88.2 | 304.2 | 81.1 | 88.8 | [link](https://huggingface.co/timm/vit_large_patch14_clip_224.openai_ft_in12k_in1k) |
| vit_large_patch14_clip_224.laion2b_ft_in12k_in1k | 87.9 | 304.2 | 81.1 | 88.8 | [link](https://huggingface.co/timm/vit_large_patch14_clip_224.laion2b_ft_in12k_in1k) |
| vit_large_patch14_clip_224.openai_ft_in1k | 87.9 | 304.2 | 81.1 | 88.8 | [link](https://huggingface.co/timm/vit_large_patch14_clip_224.openai_ft_in1k) |
| vit_large_patch14_clip_336.laion2b_ft_in1k | 87.9 | 304.5 | 191.1 | 270.2 | [link](https://huggingface.co/timm/vit_large_patch14_clip_336.laion2b_ft_in1k) |
| vit_huge_patch14_clip_224.laion2b_ft_in1k | 87.6 | 632 | 167.4 | 139.4 | [link](https://huggingface.co/timm/vit_huge_patch14_clip_224.laion2b_ft_in1k) |
| vit_large_patch14_clip_224.laion2b_ft_in1k | 87.3 | 304.2 | 81.1 | 88.8 | [link](https://huggingface.co/timm/vit_large_patch14_clip_224.laion2b_ft_in1k) |
| vit_base_patch16_clip_384.laion2b_ft_in12k_in1k | 87.2 | 86.9 | 55.5 | 101.6 | [link](https://huggingface.co/timm/vit_base_patch16_clip_384.laion2b_ft_in12k_in1k) |
| vit_base_patch16_clip_384.openai_ft_in12k_in1k | 87 | 86.9 | 55.5 | 101.6 | [link](https://huggingface.co/timm/vit_base_patch16_clip_384.openai_ft_in12k_in1k) |
| vit_base_patch16_clip_384.laion2b_ft_in1k | 86.6 | 86.9 | 55.5 | 101.6 | [link](https://huggingface.co/timm/vit_base_patch16_clip_384.laion2b_ft_in1k) |
| vit_base_patch16_clip_384.openai_ft_in1k | 86.2 | 86.9 | 55.5 | 101.6 | [link](https://huggingface.co/timm/vit_base_patch16_clip_384.openai_ft_in1k) |
| vit_base_patch16_clip_224.laion2b_ft_in12k_in1k | 86.2 | 86.6 | 17.6 | 23.9 | [link](https://huggingface.co/timm/vit_base_patch16_clip_224.laion2b_ft_in12k_in1k) |
| vit_base_patch16_clip_224.openai_ft_in12k_in1k | 85.9 | 86.6 | 17.6 | 23.9 | [link](https://huggingface.co/timm/vit_base_patch16_clip_224.openai_ft_in12k_in1k) |
| vit_base_patch32_clip_448.laion2b_ft_in12k_in1k | 85.8 | 88.3 | 17.9 | 23.9 | [link](https://huggingface.co/timm/vit_base_patch32_clip_448.laion2b_ft_in12k_in1k) |
| vit_base_patch16_clip_224.laion2b_ft_in1k | 85.5 | 86.6 | 17.6 | 23.9 | [link](https://huggingface.co/timm/vit_base_patch16_clip_224.laion2b_ft_in1k) |
| vit_base_patch32_clip_384.laion2b_ft_in12k_in1k | 85.4 | 88.3 | 13.1 | 16.5 | [link](https://huggingface.co/timm/vit_base_patch32_clip_384.laion2b_ft_in12k_in1k) |
| vit_base_patch16_clip_224.openai_ft_in1k | 85.3 | 86.6 | 17.6 | 23.9 | [link](https://huggingface.co/timm/vit_base_patch16_clip_224.openai_ft_in1k) |
| vit_base_patch32_clip_384.openai_ft_in12k_in1k | 85.2 | 88.3 | 13.1 | 16.5 | [link](https://huggingface.co/timm/vit_base_patch32_clip_384.openai_ft_in12k_in1k) |
| vit_base_patch32_clip_224.laion2b_ft_in12k_in1k | 83.3 | 88.2 | 4.4 | 5 | [link](https://huggingface.co/timm/vit_base_patch32_clip_224.laion2b_ft_in12k_in1k) |
| vit_base_patch32_clip_224.laion2b_ft_in1k | 82.6 | 88.2 | 4.4 | 5 | [link](https://huggingface.co/timm/vit_base_patch32_clip_224.laion2b_ft_in1k) |
| vit_base_patch32_clip_224.openai_ft_in1k | 81.9 | 88.2 | 4.4 | 5 | [link](https://huggingface.co/timm/vit_base_patch32_clip_224.openai_ft_in1k) |
* Port of MaxViT Tensorflow Weights from official impl at https://github.com/google-research/maxvit
* There were larger than expected drops for the upscaled 384/512 in21k fine-tune weights, possibly a detail missing, but the 21k FT did seem sensitive to small preprocessing differences
| model | top1 | param_count | gmac | macts | hub |
|:-----------------------------------|-------:|--------------:|-------:|--------:|:-----------------------------------------------------------------------|
| maxvit_xlarge_tf_512.in21k_ft_in1k | 88.5 | 475.8 | 534.1 | 1413.2 | [link](https://huggingface.co/timm/maxvit_xlarge_tf_512.in21k_ft_in1k) |
| maxvit_xlarge_tf_384.in21k_ft_in1k | 88.3 | 475.3 | 292.8 | 668.8 | [link](https://huggingface.co/timm/maxvit_xlarge_tf_384.in21k_ft_in1k) |
| maxvit_base_tf_512.in21k_ft_in1k | 88.2 | 119.9 | 138 | 704 | [link](https://huggingface.co/timm/maxvit_base_tf_512.in21k_ft_in1k) |
| maxvit_large_tf_512.in21k_ft_in1k | 88 | 212.3 | 244.8 | 942.2 | [link](https://huggingface.co/timm/maxvit_large_tf_512.in21k_ft_in1k) |
| maxvit_large_tf_384.in21k_ft_in1k | 88 | 212 | 132.6 | 445.8 | [link](https://huggingface.co/timm/maxvit_large_tf_384.in21k_ft_in1k) |
| maxvit_base_tf_384.in21k_ft_in1k | 87.9 | 119.6 | 73.8 | 332.9 | [link](https://huggingface.co/timm/maxvit_base_tf_384.in21k_ft_in1k) |
| maxvit_base_tf_512.in1k | 86.6 | 119.9 | 138 | 704 | [link](https://huggingface.co/timm/maxvit_base_tf_512.in1k) |
| maxvit_large_tf_512.in1k | 86.5 | 212.3 | 244.8 | 942.2 | [link](https://huggingface.co/timm/maxvit_large_tf_512.in1k) |
| maxvit_base_tf_384.in1k | 86.3 | 119.6 | 73.8 | 332.9 | [link](https://huggingface.co/timm/maxvit_base_tf_384.in1k) |
| maxvit_large_tf_384.in1k | 86.2 | 212 | 132.6 | 445.8 | [link](https://huggingface.co/timm/maxvit_large_tf_384.in1k) |
| maxvit_small_tf_512.in1k | 86.1 | 69.1 | 67.3 | 383.8 | [link](https://huggingface.co/timm/maxvit_small_tf_512.in1k) |
| maxvit_tiny_tf_512.in1k | 85.7 | 31 | 33.5 | 257.6 | [link](https://huggingface.co/timm/maxvit_tiny_tf_512.in1k) |
| maxvit_small_tf_384.in1k | 85.5 | 69 | 35.9 | 183.6 | [link](https://huggingface.co/timm/maxvit_small_tf_384.in1k) |
| maxvit_tiny_tf_384.in1k | 85.1 | 31 | 17.5 | 123.4 | [link](https://huggingface.co/timm/maxvit_tiny_tf_384.in1k) |
| maxvit_large_tf_224.in1k | 84.9 | 211.8 | 43.7 | 127.4 | [link](https://huggingface.co/timm/maxvit_large_tf_224.in1k) |
| maxvit_base_tf_224.in1k | 84.9 | 119.5 | 24 | 95 | [link](https://huggingface.co/timm/maxvit_base_tf_224.in1k) |
| maxvit_small_tf_224.in1k | 84.4 | 68.9 | 11.7 | 53.2 | [link](https://huggingface.co/timm/maxvit_small_tf_224.in1k) |
| maxvit_tiny_tf_224.in1k | 83.4 | 30.9 | 5.6 | 35.8 | [link](https://huggingface.co/timm/maxvit_tiny_tf_224.in1k) |
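As flagged above, a usage sketch for the multi-weight naming; the tag is taken from the CLIP table in this section:
```
import timm

# '<architecture>.<pretrained_tag>' selects one of several weight sets for the same arch
model = timm.create_model('vit_large_patch14_clip_336.openai_ft_in12k_in1k', pretrained=True)
```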
### Oct 15, 2022
* Train and validation script enhancements
* Non-GPU (ie CPU) device support
* SLURM compatibility for train script
* HF datasets support (via ReaderHfds)
* TFDS/WDS dataloading improvements (sample padding/wrap for distributed use fixed wrt sample count estimate)
* in_chans !=3 support for scripts / loader
* Adan optimizer
* Can enable per-step LR scheduling via args
* Dataset 'parsers' renamed to 'readers', more descriptive of purpose
* AMP args changed, APEX via `--amp-impl apex`, bfloat16 supported via `--amp-dtype bfloat16` (example command after this list)
* main branch switched to 0.7.x version, 0.6.x forked for stable release of weight-only adds
* master -> main branch rename
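Example command for the new AMP args (dataset path and model illustrative):
```
./train.py /data/imagenet --model resnet50 --amp --amp-dtype bfloat16
```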
### Oct 10, 2022
* More weights in `maxxvit` series, incl first ConvNeXt block based `coatnext` and `maxxvit` experiments:
* `coatnext_nano_rw_224` - 82.0 @ 224 (G) -- (uses ConvNeXt conv block, no BatchNorm)
* `maxxvit_rmlp_nano_rw_256` - 83.0 @ 256, 83.7 @ 320 (G) (uses ConvNeXt conv block, no BN)
* `maxvit_rmlp_small_rw_224` - 84.5 @ 224, 85.1 @ 320 (G)
* `maxxvit_rmlp_small_rw_256` - 84.6 @ 256, 84.9 @ 288 (G) -- could be trained better, hparams need tuning (uses ConvNeXt block, no BN)
* `coatnet_rmlp_2_rw_224` - 84.6 @ 224, 85 @ 320 (T)
* NOTE: official MaxVit weights (in1k) have been released at https://github.com/google-research/maxvit -- some extra work is needed to port and adapt since my impl was created independently of theirs and has a few small differences + the whole TF same padding fun.
### Sept 23, 2022
* LAION-2B CLIP image towers supported as pretrained backbones for fine-tune or features (no classifier)
* vit_base_patch32_224_clip_laion2b
* vit_large_patch14_224_clip_laion2b
* vit_huge_patch14_224_clip_laion2b
* vit_giant_patch14_224_clip_laion2b
### Sept 7, 2022
* Hugging Face [`timm` docs](https://huggingface.co/docs/hub/timm) home now exists, look for more here in the future
* Add BEiT-v2 weights for base and large 224x224 models from https://github.com/microsoft/unilm/tree/master/beit2
* Add more weights in `maxxvit` series incl a `pico` (7.5M params, 1.9 GMACs), two `tiny` variants:
* `maxvit_rmlp_pico_rw_256` - 80.5 @ 256, 81.3 @ 320 (T)
* `maxvit_tiny_rw_224` - 83.5 @ 224 (G)
* `maxvit_rmlp_tiny_rw_256` - 84.2 @ 256, 84.8 @ 320 (T)
### Aug 29, 2022
* MaxVit window size scales with img_size by default. Add new RelPosMlp MaxViT weight that leverages this:
* `maxvit_rmlp_nano_rw_256` - 83.0 @ 256, 83.6 @ 320 (T)
### Aug 26, 2022
* CoAtNet (https://arxiv.org/abs/2106.04803) and MaxVit (https://arxiv.org/abs/2204.01697) `timm` original models
* both found in [`maxxvit.py`](https://github.com/rwightman/pytorch-image-models/blob/master/timm/models/maxxvit.py) model def, contains numerous experiments outside scope of original papers
* an unfinished Tensorflow version from MaxVit authors can be found https://github.com/google-research/maxvit
* Initial CoAtNet and MaxVit timm pretrained weights (working on more):
* `coatnet_nano_rw_224` - 81.7 @ 224 (T)
* `coatnet_rmlp_nano_rw_224` - 82.0 @ 224, 82.8 @ 320 (T)
* `coatnet_0_rw_224` - 82.4 (T) -- NOTE timm '0' coatnets have 2 more 3rd stage blocks
* `coatnet_bn_0_rw_224` - 82.4 (T)
* `maxvit_nano_rw_256` - 82.9 @ 256 (T)
* `coatnet_rmlp_1_rw_224` - 83.4 @ 224, 84 @ 320 (T)
* `coatnet_1_rw_224` - 83.6 @ 224 (G)
* (T) = TPU trained with `bits_and_tpu` branch training code, (G) = GPU trained
* GCVit (weights adapted from https://github.com/NVlabs/GCVit, code 100% `timm` re-write for license purposes)
* MViT-V2 (multi-scale vit, adapted from https://github.com/facebookresearch/mvit)
* EfficientFormer (adapted from https://github.com/snap-research/EfficientFormer)
* PyramidVisionTransformer-V2 (adapted from https://github.com/whai362/PVT)
* 'Fast Norm' support for LayerNorm and GroupNorm that avoids float32 upcast w/ AMP (uses APEX LN if available for further boost)
### Aug 15, 2022
* ConvNeXt atto weights added
* `convnext_atto` - 75.7 @ 224, 77.0 @ 288
* `convnext_atto_ols` - 75.9 @ 224, 77.2 @ 288
### Aug 5, 2022
* More custom ConvNeXt smaller model defs with weights
* `convnext_femto` - 77.5 @ 224, 78.7 @ 288
* `convnext_femto_ols` - 77.9 @ 224, 78.9 @ 288
* `convnext_pico` - 79.5 @ 224, 80.4 @ 288
* `convnext_pico_ols` - 79.5 @ 224, 80.5 @ 288
* `convnext_nano_ols` - 80.9 @ 224, 81.6 @ 288
* Updated EdgeNeXt to improve ONNX export, add new base variant and weights from original (https://github.com/mmaaz60/EdgeNeXt)
### July 28, 2022
* Add freshly minted DeiT-III Medium (width=512, depth=12, num_heads=8) model weights. Thanks [Hugo Touvron](https://github.com/TouvronHugo)!
### July 27, 2022
* All runtime benchmark and validation result csv files are up-to-date!
@@ -133,42 +312,3 @@ More models, more fixes
* TinyNet models added by [rsomani95](https://github.com/rsomani95)
* LCNet added via MobileNetV3 architecture
### Nov 22, 2021
* A number of updated weights and new model defs
* `eca_halonext26ts` - 79.5 @ 256
* `resnet50_gn` (new) - 80.1 @ 224, 81.3 @ 288
* `resnet50` - 80.7 @ 224, 80.9 @ 288 (trained at 176, not replacing current a1 weights as default since these don't scale as well to higher res, [weights](https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-rsb-weights/resnet50_a1h2_176-001a1197.pth))
* `resnext50_32x4d` - 81.1 @ 224, 82.0 @ 288
* `sebotnet33ts_256` (new) - 81.2 @ 224
* `lamhalobotnet50ts_256` - 81.5 @ 256
* `halonet50ts` - 81.7 @ 256
* `halo2botnet50ts_256` - 82.0 @ 256
* `resnet101` - 82.0 @ 224, 82.8 @ 288
* `resnetv2_101` (new) - 82.1 @ 224, 83.0 @ 288
* `resnet152` - 82.8 @ 224, 83.5 @ 288
* `regnetz_d8` (new) - 83.5 @ 256, 84.0 @ 320
* `regnetz_e8` (new) - 84.5 @ 256, 85.0 @ 320
* `vit_base_patch8_224` (85.8 top-1) & `in21k` variant weights added thanks to [Martins Bruveris](https://github.com/martinsbruveris)
* Groundwork in place for FX feature extraction thanks to [Alexander Soare](https://github.com/alexander-soare)
* models updated for tracing compatibility (almost full support with some distilled transformer exceptions)
### Oct 19, 2021
* ResNet strikes back (https://arxiv.org/abs/2110.00476) weights added, plus any extra training components used. Model weights and some more details here (https://github.com/rwightman/pytorch-image-models/releases/tag/v0.1-rsb-weights)
* BCE loss and Repeated Augmentation support for RSB paper
* 4 series of ResNet based attention model experiments being added (implemented across byobnet.py/byoanet.py). These include all sorts of attention, from channel attn like SE, ECA to 2D QKV self-attention layers such as Halo, Bottleneck, Lambda. Details here (https://github.com/rwightman/pytorch-image-models/releases/tag/v0.1-attn-weights)
* Working implementations of the following 2D self-attention modules (likely to be differences from paper or eventual official impl):
* Halo (https://arxiv.org/abs/2103.12731)
* Bottleneck Transformer (https://arxiv.org/abs/2101.11605)
* LambdaNetworks (https://arxiv.org/abs/2102.08602)
* A RegNetZ series of models with some attention experiments (being added to). These do not follow the paper (https://arxiv.org/abs/2103.06877) in any way other than block architecture, details of official models are not available. See more here (https://github.com/rwightman/pytorch-image-models/releases/tag/v0.1-attn-weights)
* ConvMixer (https://openreview.net/forum?id=TVHS5Y4dNvM), CrossVit (https://arxiv.org/abs/2103.14899), and BeiT (https://arxiv.org/abs/2106.08254) architectures + weights added
* freeze/unfreeze helpers by [Alexander Soare](https://github.com/alexander-soare)
### Aug 18, 2021
* Optimizer bonanza!
* Add LAMB and LARS optimizers, incl trust ratio clipping options. Tweaked to work properly in PyTorch XLA (tested on TPUs w/ `timm bits` [branch](https://github.com/rwightman/pytorch-image-models/tree/bits_and_tpu/timm/bits))
* Add MADGRAD from FB research w/ a few tweaks (decoupled decay option, step handling that works with PyTorch XLA)
* Some cleanup on all optimizers and factory. No more `.data`, a bit more consistency, unit tests for all!
* SGDP and AdamP still won't work with PyTorch XLA but others should (have yet to test Adabelief, Adafactor, Adahessian myself).
* EfficientNet-V2 XL TF ported weights added, but they don't validate well in PyTorch (L is better). The pre-processing for the V2 TF training is a bit different and the fine-tuned 21k -> 1k weights are very sensitive and less robust than the 1k weights.
* Added PyTorch trained EfficientNet-V2 'Tiny' w/ GlobalContext attn weights. Only .1-.2 top-1 better than the SE so more of a curiosity for those interested.

@@ -0,0 +1,14 @@
# Hugging Face Timm Docs
## Getting Started
```
pip install git+https://github.com/huggingface/doc-builder.git@main#egg=hf-doc-builder
pip install watchdog black
```
## Preview the Docs Locally
```
doc-builder preview timm hfdocs/source
```

@@ -1,149 +1,160 @@
- sections:
- local: index
title: Pytorch Image Models (timm)
title: Home
- local: quickstart
title: Quickstart
- local: installation
title: Installation
title: Get started
- sections:
- local: feature_extraction
title: Using Pretrained Models as Feature Extractors
- local: training_script
title: Training With The Official Training Script
- local: hf_hub
title: Share and Load Models from the 🤗 Hugging Face Hub
title: Tutorials
- sections:
- local: models
title: Model Summaries
- local: results
title: Results
- local: scripts
title: Scripts
- local: training_hparam_examples
title: Training Examples
- local: feature_extraction
title: Feature Extraction
- local: changes
title: Recent Changes
- local: archived_changes
title: Archived Changes
- local: model_pages
title: Model Pages
isExpanded: false
sections:
- local: models/adversarial-inception-v3
title: Adversarial Inception v3
- local: models/advprop
title: AdvProp (EfficientNet)
- local: models/big-transfer
title: Big Transfer (BiT)
- local: models/csp-darknet
title: CSP-DarkNet
- local: models/csp-resnet
title: CSP-ResNet
- local: models/csp-resnext
title: CSP-ResNeXt
- local: models/densenet
title: DenseNet
- local: models/dla
title: Deep Layer Aggregation
- local: models/dpn
title: Dual Path Network (DPN)
- local: models/ecaresnet
title: ECA-ResNet
- local: models/efficientnet
title: EfficientNet
- local: models/efficientnet-pruned
title: EfficientNet (Knapsack Pruned)
- local: models/ensemble-adversarial
title: Ensemble Adversarial Inception ResNet v2
- local: models/ese-vovnet
title: ESE-VoVNet
- local: models/fbnet
title: FBNet
- local: models/gloun-inception-v3
title: (Gluon) Inception v3
- local: models/gloun-resnet
title: (Gluon) ResNet
- local: models/gloun-resnext
title: (Gluon) ResNeXt
- local: models/gloun-senet
title: (Gluon) SENet
- local: models/gloun-seresnext
title: (Gluon) SE-ResNeXt
- local: models/gloun-xception
title: (Gluon) Xception
- local: models/hrnet
title: HRNet
- local: models/ig-resnext
title: Instagram ResNeXt WSL
- local: models/inception-resnet-v2
title: Inception ResNet v2
- local: models/inception-v3
title: Inception v3
- local: models/inception-v4
title: Inception v4
- local: models/legacy-se-resnet
title: (Legacy) SE-ResNet
- local: models/legacy-se-resnext
title: (Legacy) SE-ResNeXt
- local: models/legacy-senet
title: (Legacy) SENet
- local: models/mixnet
title: MixNet
- local: models/mnasnet
title: MnasNet
- local: models/mobilenet-v2
title: MobileNet v2
- local: models/mobilenet-v3
title: MobileNet v3
- local: models/nasnet
title: NASNet
- local: models/noisy-student
title: Noisy Student (EfficientNet)
- local: models/pnasnet
title: PNASNet
- local: models/regnetx
title: RegNetX
- local: models/regnety
title: RegNetY
- local: models/res2net
title: Res2Net
- local: models/res2next
title: Res2NeXt
- local: models/resnest
title: ResNeSt
- local: models/resnet
title: ResNet
- local: models/resnet-d
title: ResNet-D
- local: models/resnext
title: ResNeXt
- local: models/rexnet
title: RexNet
- local: models/se-resnet
title: SE-ResNet
- local: models/selecsls
title: SelecSLS
- local: models/seresnext
title: SE-ResNeXt
- local: models/skresnet
title: SK-ResNet
- local: models/skresnext
title: SK-ResNeXt
- local: models/spnasnet
title: SPNASNet
- local: models/ssl-resnet
title: SSL ResNet
- local: models/swsl-resnet
title: SWSL ResNet
- local: models/swsl-resnext
title: SWSL ResNeXt
- local: models/tf-efficientnet
title: (Tensorflow) EfficientNet
- local: models/tf-efficientnet-condconv
title: (Tensorflow) EfficientNet CondConv
- local: models/tf-efficientnet-lite
title: (Tensorflow) EfficientNet Lite
- local: models/tf-inception-v3
title: (Tensorflow) Inception v3
- local: models/tf-mixnet
title: (Tensorflow) MixNet
- local: models/tf-mobilenet-v3
title: (Tensorflow) MobileNet v3
- local: models/tresnet
title: TResNet
- local: models/wide-resnet
title: Wide ResNet
- local: models/xception
title: Xception
title: Get started
- local: models/adversarial-inception-v3
title: Adversarial Inception v3
- local: models/advprop
title: AdvProp (EfficientNet)
- local: models/big-transfer
title: Big Transfer (BiT)
- local: models/csp-darknet
title: CSP-DarkNet
- local: models/csp-resnet
title: CSP-ResNet
- local: models/csp-resnext
title: CSP-ResNeXt
- local: models/densenet
title: DenseNet
- local: models/dla
title: Deep Layer Aggregation
- local: models/dpn
title: Dual Path Network (DPN)
- local: models/ecaresnet
title: ECA-ResNet
- local: models/efficientnet
title: EfficientNet
- local: models/efficientnet-pruned
title: EfficientNet (Knapsack Pruned)
- local: models/ensemble-adversarial
title: Ensemble Adversarial Inception ResNet v2
- local: models/ese-vovnet
title: ESE-VoVNet
- local: models/fbnet
title: FBNet
- local: models/gloun-inception-v3
title: (Gluon) Inception v3
- local: models/gloun-resnet
title: (Gluon) ResNet
- local: models/gloun-resnext
title: (Gluon) ResNeXt
- local: models/gloun-senet
title: (Gluon) SENet
- local: models/gloun-seresnext
title: (Gluon) SE-ResNeXt
- local: models/gloun-xception
title: (Gluon) Xception
- local: models/hrnet
title: HRNet
- local: models/ig-resnext
title: Instagram ResNeXt WSL
- local: models/inception-resnet-v2
title: Inception ResNet v2
- local: models/inception-v3
title: Inception v3
- local: models/inception-v4
title: Inception v4
- local: models/legacy-se-resnet
title: (Legacy) SE-ResNet
- local: models/legacy-se-resnext
title: (Legacy) SE-ResNeXt
- local: models/legacy-senet
title: (Legacy) SENet
- local: models/mixnet
title: MixNet
- local: models/mnasnet
title: MnasNet
- local: models/mobilenet-v2
title: MobileNet v2
- local: models/mobilenet-v3
title: MobileNet v3
- local: models/nasnet
title: NASNet
- local: models/noisy-student
title: Noisy Student (EfficientNet)
- local: models/pnasnet
title: PNASNet
- local: models/regnetx
title: RegNetX
- local: models/regnety
title: RegNetY
- local: models/res2net
title: Res2Net
- local: models/res2next
title: Res2NeXt
- local: models/resnest
title: ResNeSt
- local: models/resnet
title: ResNet
- local: models/resnet-d
title: ResNet-D
- local: models/resnext
title: ResNeXt
- local: models/rexnet
title: RexNet
- local: models/se-resnet
title: SE-ResNet
- local: models/selecsls
title: SelecSLS
- local: models/seresnext
title: SE-ResNeXt
- local: models/skresnet
title: SK-ResNet
- local: models/skresnext
title: SK-ResNeXt
- local: models/spnasnet
title: SPNASNet
- local: models/ssl-resnet
title: SSL ResNet
- local: models/swsl-resnet
title: SWSL ResNet
- local: models/swsl-resnext
title: SWSL ResNeXt
- local: models/tf-efficientnet
title: (Tensorflow) EfficientNet
- local: models/tf-efficientnet-condconv
title: (Tensorflow) EfficientNet CondConv
- local: models/tf-efficientnet-lite
title: (Tensorflow) EfficientNet Lite
- local: models/tf-inception-v3
title: (Tensorflow) Inception v3
- local: models/tf-mixnet
title: (Tensorflow) MixNet
- local: models/tf-mobilenet-v3
title: (Tensorflow) MobileNet v3
- local: models/tresnet
title: TResNet
- local: models/wide-resnet
title: Wide ResNet
- local: models/xception
title: Xception
title: Model Pages
isExpanded: false
- sections:
- local: reference/models
title: Models
- local: reference/data
title: Data
- local: reference/optimizers
title: Optimizers
- local: reference/schedulers
title: Learning Rate Schedulers
title: Reference

@@ -1,418 +0,0 @@
# Archived Changes
### July 12, 2021
* Add XCiT models from [official facebook impl](https://github.com/facebookresearch/xcit). Contributed by [Alexander Soare](https://github.com/alexander-soare)
### July 5-9, 2021
* Add `efficientnetv2_rw_t` weights, a custom 'tiny' 13.6M param variant that is a bit better than (non NoisyStudent) B3 models. Both faster and better accuracy (at same or lower res)
* top-1 82.34 @ 288x288 and 82.54 @ 320x320
* Add [SAM pretrained](https://arxiv.org/abs/2106.01548) in1k weight for ViT B/16 (`vit_base_patch16_sam_224`) and B/32 (`vit_base_patch32_sam_224`) models.
* Add 'Aggregating Nested Transformer' (NesT) w/ weights converted from official [Flax impl](https://github.com/google-research/nested-transformer). Contributed by [Alexander Soare](https://github.com/alexander-soare).
* `jx_nest_base` - 83.534, `jx_nest_small` - 83.120, `jx_nest_tiny` - 81.426
### June 23, 2021
* Reproduce gMLP model training, `gmlp_s16_224` trained to 79.6 top-1, matching [paper](https://arxiv.org/abs/2105.08050). Hparams for this and other recent MLP training [here](https://gist.github.com/rwightman/d6c264a9001f9167e06c209f630b2cc6)
### June 20, 2021
* Release Vision Transformer 'AugReg' weights from [How to train your ViT? Data, Augmentation, and Regularization in Vision Transformers](https://arxiv.org/abs/2106.10270)
* .npz weight loading support added, can load any of the 50K+ weights from the [AugReg series](https://console.cloud.google.com/storage/browser/vit_models/augreg)
* See [example notebook](https://colab.research.google.com/github/google-research/vision_transformer/blob/master/vit_jax_augreg.ipynb) from [official impl](https://github.com/google-research/vision_transformer/) for navigating the augreg weights
* Replaced all default weights w/ best AugReg variant (if possible). All AugReg 21k classifiers work.
* Highlights: `vit_large_patch16_384` (87.1 top-1), `vit_large_r50_s32_384` (86.2 top-1), `vit_base_patch16_384` (86.0 top-1)
* `vit_deit_*` renamed to just `deit_*`
* Remove my old small model, replace with DeiT compatible small w/ AugReg weights
* Add 1st training of my `gmixer_24_224` MLP /w GLU, 78.1 top-1 w/ 25M params.
* Add weights from official ResMLP release (https://github.com/facebookresearch/deit)
* Add `eca_nfnet_l2` weights from my 'lightweight' series. 84.7 top-1 at 384x384.
* Add distilled BiT 50x1 student and 152x2 Teacher weights from [Knowledge distillation: A good teacher is patient and consistent](https://arxiv.org/abs/2106.05237)
* NFNets and ResNetV2-BiT models work w/ Pytorch XLA now
* weight standardization uses F.batch_norm instead of std_mean (std_mean wasn't lowered)
* eps values adjusted, will be slight differences but should be quite close
* Improve test coverage and classifier interface of non-conv (vision transformer and mlp) models
* Cleanup a few classifier / flatten details for models w/ conv classifiers or early global pool
* Please report any regressions, this PR touched quite a few models.
### June 8, 2021
* Add first ResMLP weights, trained in PyTorch XLA on TPU-VM w/ my XLA branch. 24 block variant, 79.2 top-1.
* Add ResNet51-Q model w/ pretrained weights at 82.36 top-1.
* NFNet inspired block layout with quad layer stem and no maxpool
* Same param count (35.7M) and throughput as ResNetRS-50 but +1.5 top-1 @ 224x224 and +2.5 top-1 at 288x288
### May 25, 2021
* Add LeViT, Visformer, Convit (PR by Aman Arora), Twins (PR by paper authors) transformer models
* Cleanup input_size/img_size override handling and testing for all vision transformer models
* Add `efficientnetv2_rw_m` model and weights (started training before official code). 84.8 top-1, 53M params.
### May 14, 2021
* Add EfficientNet-V2 official model defs w/ ported weights from official [Tensorflow/Keras](https://github.com/google/automl/tree/master/efficientnetv2) impl.
* 1k trained variants: `tf_efficientnetv2_s/m/l`
* 21k trained variants: `tf_efficientnetv2_s/m/l_in21k`
* 21k pretrained -> 1k fine-tuned: `tf_efficientnetv2_s/m/l_in21ft1k`
* v2 models w/ v1 scaling: `tf_efficientnetv2_b0` through `b3`
* Rename my prev V2 guess `efficientnet_v2s` -> `efficientnetv2_rw_s`
* Some blank `efficientnetv2_*` models in-place for future native PyTorch training
### May 5, 2021
* Add MLP-Mixer models and port pretrained weights from [Google JAX impl](https://github.com/google-research/vision_transformer/tree/linen)
* Add CaiT models and pretrained weights from [FB](https://github.com/facebookresearch/deit)
* Add ResNet-RS models and weights from [TF](https://github.com/tensorflow/tpu/tree/master/models/official/resnet/resnet_rs). Thanks [Aman Arora](https://github.com/amaarora)
* Add CoaT models and weights. Thanks [Mohammed Rizin](https://github.com/morizin)
* Add new ImageNet-21k weights & finetuned weights for TResNet, MobileNet-V3, ViT models. Thanks [mrT](https://github.com/mrT23)
* Add GhostNet models and weights. Thanks [Kai Han](https://github.com/iamhankai)
* Update ByoaNet attention models
* Improve SA module inits
* Hack together experimental stand-alone Swin based attn module and `swinnet`
* Consistent '26t' model defs for experiments.
* Add improved Efficientnet-V2S (prelim model def) weights. 83.8 top-1.
* WandB logging support
### April 13, 2021
* Add Swin Transformer models and weights from https://github.com/microsoft/Swin-Transformer
### April 12, 2021
* Add ECA-NFNet-L1 (slimmed down F1 w/ SiLU, 41M params) trained with this code. 84% top-1 @ 320x320. Trained at 256x256.
* Add EfficientNet-V2S model (unverified model definition) weights. 83.3 top-1 @ 288x288. Only trained single res 224. Working on progressive training.
* Add ByoaNet model definition (Bring-your-own-attention) w/ SelfAttention block and corresponding SA/SA-like modules and model defs
* Lambda Networks - https://arxiv.org/abs/2102.08602
* Bottleneck Transformers - https://arxiv.org/abs/2101.11605
* Halo Nets - https://arxiv.org/abs/2103.12731
* Adabelief optimizer contributed by Juntang Zhuang
### April 1, 2021
* Add snazzy `benchmark.py` script for bulk `timm` model benchmarking of train and/or inference
* Add Pooling-based Vision Transformer (PiT) models (from https://github.com/naver-ai/pit)
* Merged distilled variant into main for torchscript compatibility
* Some `timm` cleanup/style tweaks and weights have hub download support
* Cleanup Vision Transformer (ViT) models
* Merge distilled (DeiT) model into main so that torchscript can work
* Support updated weight init (defaults to old still) that closer matches original JAX impl (possibly better training from scratch)
* Separate hybrid model defs into different file and add several new model defs to fiddle with, support patch_size != 1 for hybrids
* Fix fine-tuning num_class changes (PiT and ViT) and pos_embed resizing (Vit) with distilled variants
* nn.Sequential for block stack (does not break downstream compat)
* TnT (Transformer-in-Transformer) models contributed by author (from https://gitee.com/mindspore/mindspore/tree/master/model_zoo/research/cv/TNT)
* Add RegNetY-160 weights from DeiT teacher model
* Add new NFNet-L0 w/ SE attn (rename `nfnet_l0b`->`nfnet_l0`) weights 82.75 top-1 @ 288x288
* Some fixes/improvements for TFDS dataset wrapper
### March 7, 2021
* First 0.4.x PyPi release w/ NFNets (& related), ByoB (GPU-Efficient, RepVGG, etc).
* Change feature extraction for pre-activation nets (NFNets, ResNetV2) to return features before activation.
### Feb 18, 2021
* Add pretrained weights and model variants for NFNet-F* models from [DeepMind Haiku impl](https://github.com/deepmind/deepmind-research/tree/master/nfnets).
* Models are prefixed with `dm_`. They require SAME padding conv, skipinit enabled, and activation gains applied in act fn.
* These models are big, expect to run out of GPU memory. With the GELU activation + other options, they are roughly 1/2 the inference speed of my SiLU PyTorch optimized `s` variants.
* Original model results are based on pre-processing that is not the same as all other models so you'll see different results in the results csv (once updated).
* Matching the original pre-processing as closely as possible I get these results:
* `dm_nfnet_f6` - 86.352
* `dm_nfnet_f5` - 86.100
* `dm_nfnet_f4` - 85.834
* `dm_nfnet_f3` - 85.676
* `dm_nfnet_f2` - 85.178
* `dm_nfnet_f1` - 84.696
* `dm_nfnet_f0` - 83.464
### Feb 16, 2021
* Add Adaptive Gradient Clipping (AGC) as per https://arxiv.org/abs/2102.06171. Integrated w/ PyTorch gradient clipping via a mode arg that defaults to the previous 'norm' mode. For backward arg compat, the clip-grad arg must be specified to enable it when using train.py. (Simplified sketch after this list.)
* AGC w/ default clipping factor `--clip-grad .01 --clip-mode agc`
* PyTorch global norm of 1.0 (old behaviour, always norm), `--clip-grad 1.0`
* PyTorch value clipping of 10, `--clip-grad 10. --clip-mode value`
* AGC performance is definitely sensitive to the clipping factor. More experimentation needed to determine good values for smaller batch sizes and optimizers besides those in paper. So far I've found .001-.005 is necessary for stable RMSProp training w/ NFNet/NF-ResNet.
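A simplified, per-tensor sketch of the AGC rule for illustration (the actual implementation clips unit-wise, per output row):
```
import torch

def agc_(param, clip_factor=0.01, eps=1e-3):
    # clip gradient where ||g|| / max(||w||, eps) exceeds clip_factor
    if param.grad is None:
        return
    w_norm = param.detach().norm().clamp_(min=eps)
    g_norm = param.grad.detach().norm()
    max_norm = w_norm * clip_factor
    if g_norm > max_norm:
        param.grad.detach().mul_(max_norm / g_norm.clamp(min=1e-6))
```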
### Feb 12, 2021
* Update Normalization-Free nets to include new NFNet-F (https://arxiv.org/abs/2102.06171) model defs
### Feb 10, 2021
* More model archs, incl a flexible ByobNet backbone ('Bring-your-own-blocks')
* GPU-Efficient-Networks (https://github.com/idstcv/GPU-Efficient-Networks), impl in `byobnet.py`
* RepVGG (https://github.com/DingXiaoH/RepVGG), impl in `byobnet.py`
* classic VGG (from torchvision, impl in `vgg`)
* Refinements to normalizer layer arg handling and normalizer+act layer handling in some models
* Default AMP mode changed to native PyTorch AMP instead of APEX. Issues not being fixed with APEX. Native works with `--channels-last` and `--torchscript` model training, APEX does not.
* Fix a few bugs introduced since last pypi release
### Feb 8, 2021
* Add several ResNet weights with ECA attention. 26t & 50t trained @ 256, test @ 320. 269d train @ 256, fine-tune @320, test @ 352.
* `ecaresnet26t` - 79.88 top-1 @ 320x320, 79.08 @ 256x256
* `ecaresnet50t` - 82.35 top-1 @ 320x320, 81.52 @ 256x256
* `ecaresnet269d` - 84.93 top-1 @ 352x352, 84.87 @ 320x320
* Remove separate tiered (`t`) vs tiered_narrow (`tn`) ResNet model defs, all `tn` defs changed to `t` and the old `t` models removed (`seresnext26t_32x4d` was the only model w/ weights that was removed).
* Support model default_cfgs with separate train vs test resolution `test_input_size` and remove extra `_320` suffix ResNet model defs that were just for test.
### Jan 30, 2021
* Add initial "Normalization Free" NF-RegNet-B* and NF-ResNet model definitions based on [paper](https://arxiv.org/abs/2101.08692)
### Jan 25, 2021
* Add ResNetV2 Big Transfer (BiT) models w/ ImageNet-1k and 21k weights from https://github.com/google-research/big_transfer
* Add official R50+ViT-B/16 hybrid models + weights from https://github.com/google-research/vision_transformer
* ImageNet-21k ViT weights are added w/ model defs and representation layer (pre logits) support
* NOTE: ImageNet-21k classifier heads were zero'd in original weights, they are only useful for transfer learning
* Add model defs and weights for DeiT Vision Transformer models from https://github.com/facebookresearch/deit
* Refactor dataset classes into ImageDataset/IterableImageDataset + dataset specific parser classes
* Add Tensorflow-Datasets (TFDS) wrapper to allow use of TFDS image classification sets with train script
* Ex: `train.py /data/tfds --dataset tfds/oxford_iiit_pet --val-split test --model resnet50 -b 256 --amp --num-classes 37 --opt adamw --lr 3e-4 --weight-decay .001 --pretrained -j 2`
* Add improved .tar dataset parser that reads images from .tar, folder of .tar files, or .tar within .tar
* Run validation on full ImageNet-21k directly from tar w/ BiT model: `validate.py /data/fall11_whole.tar --model resnetv2_50x1_bitm_in21k --amp`
* Models in this update should be stable w/ possible exception of ViT/BiT, possibility of some regressions with train/val scripts and dataset handling
### Jan 3, 2021
* Add SE-ResNet-152D weights
* 256x256 val, 0.94 crop top-1 - 83.75
* 320x320 val, 1.0 crop - 84.36
* Update results files
### Dec 18, 2020
* Add ResNet-101D, ResNet-152D, and ResNet-200D weights trained @ 256x256
* 256x256 val, 0.94 crop (top-1) - 101D (82.33), 152D (83.08), 200D (83.25)
* 288x288 val, 1.0 crop - 101D (82.64), 152D (83.48), 200D (83.76)
* 320x320 val, 1.0 crop - 101D (83.00), 152D (83.66), 200D (84.01)
### Dec 7, 2020
* Simplify EMA module (ModelEmaV2), compatible with fully torchscripted models
* Misc fixes for SiLU ONNX export, default_cfg missing from Feature extraction models, Linear layer w/ AMP + torchscript
* PyPi release @ 0.3.2 (needed by EfficientDet)
### Oct 30, 2020
* Test with PyTorch 1.7 and fix a small top-n metric view vs reshape issue.
* Convert newly added 224x224 Vision Transformer weights from official JAX repo. 81.8 top-1 for B/16, 83.1 L/16.
* Support PyTorch 1.7 optimized, native SiLU (aka Swish) activation. Add mapping to 'silu' name, custom swish will eventually be deprecated.
* Fix regression for loading pretrained classifier via direct model entrypoint functions. Didn't impact create_model() factory usage.
* PyPi release @ 0.3.0 version!
### Oct 26, 2020
* Update Vision Transformer models to be compatible with official code release at https://github.com/google-research/vision_transformer
* Add Vision Transformer weights (ImageNet-21k pretrain) for 384x384 base and large models converted from official jax impl
* ViT-B/16 - 84.2
* ViT-B/32 - 81.7
* ViT-L/16 - 85.2
* ViT-L/32 - 81.5
### Oct 21, 2020
* Weights added for Vision Transformer (ViT) models. 77.86 top-1 for 'small' and 79.35 for 'base'. Thanks to [Christof](https://www.kaggle.com/christofhenkel) for training the base model w/ lots of GPUs.
### Oct 13, 2020
* Initial impl of Vision Transformer models. Both patch and hybrid (CNN backbone) variants. Currently trying to train...
* Adafactor and AdaHessian (FP32 only, no AMP) optimizers
* EdgeTPU-M (`efficientnet_em`) model trained in PyTorch, 79.3 top-1
* Pip release, doc updates pending a few more changes...
### Sept 18, 2020
* New ResNet 'D' weights. 72.7 (top-1) ResNet-18-D, 77.1 ResNet-34-D, 80.5 ResNet-50-D
* Added a few untrained defs for other ResNet models (66D, 101D, 152D, 200/200D)
### Sept 3, 2020
* New weights
* Wide-ResNet50 - 81.5 top-1 (vs 78.5 torchvision)
* SEResNeXt50-32x4d - 81.3 top-1 (vs 79.1 cadene)
* Support for native Torch AMP and channels_last memory format added to train/validate scripts (`--channels-last`, `--native-amp` vs `--apex-amp`)
* Models tested with channels_last on latest NGC 20.08 container. AdaptiveAvgPool in attn layers changed to mean((2,3)) to work around bug with NHWC kernel.
### Aug 12, 2020
* New/updated weights from training experiments
* EfficientNet-B3 - 82.1 top-1 (vs 81.6 for official with AA and 81.9 for AdvProp)
* RegNetY-3.2GF - 82.0 top-1 (78.9 from official ver)
* CSPResNet50 - 79.6 top-1 (76.6 from official ver)
* Add CutMix integrated w/ Mixup. See [pull request](https://github.com/rwightman/pytorch-image-models/pull/218) for some usage examples
* Some fixes for using pretrained weights with `in_chans` != 3 on several models.
### Aug 5, 2020
Universal feature extraction, new models, new weights, new test sets.
* All models support the `features_only=True` argument for the `create_model` call, returning a network that extracts feature maps from the deepest layer at each stride (see the sketch after this list).
* New models
* CSPResNet, CSPResNeXt, CSPDarkNet, DarkNet
* ReXNet
* (Modified Aligned) Xception41/65/71 (a proper port of TF models)
* New trained weights
* SEResNet50 - 80.3 top-1
* CSPDarkNet53 - 80.1 top-1
* CSPResNeXt50 - 80.0 top-1
* DPN68b - 79.2 top-1
* EfficientNet-Lite0 (non-TF ver) - 75.5 (submitted by [@hal-314](https://github.com/hal-314))
* Add 'real' labels for ImageNet and ImageNet-Renditions test set, see [`results/README.md`](results/README.md)
* Test set ranking/top-n diff script by [@KushajveerSingh](https://github.com/KushajveerSingh)
* Train script and loader/transform tweaks to punch through more aug arguments
* README and documentation overhaul. See initial (WIP) documentation at https://rwightman.github.io/pytorch-image-models/
* adamp and sgdp optimizers added by [@hellbell](https://github.com/hellbell)
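A short sketch of the `features_only` interface mentioned at the top of this list:
```
import torch
import timm

model = timm.create_model('resnet50', features_only=True)
feats = model(torch.randn(1, 3, 224, 224))
print([f.shape for f in feats])       # one map per output stride (2, 4, 8, 16, 32 for resnet50)
print(model.feature_info.channels())  # channel count of each returned map
```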
### June 11, 2020
Bunch of changes:
* DenseNet models updated with memory efficient addition from torchvision (fixed a bug), blur pooling and deep stem additions
* VoVNet V1 and V2 models added, 39 V2 variant (ese_vovnet_39b) trained to 79.3 top-1
* Activation factory added along with new activations:
* select act at model creation time for more flexibility in using activations compatible with scripting or tracing (ONNX export)
* hard_mish (experimental) added with memory-efficient grad, along with ME hard_swish
* context mgr for setting exportable/scriptable/no_jit states
* Norm + Activation combo layers added with initial trial support in DenseNet and VoVNet along with impl of EvoNorm and InplaceAbn wrapper that fit the interface
* Torchscript works for all but two of the model types as long as using Pytorch 1.5+, tests added for this
* Some import cleanup and classifier reset changes, all models will have classifier reset to nn.Identity on reset_classifer(0) call
* Prep for 0.1.28 pip release
### May 12, 2020
* Add ResNeSt models (code adapted from https://github.com/zhanghang1989/ResNeSt, paper https://arxiv.org/abs/2004.08955)
### May 3, 2020
* Pruned EfficientNet B1, B2, and B3 (https://arxiv.org/abs/2002.08258) contributed by [Yonathan Aflalo](https://github.com/yoniaflalo)
### May 1, 2020
* Merged a number of excellent contributions in the ResNet model family over the past month
* BlurPool2D and resnetblur models initiated by [Chris Ha](https://github.com/VRandme), I trained resnetblur50 to 79.3.
* TResNet models and SpaceToDepth, AntiAliasDownsampleLayer layers by [mrT23](https://github.com/mrT23)
* ecaresnet (50d, 101d, light) models and two pruned variants using pruning as per (https://arxiv.org/abs/2002.08258) by [Yonathan Aflalo](https://github.com/yoniaflalo)
* 200 pretrained models in total now with updated results csv in results folder
### April 5, 2020
* Add some newly trained MobileNet-V2 models trained with latest h-params, rand augment. They compare quite favourably to EfficientNet-Lite
* 3.5M param MobileNet-V2 100 @ 73%
* 4.5M param MobileNet-V2 110d @ 75%
* 6.1M param MobileNet-V2 140 @ 76.5%
* 5.8M param MobileNet-V2 120d @ 77.3%
### March 18, 2020
* Add EfficientNet-Lite models w/ weights ported from [Tensorflow TPU](https://github.com/tensorflow/tpu/tree/master/models/official/efficientnet/lite)
* Add RandAugment trained ResNeXt-50 32x4d weights with 79.8 top-1. Trained by [Andrew Lavin](https://github.com/andravin) (see Training section for hparams)
### Feb 29, 2020
* New MobileNet-V3 Large weights trained from scratch with this code to 75.77% top-1
* IMPORTANT CHANGE - default weight init changed for all MobilenetV3 / EfficientNet / related models
* overall results similar to, or a bit better than, previous training from scratch on the few smaller models tried
* performance early in training seems consistently improved but less difference by end
* set `fix_group_fanout=False` in `_init_weight_goog` fn if you need to reproduce past behaviour
* Experimental LR noise feature added; applies a random perturbation to LR each epoch within a specified range of training
### Feb 18, 2020
* Big refactor of model layers and addition of several attention mechanisms. Several additions motivated by 'Compounding the Performance Improvements...' (https://arxiv.org/abs/2001.06268):
* Move layer/module impl into `layers` subfolder/module of `models` and organize in a more granular fashion
* ResNet downsample paths now properly support dilation (output stride != 32) for avg_pool ('D' variant) and 3x3 (SENets) networks
* Add Selective Kernel Nets on top of ResNet base, pretrained weights
* skresnet18 - 73% top-1
* skresnet34 - 76.9% top-1
* skresnext50_32x4d (equiv to SKNet50) - 80.2% top-1
* ECA and CECA (circular padding) attention layer contributed by [Chris Ha](https://github.com/VRandme)
* CBAM attention experiment (not the best results so far, may remove)
* Attention factory to allow dynamically selecting one of SE, ECA, CBAM in the `.se` position for all ResNets
* Add DropBlock and DropPath (formerly DropConnect for EfficientNet/MobileNetv3) support to all ResNet variants
* Full dataset results updated that incl NoisyStudent weights and 2 of the 3 SK weights
### Feb 12, 2020
* Add EfficientNet-L2 and B0-B7 NoisyStudent weights ported from [Tensorflow TPU](https://github.com/tensorflow/tpu/tree/master/models/official/efficientnet)
### Feb 6, 2020
* Add RandAugment trained EfficientNet-ES (EdgeTPU-Small) weights with 78.1 top-1. Trained by [Andrew Lavin](https://github.com/andravin) (see Training section for hparams)
### Feb 1/2, 2020
* Port new EfficientNet-B8 (RandAugment) weights, these are different than the B8 AdvProp, different input normalization.
* Update results csv files on all models for ImageNet validation and three other test sets
* Push PyPi package update
### Jan 31, 2020
* Update ResNet50 weights with a new 79.038 result from further JSD / AugMix experiments. Full command line for reproduction in training section below.
### Jan 11/12, 2020
* Master may be a bit unstable w.r.t. training, these changes have been tested but not all combos
* Implementations of AugMix added alongside existing RA and AA, including numerous supporting pieces like JSD loss (Jensen-Shannon divergence + CE) and AugMixDataset
* SplitBatchNorm adaptation layer added for implementing Auxiliary BN as per AdvProp paper
* ResNet-50 AugMix trained model w/ 79% top-1 added
* `seresnext26tn_32x4d` - 77.99 top-1, 93.75 top-5 added to tiered experiment, higher img/s than 't' and 'd'
### Jan 3, 2020
* Add RandAugment trained EfficientNet-B0 weight with 77.7 top-1. Trained by [Michael Klachko](https://github.com/michaelklachko) with this code and recent hparams (see Training section)
* Add `avg_checkpoints.py` script for post training weight averaging and update all scripts with header docstrings and shebangs.
### Dec 30, 2019
* Merge [Dushyant Mehta's](https://github.com/mehtadushy) PR for SelecSLS (Selective Short and Long Range Skip Connections) networks. Good GPU memory consumption and throughput. Original: https://github.com/mehtadushy/SelecSLS-Pytorch
### Dec 28, 2019
* Add new model weights and training hparams (see Training Hparams section)
* `efficientnet_b3` - 81.5 top-1, 95.7 top-5 at default res/crop, 81.9, 95.8 at 320x320 1.0 crop-pct
* trained with RandAugment, ended up with an interesting but less than perfect result (see training section)
* `seresnext26d_32x4d`- 77.6 top-1, 93.6 top-5
* deep stem (32, 32, 64), avgpool downsample
* stem/downsample from bag-of-tricks paper
* `seresnext26t_32x4d`- 78.0 top-1, 93.7 top-5
* deep tiered stem (24, 48, 64), avgpool downsample (a modified 'D' variant)
* stem sizing mods from Jeremy Howard and fastai devs discussing ResNet architecture experiments
### Dec 23, 2019
* Add RandAugment trained MixNet-XL weights with 80.48 top-1.
* `--dist-bn` argument added to train.py, will distribute BN stats between nodes after each train epoch, before eval
### Dec 4, 2019
* Added weights from the first training from scratch of an EfficientNet (B2) with my new RandAugment implementation. Much better than my previous B2 and very close to the official AdvProp ones (80.4 top-1, 95.08 top-5).
### Nov 29, 2019
* Brought EfficientNet and MobileNetV3 up to date with my https://github.com/rwightman/gen-efficientnet-pytorch code. Torchscript and ONNX export compat excluded.
* AdvProp weights added
* Official TF MobileNetv3 weights added
* EfficientNet and MobileNetV3 hook based 'feature extraction' classes added. Will serve as basis for using models as backbones in obj detection/segmentation tasks. Lots more to be done here...
* HRNet classification models and weights added from https://github.com/HRNet/HRNet-Image-Classification
* Consistency in global pooling, `reset_classifier`, and `forward_features` across models
* `forward_features` always returns unpooled feature maps now
* Reasonable chance I broke something... let me know
### Nov 22, 2019
* Add ImageNet training RandAugment implementation alongside AutoAugment. PyTorch Transform compatible format, using PIL. Currently training two EfficientNet models from scratch with promising results... will update.
* `drop-connect` cmd line arg finally added to `train.py`, no need to hack model fns. Works for efficientnet/mobilenetv3 based models, ignored otherwise.

@ -1,187 +0,0 @@
# Recent Changes
### July 27, 2022
* All runtime benchmark and validation result csv files are up-to-date!
* A few more weights & model defs added:
* `darknetaa53` - 79.8 @ 256, 80.5 @ 288
* `convnext_nano` - 80.8 @ 224, 81.5 @ 288
* `cs3sedarknet_l` - 81.2 @ 256, 81.8 @ 288
* `cs3darknet_x` - 81.8 @ 256, 82.2 @ 288
* `cs3sedarknet_x` - 82.2 @ 256, 82.7 @ 288
* `cs3edgenet_x` - 82.2 @ 256, 82.7 @ 288
* `cs3se_edgenet_x` - 82.8 @ 256, 83.5 @ 320
* `cs3*` weights above all trained on TPU w/ `bits_and_tpu` branch. Thanks to TRC program!
* Add output_stride=8 and 16 support to ConvNeXt (dilation)
* Fixed deit3 models not being able to resize pos_emb
* Version 0.6.7 PyPi release (w/ above bug fixes and new weights since 0.6.5)
### July 8, 2022
More models, more fixes
* Official research models (w/ weights) added:
* EdgeNeXt from (https://github.com/mmaaz60/EdgeNeXt)
* MobileViT-V2 from (https://github.com/apple/ml-cvnets)
* DeiT III (Revenge of the ViT) from (https://github.com/facebookresearch/deit)
* My own models:
* Small `ResNet` defs added by request with 1 block repeats for both basic and bottleneck (resnet10 and resnet14)
* `CspNet` refactored with dataclass config, simplified CrossStage3 (`cs3`) option. These are closer to YOLO-v5+ backbone defs.
* More relative position vit fiddling. Two `srelpos` (shared relative position) models trained, and a medium w/ class token.
* Add an alternate downsample mode to EdgeNeXt and train a `small` model. Better than original small, but not their new USI trained weights.
* My own model weight results (all ImageNet-1k training)
* `resnet10t` - 66.5 @ 176, 68.3 @ 224
* `resnet14t` - 71.3 @ 176, 72.3 @ 224
* `resnetaa50` - 80.6 @ 224 , 81.6 @ 288
* `darknet53` - 80.0 @ 256, 80.5 @ 288
* `cs3darknet_m` - 77.0 @ 256, 77.6 @ 288
* `cs3darknet_focus_m` - 76.7 @ 256, 77.3 @ 288
* `cs3darknet_l` - 80.4 @ 256, 80.9 @ 288
* `cs3darknet_focus_l` - 80.3 @ 256, 80.9 @ 288
* `vit_srelpos_small_patch16_224` - 81.1 @ 224, 82.1 @ 320
* `vit_srelpos_medium_patch16_224` - 82.3 @ 224, 83.1 @ 320
* `vit_relpos_small_patch16_cls_224` - 82.6 @ 224, 83.6 @ 320
* `edgenext_small_rw` - 79.6 @ 224, 80.4 @ 320
* `cs3`, `darknet`, and `vit_*relpos` weights above all trained on TPU thanks to TRC program! Rest trained on overheating GPUs.
* Hugging Face Hub support fixes verified, demo notebook TBA
* Pretrained weights / configs can be loaded externally (ie from local disk) w/ support for head adaptation.
* Add support to change image extensions scanned by `timm` datasets/parsers. See (https://github.com/rwightman/pytorch-image-models/pull/1274#issuecomment-1178303103)
* Default ConvNeXt LayerNorm impl to use `F.layer_norm(x.permute(0, 2, 3, 1), ...).permute(0, 3, 1, 2)` via `LayerNorm2d` in all cases.
* a bit slower than previous custom impl on some hardware (ie Ampere w/ CL), but overall fewer regressions across wider HW / PyTorch version ranges.
* previous impl exists as `LayerNormExp2d` in `models/layers/norm.py`
* Numerous bug fixes
* Currently testing for imminent PyPi 0.6.x release
* LeViT pretraining of larger models still a WIP, they don't train well / easily without distillation. Time to add distill support (finally)?
* ImageNet-22k weight training + finetune ongoing, work on multi-weight support (slowly) chugging along (there are a LOT of weights, sigh) ...
### May 13, 2022
* Official Swin-V2 models and weights added from (https://github.com/microsoft/Swin-Transformer). Cleaned up to support torchscript.
* Some refactoring for existing `timm` Swin-V2-CR impl, will likely do a bit more to bring parts closer to official and decide whether to merge some aspects.
* More Vision Transformer relative position / residual post-norm experiments (all trained on TPU thanks to TRC program)
* `vit_relpos_small_patch16_224` - 81.5 @ 224, 82.5 @ 320 -- rel pos, layer scale, no class token, avg pool
* `vit_relpos_medium_patch16_rpn_224` - 82.3 @ 224, 83.1 @ 320 -- rel pos + res-post-norm, no class token, avg pool
* `vit_relpos_medium_patch16_224` - 82.5 @ 224, 83.3 @ 320 -- rel pos, layer scale, no class token, avg pool
* `vit_relpos_base_patch16_gapcls_224` - 82.8 @ 224, 83.9 @ 320 -- rel pos, layer scale, class token, avg pool (by mistake)
* Bring 512 dim, 8-head 'medium' ViT model variant back to life (after using in a pre DeiT 'small' model for first ViT impl back in 2020)
* Add ViT relative position support for switching btw existing impl and some additions in official Swin-V2 impl for future trials
* Sequencer2D impl (https://arxiv.org/abs/2205.01972), added via PR from author (https://github.com/okojoalg)
### May 2, 2022
* Vision Transformer experiments adding Relative Position (Swin-V2 log-coord) (`vision_transformer_relpos.py`) and Residual Post-Norm branches (from Swin-V2) (`vision_transformer*.py`)
* `vit_relpos_base_patch32_plus_rpn_256` - 79.5 @ 256, 80.6 @ 320 -- rel pos + extended width + res-post-norm, no class token, avg pool
* `vit_relpos_base_patch16_224` - 82.5 @ 224, 83.6 @ 320 -- rel pos, layer scale, no class token, avg pool
* `vit_base_patch16_rpn_224` - 82.3 @ 224 -- rel pos + res-post-norm, no class token, avg pool
* Vision Transformer refactor to remove representation layer that was only used in initial vit and rarely used since with newer pretrain (ie `How to Train Your ViT`)
* `vit_*` models support removal of class token, use of global average pool, use of fc_norm (ala beit, mae).
### April 22, 2022
* `timm` models are now officially supported in [fast.ai](https://www.fast.ai/)! Just in time for the new Practical Deep Learning course. `timmdocs` documentation link updated to [timm.fast.ai](http://timm.fast.ai/).
* Two more model weights added in the TPU trained [series](https://github.com/rwightman/pytorch-image-models/releases/tag/v0.1-tpu-weights). Some In22k pretrain still in progress.
* `seresnext101d_32x8d` - 83.69 @ 224, 84.35 @ 288
* `seresnextaa101d_32x8d` (anti-aliased w/ AvgPool2d) - 83.85 @ 224, 84.57 @ 288
### March 23, 2022
* Add `ParallelBlock` and `LayerScale` option to base vit models to support model configs in [Three things everyone should know about ViT](https://arxiv.org/abs/2203.09795)
* `convnext_tiny_hnf` (head norm first) weights trained with (close to) A2 recipe, 82.2% top-1, could do better with more epochs.
### March 21, 2022
* Merge `norm_norm_norm`. **IMPORTANT** this update for a coming 0.6.x release will likely de-stabilize the master branch for a while. Branch [`0.5.x`](https://github.com/rwightman/pytorch-image-models/tree/0.5.x) or a previous 0.5.x release can be used if stability is required.
* Significant weights update (all TPU trained) as described in this [release](https://github.com/rwightman/pytorch-image-models/releases/tag/v0.1-tpu-weights)
* `regnety_040` - 82.3 @ 224, 82.96 @ 288
* `regnety_064` - 83.0 @ 224, 83.65 @ 288
* `regnety_080` - 83.17 @ 224, 83.86 @ 288
* `regnetv_040` - 82.44 @ 224, 83.18 @ 288 (timm pre-act)
* `regnetv_064` - 83.1 @ 224, 83.71 @ 288 (timm pre-act)
* `regnetz_040` - 83.67 @ 256, 84.25 @ 320
* `regnetz_040h` - 83.77 @ 256, 84.5 @ 320 (w/ extra fc in head)
* `resnetv2_50d_gn` - 80.8 @ 224, 81.96 @ 288 (pre-act GroupNorm)
* `resnetv2_50d_evos` 80.77 @ 224, 82.04 @ 288 (pre-act EvoNormS)
* `regnetz_c16_evos` - 81.9 @ 256, 82.64 @ 320 (EvoNormS)
* `regnetz_d8_evos` - 83.42 @ 256, 84.04 @ 320 (EvoNormS)
* `xception41p` - 82 @ 299 (timm pre-act)
* `xception65` - 83.17 @ 299
* `xception65p` - 83.14 @ 299 (timm pre-act)
* `resnext101_64x4d` - 82.46 @ 224, 83.16 @ 288
* `seresnext101_32x8d` - 83.57 @ 224, 84.270 @ 288
* `resnetrs200` - 83.85 @ 256, 84.44 @ 320
* HuggingFace hub support fixed w/ initial groundwork for allowing alternative 'config sources' for pretrained model definitions and weights (generic local file / remote url support soon)
* SwinTransformer-V2 implementation added. Submitted by [Christoph Reich](https://github.com/ChristophReich1996). Training experiments and model changes by myself are ongoing so expect compat breaks.
* Swin-S3 (AutoFormerV2) models / weights added from https://github.com/microsoft/Cream/tree/main/AutoFormerV2
* MobileViT models w/ weights adapted from https://github.com/apple/ml-cvnets
* PoolFormer models w/ weights adapted from https://github.com/sail-sg/poolformer
* VOLO models w/ weights adapted from https://github.com/sail-sg/volo
* Significant work experimenting with non-BatchNorm norm layers such as EvoNorm, FilterResponseNorm, GroupNorm, etc
* Enhance support for alternate norm + act ('NormAct') layers added to a number of models, esp EfficientNet/MobileNetV3, RegNet, and aligned Xception
* Grouped conv support added to EfficientNet family
* Add 'group matching' API to all models to allow grouping model parameters for application of 'layer-wise' LR decay, lr scale added to LR scheduler
* Gradient checkpointing support added to many models
* `forward_head(x, pre_logits=False)` fn added to all models to allow separate calls of `forward_features` + `forward_head`
* All vision transformer and vision MLP models updated to return non-pooled / non-token selected features from `forward_features`, for consistency with CNN models; token selection or pooling now applied in `forward_head`
### Feb 2, 2022
* [Chris Hughes](https://github.com/Chris-hughes10) posted an exhaustive run-through of `timm` on his blog yesterday. Well worth a read. [Getting Started with PyTorch Image Models (timm): A Practitioner's Guide](https://towardsdatascience.com/getting-started-with-pytorch-image-models-timm-a-practitioners-guide-4e77b4bf9055)
* I'm currently prepping to merge the `norm_norm_norm` branch back to master (ver 0.6.x) in next week or so.
* The changes are more extensive than usual and may destabilize and break some model API use (aiming for full backwards compat). So, beware `pip install git+https://github.com/rwightman/pytorch-image-models` installs!
* `0.5.x` releases and a `0.5.x` branch will remain stable with a cherry pick or two until dust clears. Recommend sticking to pypi install for a bit if you want stable.
### Jan 14, 2022
* Version 0.5.4 w/ release to be pushed to pypi. It's been a while since last pypi update and riskier changes will be merged to main branch soon....
* Add ConvNeXt models w/ weights from official impl (https://github.com/facebookresearch/ConvNeXt), a few perf tweaks, compatible with timm features
* Tried training a few small (~1.8-3M param) / mobile optimized models, a few are good so far, more on the way...
* `mnasnet_small` - 65.6 top-1
* `mobilenetv2_050` - 65.9
* `lcnet_100/075/050` - 72.1 / 68.8 / 63.1
* `semnasnet_075` - 73
* `fbnetv3_b/d/g` - 79.1 / 79.7 / 82.0
* TinyNet models added by [rsomani95](https://github.com/rsomani95)
* LCNet added via MobileNetV3 architecture
### Nov 22, 2021
* A number of updated weights and new model defs
* `eca_halonext26ts` - 79.5 @ 256
* `resnet50_gn` (new) - 80.1 @ 224, 81.3 @ 288
* `resnet50` - 80.7 @ 224, 80.9 @ 288 (trained at 176, not replacing current a1 weights as default since these don't scale as well to higher res, [weights](https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-rsb-weights/resnet50_a1h2_176-001a1197.pth))
* `resnext50_32x4d` - 81.1 @ 224, 82.0 @ 288
* `sebotnet33ts_256` (new) - 81.2 @ 224
* `lamhalobotnet50ts_256` - 81.5 @ 256
* `halonet50ts` - 81.7 @ 256
* `halo2botnet50ts_256` - 82.0 @ 256
* `resnet101` - 82.0 @ 224, 82.8 @ 288
* `resnetv2_101` (new) - 82.1 @ 224, 83.0 @ 288
* `resnet152` - 82.8 @ 224, 83.5 @ 288
* `regnetz_d8` (new) - 83.5 @ 256, 84.0 @ 320
* `regnetz_e8` (new) - 84.5 @ 256, 85.0 @ 320
* `vit_base_patch8_224` (85.8 top-1) & `in21k` variant weights added thanks [Martins Bruveris](https://github.com/martinsbruveris)
* Groundwork in for FX feature extraction thanks to [Alexander Soare](https://github.com/alexander-soare)
* models updated for tracing compatibility (almost full support with some distilled transformer exceptions)
### Oct 19, 2021
* ResNet strikes back (https://arxiv.org/abs/2110.00476) weights added, plus any extra training components used. Model weights and some more details here (https://github.com/rwightman/pytorch-image-models/releases/tag/v0.1-rsb-weights)
* BCE loss and Repeated Augmentation support for RSB paper
* 4 series of ResNet based attention model experiments being added (implemented across byobnet.py/byoanet.py). These include all sorts of attention, from channel attn like SE, ECA to 2D QKV self-attention layers such as Halo, Bottleneck, Lambda. Details here (https://github.com/rwightman/pytorch-image-models/releases/tag/v0.1-attn-weights)
* Working implementations of the following 2D self-attention modules (likely to be differences from paper or eventual official impl):
* Halo (https://arxiv.org/abs/2103.12731)
* Bottleneck Transformer (https://arxiv.org/abs/2101.11605)
* LambdaNetworks (https://arxiv.org/abs/2102.08602)
* A RegNetZ series of models with some attention experiments (being added to). These do not follow the paper (https://arxiv.org/abs/2103.06877) in any way other than block architecture, details of official models are not available. See more here (https://github.com/rwightman/pytorch-image-models/releases/tag/v0.1-attn-weights)
* ConvMixer (https://openreview.net/forum?id=TVHS5Y4dNvM), CrossVit (https://arxiv.org/abs/2103.14899), and BeiT (https://arxiv.org/abs/2106.08254) architectures + weights added
* freeze/unfreeze helpers by [Alexander Soare](https://github.com/alexander-soare)
### Aug 18, 2021
* Optimizer bonanza!
* Add LAMB and LARS optimizers, incl trust ratio clipping options. Tweaked to work properly in PyTorch XLA (tested on TPUs w/ `timm bits` [branch](https://github.com/rwightman/pytorch-image-models/tree/bits_and_tpu/timm/bits))
* Add MADGRAD from FB research w/ a few tweaks (decoupled decay option, step handling that works with PyTorch XLA)
* Some cleanup on all optimizers and factory. No more `.data`, a bit more consistency, unit tests for all!
* SGDP and AdamP still won't work with PyTorch XLA but others should (have yet to test Adabelief, Adafactor, Adahessian myself).
* EfficientNet-V2 XL TF ported weights added, but they don't validate well in PyTorch (L is better). The pre-processing for the V2 TF training is a bit different and the fine-tuned 21k -> 1k weights are very sensitive and less robust than the 1k weights.
* Added PyTorch trained EfficientNet-V2 'Tiny' w/ GlobalContext attn weights. Only .1-.2 top-1 better than the SE so more of a curiosity for those interested.

@ -0,0 +1,54 @@
# Sharing and Loading Models From the Hugging Face Hub
The `timm` library has a built-in integration with the Hugging Face Hub, making it easy to share and load models from the 🤗 Hub.
In this short guide, we'll see how to:
1. Share a `timm` model on the Hub
2. Load that model back from the Hub
## Authenticating
First, you'll need to make sure you have the `huggingface_hub` package installed.
```bash
pip install huggingface_hub
```
Then, you'll need to authenticate yourself. You can do this by running the following command:
```bash
huggingface-cli login
```
Or, if you're using a notebook, you can use the `notebook_login` helper:
```py
>>> from huggingface_hub import notebook_login
>>> notebook_login()
```
## Sharing a Model
```py
>>> import timm
>>> model = timm.create_model('resnet18', pretrained=True, num_classes=4)
```
Here is where you would normally train or fine-tune the model. We'll skip that for the sake of this tutorial.
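If you're curious what that step could look like, here's a minimal sketch of one fine-tuning pass. The `train_loader` yielding `(images, labels)` batches for our 4 classes is hypothetical, not something this guide defines:
```py
>>> import torch
>>> optimizer = torch.optim.AdamW(model.parameters(), lr=1e-4)
>>> model.train()
>>> for images, labels in train_loader:  # hypothetical DataLoader over your own dataset
...     optimizer.zero_grad()
...     loss = torch.nn.functional.cross_entropy(model(images), labels)
...     loss.backward()
...     optimizer.step()
```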
Let's pretend we've now fine-tuned the model. The next step would be to push it to the Hub! We can do this with the `timm.models.hub.push_to_hf_hub` function.
```py
>>> model_cfg = dict(labels=['a', 'b', 'c', 'd'])
>>> timm.models.hub.push_to_hf_hub(model, 'resnet18-random', model_config=model_cfg)
```
Running the above would push the model to `<your-username>/resnet18-random` on the Hub. You can now share this model with your friends, or use it in your own code!
## Loading a Model
Loading a model from the Hub is as simple as calling `timm.create_model` with the model name prefixed by `hf_hub:` and `pretrained=True`. In this case, we'll use [`nateraw/resnet18-random`](https://huggingface.co/nateraw/resnet18-random), which is the model we just pushed to the Hub.
```py
>>> model_reloaded = timm.create_model('hf_hub:nateraw/resnet18-random', pretrained=True)
```

@ -1,89 +1,22 @@
# Getting Started
# timm
## Welcome
<img class="float-left !m-0 !border-0 !dark:border-0 !shadow-none !max-w-lg w-[150px]" src="https://huggingface.co/front/thumbnails/docs/timm.png"/>
Welcome to the `timm` documentation, a lean set of docs that covers the basics of `timm`.
`timm` is a library containing SOTA computer vision models, layers, utilities, optimizers, schedulers, data-loaders, augmentations, and training/evaluation scripts.
For a more comprehensive set of docs (currently under development), please visit [timmdocs](http://timm.fast.ai) by [Aman Arora](https://github.com/amaarora).
It comes packaged with >700 pretrained models, and is designed to be flexible and easy to use.
## Install
Read the [quick start guide](quickstart) to get up and running with the `timm` library. You will learn how to load, discover, and use pretrained models included in the library.
The library can be installed with pip:
```
pip install timm
```
I update the PyPi (pip) packages when I'm confident there are no significant model regressions from previous releases. If you want to pip install the bleeding edge from GitHub, use:
```
pip install git+https://github.com/rwightman/pytorch-image-models.git
```
### Conda Environment
<Tip>
- All development and testing has been done in Conda Python 3 environments on Linux x86-64 systems, specifically 3.7, 3.8, 3.9, 3.10
- Little to no care has been taken to be Python 2.x friendly, and it will not be supported. If you run into any challenges running on Windows or another OS, I'm definitely open to looking into those issues, so long as they can be reproduced in a Conda environment.
- PyTorch versions 1.9, 1.10, 1.11 have been tested with the latest versions of this code.
</Tip>
I've tried to keep the dependencies minimal; the setup is as per the PyTorch default install instructions for Conda:
```bash
conda create -n torch-env
conda activate torch-env
conda install pytorch torchvision cudatoolkit=11.3 -c pytorch
conda install pyyaml
```
## Load a Pretrained Model
Pretrained models can be loaded using `timm.create_model`
```py
>>> import timm
>>> m = timm.create_model('mobilenetv3_large_100', pretrained=True)
>>> m.eval()
```
## List Models with Pretrained Weights
```py
>>> import timm
>>> from pprint import pprint
>>> model_names = timm.list_models(pretrained=True)
>>> pprint(model_names)
[
'adv_inception_v3',
'cspdarknet53',
'cspresnext50',
'densenet121',
'densenet161',
'densenet169',
'densenet201',
'densenetblur121d',
'dla34',
'dla46_c',
]
```
## List Model Architectures by Wildcard
```py
>>> import timm
>>> from pprint import pprint
>>> model_names = timm.list_models('*resne*t*')
>>> pprint(model_names)
[
'cspresnet50',
'cspresnet50d',
'cspresnet50w',
'cspresnext50',
...
]
```
<div class="mt-10">
<div class="w-full flex flex-col space-y-4 md:space-y-0 md:grid md:grid-cols-2 md:gap-y-4 md:gap-x-5">
<a class="!no-underline border dark:border-gray-700 p-5 rounded-lg shadow hover:shadow-lg" href="./feature_extraction"
><div class="w-full text-center bg-gradient-to-br from-blue-400 to-blue-500 rounded-lg py-1.5 font-semibold mb-5 text-white text-lg leading-relaxed">Tutorials</div>
<p class="text-gray-700">Learn the basics and become familiar with timm. Start here if you are using timm for the first time!</p>
</a>
<a class="!no-underline border dark:border-gray-700 p-5 rounded-lg shadow hover:shadow-lg" href="./reference/models"
><div class="w-full text-center bg-gradient-to-br from-purple-400 to-purple-500 rounded-lg py-1.5 font-semibold mb-5 text-white text-lg leading-relaxed">Reference</div>
<p class="text-gray-700">Technical descriptions of how timm classes and methods work.</p>
</a>
</div>
</div>

@ -0,0 +1,74 @@
# Installation
Before you start, you'll need to set up your environment and install the appropriate packages. `timm` is tested on **Python 3+**.
## Virtual Environment
You should install `timm` in a [virtual environment](https://docs.python.org/3/library/venv.html) to keep things tidy and avoid dependency conflicts.
1. Create and navigate to your project directory:
```bash
mkdir ~/my-project
cd ~/my-project
```
2. Start a virtual environment inside your directory:
```bash
python -m venv .env
```
3. Activate and deactivate the virtual environment with the following commands:
```bash
# Activate the virtual environment
source .env/bin/activate
# Deactivate the virtual environment
source .env/bin/deactivate
```
Once you've created your virtual environment, you can install `timm` in it.
## Using pip
The most straightforward way to install `timm` is with pip:
```bash
pip install timm
```
Alternatively, you can install `timm` from GitHub directly to get the latest, bleeding-edge version:
```bash
pip install git+https://github.com/rwightman/pytorch-image-models.git
```
Run the following command to check if `timm` has been properly installed:
```bash
python -c "from timm import list_models; print(list_models(pretrained=True)[:5])"
```
This command lists the first five pretrained models available in `timm` (which are sorted alphabetically). You should see the following output:
```python
['adv_inception_v3', 'bat_resnext26ts', 'beit_base_patch16_224', 'beit_base_patch16_224_in22k', 'beit_base_patch16_384']
```
## From Source
Building `timm` from source lets you make changes to the code base. To install from source, clone the repository and install with the following commands:
```bash
git clone https://github.com/rwightman/pytorch-image-models.git
cd pytorch-image-models
pip install -e .
```
Again, you can check if `timm` was properly installed with the following command:
```bash
python -c "from timm import list_models; print(list_models(pretrained=True)[:5])"
```

@ -1,5 +0,0 @@
# Available Models
`timm` comes bundled with a number of model architectures and corresponding pretrained models.
In these pages, you will find the models available in the `timm` library, as well as information on how to use them.

@ -0,0 +1,228 @@
# Quickstart
This quickstart is intended for developers who are ready to dive into the code and see an example of how to integrate `timm` into their model training workflow.
First, you'll need to install `timm`. For more information on installation, see [Installation](installation).
```bash
pip install timm
```
## Load a Pretrained Model
Pretrained models can be loaded using [`create_model`].
Here, we load the pretrained `mobilenetv3_large_100` model.
```py
>>> import timm
>>> m = timm.create_model('mobilenetv3_large_100', pretrained=True)
>>> m.eval()
```
<Tip>
Note: The returned PyTorch model is set to train mode by default, so you must call `.eval()` on it if you plan to use it for inference.
</Tip>
## List Models with Pretrained Weights
To list models packaged with `timm`, you can use [`list_models`]. If you specify `pretrained=True`, this function will only return model names that have associated pretrained weights available.
```py
>>> import timm
>>> from pprint import pprint
>>> model_names = timm.list_models(pretrained=True)
>>> pprint(model_names)
[
'adv_inception_v3',
'cspdarknet53',
'cspresnext50',
'densenet121',
'densenet161',
'densenet169',
'densenet201',
'densenetblur121d',
'dla34',
'dla46_c',
]
```
You can also list models with a specific pattern in their name.
```py
>>> import timm
>>> from pprint import pprint
>>> model_names = timm.list_models('*resne*t*')
>>> pprint(model_names)
[
'cspresnet50',
'cspresnet50d',
'cspresnet50w',
'cspresnext50',
...
]
```
## Fine-Tune a Pretrained Model
You can finetune any of the pre-trained models just by changing the classifier (the last layer).
```py
>>> model = timm.create_model('mobilenetv3_large_100', pretrained=True, num_classes=NUM_FINETUNE_CLASSES)
```
To fine-tune on your own dataset, you have to write a PyTorch training loop or adapt `timm`'s [training script](training_script) to use your dataset.
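As a rough, hedged sketch of what such a loop could look like (the `'path/to/train'` folder and hyper-parameter values below are placeholders, not part of `timm`):
```py
>>> import torch
>>> from torchvision.datasets import ImageFolder
>>> model = timm.create_model('mobilenetv3_large_100', pretrained=True, num_classes=NUM_FINETUNE_CLASSES)
>>> data_cfg = timm.data.resolve_data_config(model.pretrained_cfg)
>>> train_ds = ImageFolder('path/to/train', transform=timm.data.create_transform(**data_cfg, is_training=True))
>>> loader = torch.utils.data.DataLoader(train_ds, batch_size=32, shuffle=True)
>>> optimizer = torch.optim.SGD(model.parameters(), lr=0.01, momentum=0.9)
>>> model.train()
>>> for images, labels in loader:
...     optimizer.zero_grad()
...     torch.nn.functional.cross_entropy(model(images), labels).backward()
...     optimizer.step()
```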
## Use a Pretrained Model for Feature Extraction
Without modifying the network, you can call `model.forward_features(input)` on any model instead of the usual `model(input)`. This bypasses the network's head classifier and global pooling.
For a more in depth guide to using `timm` for feature extraction, see [Feature Extraction](feature_extraction).
```py
>>> import timm
>>> import torch
>>> x = torch.randn(1, 3, 224, 224)
>>> model = timm.create_model('mobilenetv3_large_100', pretrained=True)
>>> features = model.forward_features(x)
>>> print(features.shape)
torch.Size([1, 960, 7, 7])
```
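Models can also be created with `features_only=True`, in which case the forward pass returns a list of intermediate feature maps instead of the final unpooled map. The shapes below are what we'd expect for `mobilenetv3_large_100` at this input size; see [Feature Extraction](feature_extraction) for the full story.
```py
>>> feature_extractor = timm.create_model('mobilenetv3_large_100', pretrained=True, features_only=True)
>>> feature_maps = feature_extractor(x)
>>> [fm.shape for fm in feature_maps]
[torch.Size([1, 16, 112, 112]), torch.Size([1, 24, 56, 56]), torch.Size([1, 40, 28, 28]), torch.Size([1, 112, 14, 14]), torch.Size([1, 960, 7, 7])]
```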
## Image Augmentation
To transform images into valid inputs for a model, you can use [`timm.data.create_transform`], providing the desired `input_size` that the model expects.
This will return a generic transform that uses reasonable defaults.
```py
>>> timm.data.create_transform((3, 224, 224))
Compose(
Resize(size=256, interpolation=bilinear, max_size=None, antialias=None)
CenterCrop(size=(224, 224))
ToTensor()
Normalize(mean=tensor([0.4850, 0.4560, 0.4060]), std=tensor([0.2290, 0.2240, 0.2250]))
)
```
Pretrained models have specific transforms that were applied to images fed into them while training. If you use the wrong transform on your image, the model won't understand what it's seeing!
To figure out which transformations were used for a given pretrained model, we can start by taking a look at its `pretrained_cfg`
```py
>>> model.pretrained_cfg
{'url': 'https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-weights/mobilenetv3_large_100_ra-f55367f5.pth',
'num_classes': 1000,
'input_size': (3, 224, 224),
'pool_size': (7, 7),
'crop_pct': 0.875,
'interpolation': 'bicubic',
'mean': (0.485, 0.456, 0.406),
'std': (0.229, 0.224, 0.225),
'first_conv': 'conv_stem',
'classifier': 'classifier',
'architecture': 'mobilenetv3_large_100'}
```
We can then resolve only the data related configuration by using [`timm.data.resolve_data_config`].
```py
>>> timm.data.resolve_data_config(model.pretrained_cfg)
{'input_size': (3, 224, 224),
'interpolation': 'bicubic',
'mean': (0.485, 0.456, 0.406),
'std': (0.229, 0.224, 0.225),
'crop_pct': 0.875}
```
We can pass this data config to [`timm.data.create_transform`] to initialize the model's associated transform.
```py
>>> data_cfg = timm.data.resolve_data_config(model.pretrained_cfg)
>>> transform = timm.data.create_transform(**data_cfg)
>>> transform
Compose(
Resize(size=256, interpolation=bicubic, max_size=None, antialias=None)
CenterCrop(size=(224, 224))
ToTensor()
Normalize(mean=tensor([0.4850, 0.4560, 0.4060]), std=tensor([0.2290, 0.2240, 0.2250]))
)
```
<Tip>
Note: Here, the pretrained model's config happens to be the same as the generic config we made earlier. This is not always the case. So, it's safer to use the data config to create the transform as we did here instead of using the generic transform.
</Tip>
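The same data config can also produce the matching train-time transform. Passing `is_training=True` swaps the resize + center crop above for random resized cropping, flipping, and color jitter:
```py
>>> train_transform = timm.data.create_transform(**data_cfg, is_training=True)
```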
## Using Pretrained Models for Inference
Here, we will put together the above sections and use a pretrained model for inference.
First we'll need an image to do inference on. Here we load an image from the web:
```py
>>> import requests
>>> from PIL import Image
>>> from io import BytesIO
>>> url = 'https://datasets-server.huggingface.co/assets/imagenet-1k/--/default/test/12/image/image.jpg'
>>> image = Image.open(requests.get(url, stream=True).raw)
>>> image
```
Here's the image we loaded:
<img src="https://datasets-server.huggingface.co/assets/imagenet-1k/--/default/test/12/image/image.jpg" alt="An Image from a link" width="300"/>
Now, we'll create our model and transforms again. This time, we make sure to set our model in evaluation mode.
```py
>>> model = timm.create_model('mobilenetv3_large_100', pretrained=True).eval()
>>> transform = timm.data.create_transform(
**timm.data.resolve_data_config(model.pretrained_cfg)
)
```
We can prepare this image for the model by passing it to the transform.
```py
>>> image_tensor = transform(image)
>>> image_tensor.shape
torch.Size([3, 224, 224])
```
Now we can pass that image to the model to get the predictions. We use `unsqueeze(0)` in this case, as the model is expecting a batch dimension.
```py
>>> output = model(image_tensor.unsqueeze(0))
>>> output.shape
torch.Size([1, 1000])
```
To get the predicted probabilities, we apply softmax to the output. This leaves us with a tensor of shape `(num_classes,)`.
```py
>>> probabilities = torch.nn.functional.softmax(output[0], dim=0)
>>> probabilities.shape
torch.Size([1000])
```
Now we'll find the top-5 predicted class indices and values using `torch.topk`.
```py
>>> values, indices = torch.topk(probabilities, 5)
>>> indices
tensor([162, 166, 161, 164, 167])
```
If we check the ImageNet labels for the top indices, we can see what the model predicted...
```py
>>> IMAGENET_1k_URL = 'https://storage.googleapis.com/bit_models/ilsvrc2012_wordnet_lemmas.txt'
>>> IMAGENET_1k_LABELS = requests.get(IMAGENET_1k_URL).text.strip().split('\n')
>>> [{'label': IMAGENET_1k_LABELS[idx], 'value': val.item()} for val, idx in zip(values, indices)]
[{'label': 'beagle', 'value': 0.8486220836639404},
{'label': 'Walker_hound, Walker_foxhound', 'value': 0.03753996267914772},
{'label': 'basset, basset_hound', 'value': 0.024628572165966034},
{'label': 'bluetick', 'value': 0.010317106731235981},
{'label': 'English_foxhound', 'value': 0.006958036217838526}]
```

@ -0,0 +1,9 @@
# Data
[[autodoc]] timm.data.create_dataset
[[autodoc]] timm.data.create_loader
[[autodoc]] timm.data.create_transform
[[autodoc]] timm.data.resolve_data_config
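As a brief, hedged sketch of how these factories fit together (the dataset `root` path is a placeholder for your own data):
```py
import timm
import timm.data

model = timm.create_model('resnet50', pretrained=True)
data_cfg = timm.data.resolve_data_config(model.pretrained_cfg)

# ImageFolder-style dataset rooted at a placeholder path
dataset = timm.data.create_dataset('', root='path/to/imagenet', split='validation')
loader = timm.data.create_loader(
    dataset,
    batch_size=32,
    is_training=False,
    use_prefetcher=False,  # keep it simple on CPU; the CUDA prefetcher is the default
    **data_cfg,
)
```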

@ -0,0 +1,5 @@
# Models
[[autodoc]] timm.create_model
[[autodoc]] timm.list_models

@ -0,0 +1,27 @@
# Optimization
This page contains the API reference documentation for optimizers included in `timm`.
## Optimizers
### Factory functions
[[autodoc]] timm.optim.optim_factory.create_optimizer
[[autodoc]] timm.optim.optim_factory.create_optimizer_v2
### Optimizer Classes
[[autodoc]] timm.optim.adabelief.AdaBelief
[[autodoc]] timm.optim.adafactor.Adafactor
[[autodoc]] timm.optim.adahessian.Adahessian
[[autodoc]] timm.optim.adamp.AdamP
[[autodoc]] timm.optim.adamw.AdamW
[[autodoc]] timm.optim.lamb.Lamb
[[autodoc]] timm.optim.lars.Lars
[[autodoc]] timm.optim.lookahead.Lookahead
[[autodoc]] timm.optim.madgrad.MADGRAD
[[autodoc]] timm.optim.nadam.Nadam
[[autodoc]] timm.optim.nvnovograd.NvNovoGrad
[[autodoc]] timm.optim.radam.RAdam
[[autodoc]] timm.optim.rmsprop_tf.RMSpropTF
[[autodoc]] timm.optim.sgdp.SGDP
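As a quick, hedged usage sketch of the v2 factory (hyper-parameter values are illustrative only):
```py
import timm
import timm.optim

model = timm.create_model('resnet50')
# named optimizer lookup; extra kwargs pass through to the optimizer class
optimizer = timm.optim.create_optimizer_v2(model, opt='adamw', lr=1e-4, weight_decay=0.05)
```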

@ -0,0 +1,19 @@
# Learning Rate Schedulers
This page contains the API reference documentation for learning rate schedulers included in `timm`.
## Schedulers
### Factory functions
[[autodoc]] timm.scheduler.scheduler_factory.create_scheduler
[[autodoc]] timm.scheduler.scheduler_factory.create_scheduler_v2
### Scheduler Classes
[[autodoc]] timm.scheduler.cosine_lr.CosineLRScheduler
[[autodoc]] timm.scheduler.multistep_lr.MultiStepLRScheduler
[[autodoc]] timm.scheduler.plateau_lr.PlateauLRScheduler
[[autodoc]] timm.scheduler.poly_lr.PolyLRScheduler
[[autodoc]] timm.scheduler.step_lr.StepLRScheduler
[[autodoc]] timm.scheduler.tanh_lr.TanhLRScheduler
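And a hedged sketch of driving one of the scheduler classes directly (epoch counts and LR values are illustrative):
```py
import torch
import timm
import timm.scheduler

model = timm.create_model('resnet50')
optimizer = torch.optim.SGD(model.parameters(), lr=0.1)

# cosine decay over 100 epochs with a 5 epoch warmup
scheduler = timm.scheduler.CosineLRScheduler(optimizer, t_initial=100, warmup_t=5, warmup_lr_init=1e-5)

for epoch in range(100):
    # ... train one epoch here ...
    scheduler.step(epoch + 1)  # timm schedulers are stepped explicitly w/ the epoch index
```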

@ -1,35 +0,0 @@
# Scripts
Train, validation, inference, and checkpoint cleaning scripts are included in the GitHub root folder. Scripts are not currently packaged in the pip release.
The training and validation scripts evolved from early versions of the [PyTorch Imagenet Examples](https://github.com/pytorch/examples). I have added significant functionality over time, including CUDA specific performance enhancements based on
[NVIDIA's APEX Examples](https://github.com/NVIDIA/apex/tree/master/examples).
## Training Script
The variety of training args is large and not all combinations of options (or even options) have been fully tested. For the training dataset folder, specify the base folder that contains `train` and `validation` folders.
To train an SE-ResNet34 on ImageNet, locally distributed, 4 GPUs, one process per GPU w/ cosine schedule, random-erasing prob of 50% and per-pixel random value:
```bash
./distributed_train.sh 4 /data/imagenet --model seresnet34 --sched cosine --epochs 150 --warmup-epochs 5 --lr 0.4 --reprob 0.5 --remode pixel --batch-size 256 --amp -j 4
```
<Tip>
It is recommended to use PyTorch 1.9+ w/ PyTorch native AMP and DDP instead of APEX AMP. `--amp` defaults to native AMP as of `timm` ver 0.4.3. `--apex-amp` will force use of APEX components if they are installed.
</Tip>
## Validation / Inference Scripts
Validation and inference scripts are similar in usage. One outputs metrics on a validation set and the other outputs topk class ids in a csv. Specify the folder containing validation images, not the base folder as in the training script.
To validate with the model's pretrained weights (if they exist):
```bash
python validate.py /imagenet/validation/ --model seresnext26_32x4d --pretrained
```
To run inference from a checkpoint:
```bash
python inference.py /imagenet/validation/ --model mobilenetv3_large_100 --checkpoint ./output/train/model_best.pth.tar
```

@ -1,6 +1,44 @@
# Training Examples
# Scripts
## EfficientNet-B2 with RandAugment - 80.4 top-1, 95.1 top-5
Train, validation, inference, and checkpoint cleaning scripts are included in the GitHub root folder. Scripts are not currently packaged in the pip release.
The training and validation scripts evolved from early versions of the [PyTorch Imagenet Examples](https://github.com/pytorch/examples). I have added significant functionality over time, including CUDA specific performance enhancements based on
[NVIDIA's APEX Examples](https://github.com/NVIDIA/apex/tree/master/examples).
## Training Script
The variety of training args is large and not all combinations of options (or even options) have been fully tested. For the training dataset folder, specify the base folder that contains `train` and `validation` folders.
To train an SE-ResNet34 on ImageNet, locally distributed, 4 GPUs, one process per GPU w/ cosine schedule, random-erasing prob of 50% and per-pixel random value:
```bash
./distributed_train.sh 4 /data/imagenet --model seresnet34 --sched cosine --epochs 150 --warmup-epochs 5 --lr 0.4 --reprob 0.5 --remode pixel --batch-size 256 --amp -j 4
```
<Tip>
It is recommended to use PyTorch 1.9+ w/ PyTorch native AMP and DDP instead of APEX AMP. `--amp` defaults to native AMP as of `timm` ver 0.4.3. `--apex-amp` will force use of APEX components if they are installed.
</Tip>
## Validation / Inference Scripts
Validation and inference scripts are similar in usage. One outputs metrics on a validation set and the other outputs topk class ids in a csv. Specify the folder containing validation images, not the base folder as in the training script.
To validate with the model's pretrained weights (if they exist):
```bash
python validate.py /imagenet/validation/ --model seresnext26_32x4d --pretrained
```
To run inference from a checkpoint:
```bash
python inference.py /imagenet/validation/ --model mobilenetv3_large_100 --checkpoint ./output/train/model_best.pth.tar
```
## Training Examples
### EfficientNet-B2 with RandAugment - 80.4 top-1, 95.1 top-5
These params are for dual Titan RTX cards with NVIDIA Apex installed:
@ -8,7 +46,7 @@ These params are for dual Titan RTX cards with NVIDIA Apex installed:
./distributed_train.sh 2 /imagenet/ --model efficientnet_b2 -b 128 --sched step --epochs 450 --decay-epochs 2.4 --decay-rate .97 --opt rmsproptf --opt-eps .001 -j 8 --warmup-lr 1e-6 --weight-decay 1e-5 --drop 0.3 --drop-path 0.2 --model-ema --model-ema-decay 0.9999 --aa rand-m9-mstd0.5 --remode pixel --reprob 0.2 --amp --lr .016
```
## MixNet-XL with RandAugment - 80.5 top-1, 94.9 top-5
### MixNet-XL with RandAugment - 80.5 top-1, 94.9 top-5
These params are for dual Titan RTX cards with NVIDIA Apex installed:
@ -16,45 +54,45 @@ This params are for dual Titan RTX cards with NVIDIA Apex installed:
./distributed_train.sh 2 /imagenet/ --model mixnet_xl -b 128 --sched step --epochs 450 --decay-epochs 2.4 --decay-rate .969 --opt rmsproptf --opt-eps .001 -j 8 --warmup-lr 1e-6 --weight-decay 1e-5 --drop 0.3 --drop-path 0.2 --model-ema --model-ema-decay 0.9999 --aa rand-m9-mstd0.5 --remode pixel --reprob 0.3 --amp --lr .016 --dist-bn reduce
```
## SE-ResNeXt-26-D and SE-ResNeXt-26-T
### SE-ResNeXt-26-D and SE-ResNeXt-26-T
These hparams (or similar) work well for a wide range of ResNet architectures. It's generally a good idea to increase the epoch # as the model size increases... ie approx 180-200 for ResNe(X)t50, and 220+ for larger. Increase batch size and LR proportionally for better GPUs or with AMP enabled. These params were for 2 1080Ti cards:
```bash
./distributed_train.sh 2 /imagenet/ --model seresnext26t_32x4d --lr 0.1 --warmup-epochs 5 --epochs 160 --weight-decay 1e-4 --sched cosine --reprob 0.4 --remode pixel -b 112
```
## EfficientNet-B3 with RandAugment - 81.5 top-1, 95.7 top-5
### EfficientNet-B3 with RandAugment - 81.5 top-1, 95.7 top-5
The training of this model started with the same command line as EfficientNet-B2 w/ RA above. After almost three weeks of training the process crashed. The results weren't looking amazing so I resumed the training several times with tweaks to a few params (increase RE prob, decrease rand-aug, increase ema-decay). Nothing looked great. I ended up averaging the best checkpoints from all restarts. The result is mediocre at default res/crop but oddly performs much better with a full image test crop of 1.0.
## EfficientNet-B0 with RandAugment - 77.7 top-1, 95.3 top-5
### EfficientNet-B0 with RandAugment - 77.7 top-1, 95.3 top-5
[Michael Klachko](https://github.com/michaelklachko) achieved these results with the command line for B2 adapted for larger batch size, with the recommended B0 dropout rate of 0.2.
```bash
./distributed_train.sh 2 /imagenet/ --model efficientnet_b0 -b 384 --sched step --epochs 450 --decay-epochs 2.4 --decay-rate .97 --opt rmsproptf --opt-eps .001 -j 8 --warmup-lr 1e-6 --weight-decay 1e-5 --drop 0.2 --drop-path 0.2 --model-ema --model-ema-decay 0.9999 --aa rand-m9-mstd0.5 --remode pixel --reprob 0.2 --amp --lr .048
```
## ResNet50 with JSD loss and RandAugment (clean + 2x RA augs) - 79.04 top-1, 94.39 top-5
### ResNet50 with JSD loss and RandAugment (clean + 2x RA augs) - 79.04 top-1, 94.39 top-5
Trained on two older 1080Ti cards, this took a while. Only a slightly better (and not statistically significant) ImageNet validation result than my first good AugMix training of 78.99. However, these weights are more robust on tests with ImageNetV2, ImageNet-Sketch, etc. Unlike my first AugMix runs, I've enabled SplitBatchNorm, disabled random erasing on the clean split, and cranked up random erasing prob on the 2 augmented paths.
```bash
./distributed_train.sh 2 /imagenet -b 64 --model resnet50 --sched cosine --epochs 200 --lr 0.05 --amp --remode pixel --reprob 0.6 --aug-splits 3 --aa rand-m9-mstd0.5-inc1 --resplit --split-bn --jsd --dist-bn reduce
```
## EfficientNet-ES (EdgeTPU-Small) with RandAugment - 78.066 top-1, 93.926 top-5
### EfficientNet-ES (EdgeTPU-Small) with RandAugment - 78.066 top-1, 93.926 top-5
Trained by [Andrew Lavin](https://github.com/andravin) with 8 V100 cards. Model EMA was not used, final checkpoint is the average of 8 best checkpoints during training.
```bash
./distributed_train.sh 8 /imagenet --model efficientnet_es -b 128 --sched step --epochs 450 --decay-epochs 2.4 --decay-rate .97 --opt rmsproptf --opt-eps .001 -j 8 --warmup-lr 1e-6 --weight-decay 1e-5 --drop 0.2 --drop-path 0.2 --aa rand-m9-mstd0.5 --remode pixel --reprob 0.2 --amp --lr .064
```
## MobileNetV3-Large-100 - 75.766 top-1, 92,542 top-5
### MobileNetV3-Large-100 - 75.766 top-1, 92.542 top-5
```bash
./distributed_train.sh 2 /imagenet/ --model mobilenetv3_large_100 -b 512 --sched step --epochs 600 --decay-epochs 2.4 --decay-rate .973 --opt rmsproptf --opt-eps .001 -j 7 --warmup-lr 1e-6 --weight-decay 1e-5 --drop 0.2 --drop-path 0.2 --model-ema --model-ema-decay 0.9999 --aa rand-m9-mstd0.5 --remode pixel --reprob 0.2 --amp --lr .064 --lr-noise 0.42 0.9
```
## ResNeXt-50 32x4d w/ RandAugment - 79.762 top-1, 94.60 top-5
### ResNeXt-50 32x4d w/ RandAugment - 79.762 top-1, 94.60 top-5
These params will also work well for SE-ResNeXt-50 and SK-ResNeXt-50 and likely 101. I used them for the SK-ResNeXt-50 32x4d that I trained with 2 GPUs using a slightly higher LR per effective batch size (lr=0.18, b=192 per GPU). The cmd line below is tuned for 8 GPU training.

@ -1,4 +1,3 @@
dependencies = ['torch']
from timm.models import registry
globals().update(registry._model_entrypoints)
import timm
globals().update(timm.models._registry._model_entrypoints)

@ -5,44 +5,94 @@ An example inference script that outputs top-k class ids for images in a folder
Hacked together by / Copyright 2020 Ross Wightman (https://github.com/rwightman)
"""
import os
import time
import argparse
import json
import logging
import os
import time
from contextlib import suppress
from functools import partial
import numpy as np
import pandas as pd
import torch
from timm.models import create_model, apply_test_time_pool
from timm.data import ImageDataset, create_loader, resolve_data_config
from timm.utils import AverageMeter, setup_default_logging
from timm.data import create_dataset, create_loader, resolve_data_config, ImageNetInfo, infer_imagenet_subset
from timm.layers import apply_test_time_pool
from timm.models import create_model
from timm.utils import AverageMeter, setup_default_logging, set_jit_fuser, ParseKwargs
try:
from apex import amp
has_apex = True
except ImportError:
has_apex = False
has_native_amp = False
try:
if getattr(torch.cuda.amp, 'autocast') is not None:
has_native_amp = True
except AttributeError:
pass
try:
from functorch.compile import memory_efficient_fusion
has_functorch = True
except ImportError as e:
has_functorch = False
has_compile = hasattr(torch, 'compile')
_FMT_EXT = {
'json': '.json',
'json-records': '.json',
'json-split': '.json',
'parquet': '.parquet',
'csv': '.csv',
}
torch.backends.cudnn.benchmark = True
_logger = logging.getLogger('inference')
parser = argparse.ArgumentParser(description='PyTorch ImageNet Inference')
parser.add_argument('data', metavar='DIR',
help='path to dataset')
parser.add_argument('--output_dir', metavar='DIR', default='./',
help='path to output files')
parser.add_argument('--model', '-m', metavar='MODEL', default='dpn92',
help='model architecture (default: dpn92)')
parser.add_argument('data', nargs='?', metavar='DIR', const=None,
help='path to dataset (*deprecated*, use --data-dir)')
parser.add_argument('--data-dir', metavar='DIR',
help='path to dataset (root dir)')
parser.add_argument('--dataset', metavar='NAME', default='',
help='dataset type + name ("<type>/<name>") (default: ImageFolder or ImageTar if empty)')
parser.add_argument('--split', metavar='NAME', default='validation',
help='dataset split (default: validation)')
parser.add_argument('--model', '-m', metavar='MODEL', default='resnet50',
help='model architecture (default: resnet50)')
parser.add_argument('-j', '--workers', default=2, type=int, metavar='N',
help='number of data loading workers (default: 2)')
parser.add_argument('-b', '--batch-size', default=256, type=int,
metavar='N', help='mini-batch size (default: 256)')
parser.add_argument('--img-size', default=None, type=int,
metavar='N', help='Input image dimension')
metavar='N', help='Input image dimension, uses model default if empty')
parser.add_argument('--in-chans', type=int, default=None, metavar='N',
help='Image input channels (default: None => 3)')
parser.add_argument('--input-size', default=None, nargs=3, type=int,
metavar='N N N', help='Input all image dimensions (d h w, e.g. --input-size 3 224 224), uses model default if empty')
parser.add_argument('--use-train-size', action='store_true', default=False,
help='force use of train input size, even when test size is specified in pretrained cfg')
parser.add_argument('--crop-pct', default=None, type=float,
metavar='N', help='Input image center crop pct')
parser.add_argument('--crop-mode', default=None, type=str,
metavar='N', help='Input image crop mode (squash, border, center). Model default if None.')
parser.add_argument('--mean', type=float, nargs='+', default=None, metavar='MEAN',
help='Override mean pixel value of dataset')
parser.add_argument('--std', type=float, nargs='+', default=None, metavar='STD',
help='Override std deviation of of dataset')
parser.add_argument('--interpolation', default='', type=str, metavar='NAME',
help='Image resize interpolation type (overrides model)')
parser.add_argument('--num-classes', type=int, default=1000,
parser.add_argument('--num-classes', type=int, default=None,
help='Number classes in dataset')
parser.add_argument('--class-map', default='', type=str, metavar='FILENAME',
help='path to class to idx mapping file (default: "")')
parser.add_argument('--log-freq', default=10, type=int,
metavar='N', help='batch logging frequency (default: 10)')
parser.add_argument('--checkpoint', default='', type=str, metavar='PATH',
@ -51,10 +101,56 @@ parser.add_argument('--pretrained', dest='pretrained', action='store_true',
help='use pre-trained model')
parser.add_argument('--num-gpu', type=int, default=1,
help='Number of GPUS to use')
parser.add_argument('--no-test-pool', dest='no_test_pool', action='store_true',
help='disable test time pool')
parser.add_argument('--topk', default=5, type=int,
parser.add_argument('--test-pool', dest='test_pool', action='store_true',
help='enable test time pool')
parser.add_argument('--channels-last', action='store_true', default=False,
help='Use channels_last memory layout')
parser.add_argument('--device', default='cuda', type=str,
help="Device (accelerator) to use.")
parser.add_argument('--amp', action='store_true', default=False,
help='use Native AMP for mixed precision training')
parser.add_argument('--amp-dtype', default='float16', type=str,
help='lower precision AMP dtype (default: float16)')
parser.add_argument('--fuser', default='', type=str,
help="Select jit fuser. One of ('', 'te', 'old', 'nvfuser')")
parser.add_argument('--model-kwargs', nargs='*', default={}, action=ParseKwargs)
scripting_group = parser.add_mutually_exclusive_group()
scripting_group.add_argument('--torchscript', default=False, action='store_true',
help='torch.jit.script the full model')
scripting_group.add_argument('--torchcompile', nargs='?', type=str, default=None, const='inductor',
help="Enable compilation w/ specified backend (default: inductor).")
scripting_group.add_argument('--aot-autograd', default=False, action='store_true',
help="Enable AOT Autograd support.")
parser.add_argument('--results-dir', type=str, default=None,
help='folder for output results')
parser.add_argument('--results-file', type=str, default=None,
help='results filename (relative to results-dir)')
parser.add_argument('--results-format', type=str, nargs='+', default=['csv'],
help='results format (one of "csv", "json", "json-split", "parquet")')
parser.add_argument('--results-separate-col', action='store_true', default=False,
help='separate output columns per result index.')
parser.add_argument('--topk', default=1, type=int,
metavar='N', help='Top-k to output to CSV')
parser.add_argument('--fullname', action='store_true', default=False,
help='use full sample name in output (not just basename).')
parser.add_argument('--filename-col', type=str, default='filename',
help='name for filename / sample name column')
parser.add_argument('--index-col', type=str, default='index',
help='name for output indices column(s)')
parser.add_argument('--label-col', type=str, default='label',
help='name for output label column(s)')
parser.add_argument('--output-col', type=str, default=None,
help='name for logit/probs output column(s)')
parser.add_argument('--output-type', type=str, default='prob',
help='output type column ("prob" for probabilities, "logit" for raw logits)')
parser.add_argument('--label-type', type=str, default='description',
help='type of label to output, one of "none", "name", "description", "detailed"')
parser.add_argument('--include-index', action='store_true', default=False,
help='include the class index in results')
parser.add_argument('--exclude-output', action='store_true', default=False,
help='exclude logits/probs from results, just indices. topk must be set !=0.')
def main():
@ -63,48 +159,132 @@ def main():
# might as well try to do something useful...
args.pretrained = args.pretrained or not args.checkpoint
if torch.cuda.is_available():
torch.backends.cuda.matmul.allow_tf32 = True
torch.backends.cudnn.benchmark = True
device = torch.device(args.device)
# resolve AMP arguments based on PyTorch / Apex availability
amp_autocast = suppress
if args.amp:
assert has_native_amp, 'Please update PyTorch to a version with native AMP (or use APEX).'
assert args.amp_dtype in ('float16', 'bfloat16')
amp_dtype = torch.bfloat16 if args.amp_dtype == 'bfloat16' else torch.float16
amp_autocast = partial(torch.autocast, device_type=device.type, dtype=amp_dtype)
_logger.info('Running inference in mixed precision with native PyTorch AMP.')
else:
_logger.info('Running inference in float32. AMP not enabled.')
if args.fuser:
set_jit_fuser(args.fuser)
# create model
in_chans = 3
if args.in_chans is not None:
in_chans = args.in_chans
elif args.input_size is not None:
in_chans = args.input_size[0]
model = create_model(
args.model,
num_classes=args.num_classes,
in_chans=3,
in_chans=in_chans,
pretrained=args.pretrained,
checkpoint_path=args.checkpoint)
checkpoint_path=args.checkpoint,
**args.model_kwargs,
)
if args.num_classes is None:
assert hasattr(model, 'num_classes'), 'Model must have `num_classes` attr if not set on cmd line/config.'
args.num_classes = model.num_classes
_logger.info(
f'Model {args.model} created, param count: {sum([m.numel() for m in model.parameters()])}')
_logger.info('Model %s created, param count: %d' %
(args.model, sum([m.numel() for m in model.parameters()])))
data_config = resolve_data_config(vars(args), model=model)
test_time_pool = False
if args.test_pool:
model, test_time_pool = apply_test_time_pool(model, data_config)
config = resolve_data_config(vars(args), model=model)
model, test_time_pool = (model, False) if args.no_test_pool else apply_test_time_pool(model, config)
model = model.to(device)
model.eval()
if args.channels_last:
model = model.to(memory_format=torch.channels_last)
if args.torchscript:
model = torch.jit.script(model)
elif args.torchcompile:
assert has_compile, 'A version of torch w/ torch.compile() is required for --compile, possibly a nightly.'
torch._dynamo.reset()
model = torch.compile(model, backend=args.torchcompile)
elif args.aot_autograd:
assert has_functorch, "functorch is needed for --aot-autograd"
model = memory_efficient_fusion(model)
if args.num_gpu > 1:
model = torch.nn.DataParallel(model, device_ids=list(range(args.num_gpu))).cuda()
else:
model = model.cuda()
model = torch.nn.DataParallel(model, device_ids=list(range(args.num_gpu)))
root_dir = args.data or args.data_dir
dataset = create_dataset(
root=root_dir,
name=args.dataset,
split=args.split,
class_map=args.class_map,
)
if test_time_pool:
data_config['crop_pct'] = 1.0
workers = 1 if 'tfds' in args.dataset or 'wds' in args.dataset else args.workers
loader = create_loader(
ImageDataset(args.data),
input_size=config['input_size'],
dataset,
batch_size=args.batch_size,
use_prefetcher=True,
interpolation=config['interpolation'],
mean=config['mean'],
std=config['std'],
num_workers=args.workers,
crop_pct=1.0 if test_time_pool else config['crop_pct'])
num_workers=workers,
**data_config,
)
model.eval()
to_label = None
if args.label_type in ('name', 'description', 'detail'):
imagenet_subset = infer_imagenet_subset(model)
if imagenet_subset is not None:
dataset_info = ImageNetInfo(imagenet_subset)
if args.label_type == 'name':
to_label = lambda x: dataset_info.index_to_label_name(x)
elif args.label_type == 'detail':
to_label = lambda x: dataset_info.index_to_description(x, detailed=True)
else:
to_label = lambda x: dataset_info.index_to_description(x)
to_label = np.vectorize(to_label)
else:
_logger.error("Cannot deduce ImageNet subset from model, no labelling will be performed.")
k = min(args.topk, args.num_classes)
top_k = min(args.topk, args.num_classes)
batch_time = AverageMeter()
end = time.time()
topk_ids = []
all_indices = []
all_labels = []
all_outputs = []
use_probs = args.output_type == 'prob'
with torch.no_grad():
for batch_idx, (input, _) in enumerate(loader):
input = input.cuda()
labels = model(input)
topk = labels.topk(k)[1]
topk_ids.append(topk.cpu().numpy())
with amp_autocast():
output = model(input)
if use_probs:
output = output.softmax(-1)
if top_k:
output, indices = output.topk(top_k)
np_indices = indices.cpu().numpy()
if args.include_index:
all_indices.append(np_indices)
if to_label is not None:
np_labels = to_label(np_indices)
all_labels.append(np_labels)
all_outputs.append(output.cpu().numpy())
# measure elapsed time
batch_time.update(time.time() - end)
@ -114,13 +294,71 @@ def main():
_logger.info('Predict: [{0}/{1}] Time {batch_time.val:.3f} ({batch_time.avg:.3f})'.format(
batch_idx, len(loader), batch_time=batch_time))
    all_indices = np.concatenate(all_indices, axis=0) if all_indices else None
    all_labels = np.concatenate(all_labels, axis=0) if all_labels else None
    all_outputs = np.concatenate(all_outputs, axis=0).astype(np.float32)
    filenames = loader.dataset.filenames(basename=not args.fullname)
    output_col = args.output_col or ('prob' if use_probs else 'logit')
    data_dict = {args.filename_col: filenames}
    if args.results_separate_col and all_outputs.shape[-1] > 1:
        if all_indices is not None:
            for i in range(all_indices.shape[-1]):
                data_dict[f'{args.index_col}_{i}'] = all_indices[:, i]
        if all_labels is not None:
            for i in range(all_labels.shape[-1]):
                data_dict[f'{args.label_col}_{i}'] = all_labels[:, i]
        for i in range(all_outputs.shape[-1]):
            data_dict[f'{output_col}_{i}'] = all_outputs[:, i]
    else:
        if all_indices is not None:
            if all_indices.shape[-1] == 1:
                all_indices = all_indices.squeeze(-1)
            data_dict[args.index_col] = list(all_indices)
        if all_labels is not None:
            if all_labels.shape[-1] == 1:
                all_labels = all_labels.squeeze(-1)
            data_dict[args.label_col] = list(all_labels)
        if all_outputs.shape[-1] == 1:
            all_outputs = all_outputs.squeeze(-1)
        data_dict[output_col] = list(all_outputs)
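    # Layout note: with --results-separate-col, each top-k rank becomes its own
    # column (e.g. prob_0, prob_1, ...); otherwise each cell holds the full
    # top-k array for that image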
    df = pd.DataFrame(data=data_dict)

    results_filename = args.results_file
    if results_filename:
        filename_no_ext, ext = os.path.splitext(results_filename)
        if ext and ext in _FMT_EXT.values():
            # if filename provided with one of expected ext,
            # remove it as it will be added back
            results_filename = filename_no_ext
    else:
        # base default filename on model name + img-size
        img_size = data_config["input_size"][1]
        results_filename = f'{args.model}-{img_size}'

    if args.results_dir:
        results_filename = os.path.join(args.results_dir, results_filename)

    for fmt in args.results_format:
        save_results(df, results_filename, fmt)

    print('--result')
    print(df.set_index(args.filename_col).to_json(orient='index', indent=4))

def save_results(df, results_filename, results_format='csv', filename_col='filename'):
    results_filename += _FMT_EXT[results_format]
    if results_format == 'parquet':
        df.set_index(filename_col).to_parquet(results_filename)
    elif results_format == 'json':
        df.set_index(filename_col).to_json(results_filename, indent=4, orient='index')
    elif results_format == 'json-records':
        df.to_json(results_filename, lines=True, orient='records')
    elif results_format == 'json-split':
        df.to_json(results_filename, indent=4, orient='split', index=False)
    else:
        df.to_csv(results_filename, index=False)
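# For reference (not part of the script itself): the saved files round-trip with
# pandas, e.g. pd.read_parquet('<results_filename>.parquet') or
# pd.read_json('<results_filename>.json', orient='index'); the orient/index
# arguments must match the ones used in save_results() above.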
if __name__ == '__main__':
    main()

@@ -44,3 +44,11 @@ markdown_extensions:
plugins:
  - search
  - awesome-pages
  - redirects:
      redirect_maps:
        'index.md': 'https://huggingface.co/docs/timm/index'
        'models.md': 'https://huggingface.co/docs/timm/models'
        'results.md': 'https://huggingface.co/docs/timm/results'
        'scripts.md': 'https://huggingface.co/docs/timm/training_script'
        'training_hparam_examples.md': 'https://huggingface.co/docs/timm/training_script#training-examples'
        'feature_extraction.md': 'https://huggingface.co/docs/timm/feature_extraction'

@@ -1,4 +1,5 @@
mkdocs
mkdocs-material
mkdocs-redirects
mdx_truly_sane_lists
mkdocs-awesome-pages-plugin

@@ -1,2 +0,0 @@
model-index==0.1.10
jinja2==2.11.3

@@ -2,3 +2,4 @@ torch>=1.7
torchvision
pyyaml
huggingface_hub
safetensors>=0.2

@@ -38,7 +38,7 @@ An ImageNet test set of 10,000 images sampled from new images roughly 10 years a
### ImageNet-Adversarial - [`results-imagenet-a.csv`](results-imagenet-a.csv)
A collection of 7500 images covering 200 of the 1000 ImageNet classes. Images are naturally occurring adversarial examples that confuse typical ImageNet classifiers. This is a challenging dataset: a typical ResNet-50 will score 0% top-1.
For clean validation with the same 200 classes, see [`results-imagenet-a-clean.csv`](results-imagenet-a-clean.csv)
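
The clean and adversarial CSVs can be joined per model to get a quick robustness gap; a minimal sketch, assuming both files expose the usual `model` and `top1` columns:

```python
import pandas as pd

# join clean vs adversarial top-1 accuracy per model (column names assumed)
adv = pd.read_csv('results-imagenet-a.csv')[['model', 'top1']]
clean = pd.read_csv('results-imagenet-a-clean.csv')[['model', 'top1']]
gap = clean.merge(adv, on='model', suffixes=('_clean', '_adv'))
gap['top1_drop'] = gap['top1_clean'] - gap['top1_adv']
print(gap.sort_values('top1_drop').head())
```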

@@ -0,0 +1,933 @@
model,infer_samples_per_sec,infer_step_time,infer_batch_size,infer_img_size,infer_gmacs,infer_macts,param_count
tinynet_e,49277.65,20.77,1024,106,0.03,0.69,2.04
mobilenetv3_small_050,45562.75,22.464,1024,224,0.03,0.92,1.59
lcnet_035,41026.68,24.949,1024,224,0.03,1.04,1.64
lcnet_050,37575.13,27.242,1024,224,0.05,1.26,1.88
mobilenetv3_small_075,33062.39,30.961,1024,224,0.05,1.3,2.04
mobilenetv3_small_100,30012.26,34.109,1024,224,0.06,1.42,2.54
tf_mobilenetv3_small_minimal_100,28698.14,35.672,1024,224,0.06,1.41,2.04
tf_mobilenetv3_small_075,27407.51,37.352,1024,224,0.05,1.3,2.04
tinynet_d,27236.47,37.585,1024,152,0.05,1.42,2.34
tf_mobilenetv3_small_100,25103.65,40.781,1024,224,0.06,1.42,2.54
lcnet_075,24140.95,42.406,1024,224,0.1,1.99,2.36
mnasnet_small,20706.43,49.443,1024,224,0.07,2.16,2.03
levit_128s,20595.72,49.709,1024,224,0.31,1.88,7.78
lcnet_100,19684.75,52.01,1024,224,0.16,2.52,2.95
mobilenetv2_035,18358.82,55.767,1024,224,0.07,2.86,1.68
regnetx_002,18244.04,56.117,1024,224,0.2,2.16,2.68
ghostnet_050,17564.96,58.287,1024,224,0.05,1.77,2.59
regnety_002,17006.07,60.202,1024,224,0.2,2.17,3.16
mnasnet_050,15925.32,64.29,1024,224,0.11,3.07,2.22
vit_tiny_r_s16_p8_224,15068.38,67.946,1024,224,0.44,2.06,6.34
mobilenetv2_050,14843.74,68.974,1024,224,0.1,3.64,1.97
tinynet_c,14634.69,69.959,1024,184,0.11,2.87,2.46
semnasnet_050,14248.78,71.855,1024,224,0.11,3.44,2.08
levit_128,14164.26,72.284,1024,224,0.41,2.71,9.21
vit_small_patch32_224,13811.36,74.131,1024,224,1.15,2.5,22.88
mixer_s32_224,13352.85,76.677,1024,224,1.0,2.28,19.1
cs3darknet_focus_s,12798.44,79.999,1024,256,0.69,2.7,3.27
lcnet_150,12783.12,80.094,1024,224,0.34,3.79,4.5
cs3darknet_s,12395.11,82.602,1024,256,0.72,2.97,3.28
regnetx_004,12366.39,82.791,1024,224,0.4,3.14,5.16
mobilenetv3_large_075,12001.32,85.313,1024,224,0.16,4.0,3.99
levit_192,11882.81,86.163,1024,224,0.66,3.2,10.95
resnet10t,11615.84,88.145,1024,224,1.1,2.43,5.44
ese_vovnet19b_slim_dw,11539.4,88.729,1024,224,0.4,5.28,1.9
gernet_s,11496.77,89.058,1024,224,0.75,2.65,8.17
mobilenetv3_rw,10873.77,94.16,1024,224,0.23,4.41,5.48
mobilenetv3_large_100,10705.06,95.645,1024,224,0.23,4.41,5.48
hardcorenas_a,10554.34,97.012,1024,224,0.23,4.38,5.26
tf_mobilenetv3_large_075,10511.12,97.41,1024,224,0.16,4.0,3.99
tf_mobilenetv3_large_minimal_100,10371.16,98.725,1024,224,0.22,4.4,3.92
mnasnet_075,10345.17,98.972,1024,224,0.23,4.77,3.17
hardcorenas_b,9695.74,105.601,1024,224,0.26,5.09,5.18
regnety_004,9655.22,106.046,1024,224,0.41,3.89,4.34
ghostnet_100,9483.99,107.96,1024,224,0.15,3.55,5.18
hardcorenas_c,9481.05,107.994,1024,224,0.28,5.01,5.52
tf_mobilenetv3_large_100,9456.79,108.271,1024,224,0.23,4.41,5.48
regnetx_006,9408.22,108.83,1024,224,0.61,3.98,6.2
mobilenetv2_075,9313.88,109.932,1024,224,0.22,5.86,2.64
tinynet_b,9291.99,110.191,1024,188,0.21,4.44,3.73
mnasnet_b1,9286.4,110.258,1024,224,0.33,5.46,4.38
mnasnet_100,9263.52,110.53,1024,224,0.33,5.46,4.38
gluon_resnet18_v1b,9078.31,112.785,1024,224,1.82,2.48,11.69
semnasnet_075,9069.42,112.895,1024,224,0.23,5.54,2.91
resnet18,9045.63,113.192,1024,224,1.82,2.48,11.69
ssl_resnet18,9045.4,113.196,1024,224,1.82,2.48,11.69
swsl_resnet18,9040.4,113.258,1024,224,1.82,2.48,11.69
levit_256,8921.47,114.768,1024,224,1.13,4.23,18.89
hardcorenas_d,8879.46,115.311,1024,224,0.3,4.93,7.5
regnety_006,8666.48,118.144,1024,224,0.61,4.33,6.06
seresnet18,8542.99,119.851,1024,224,1.82,2.49,11.78
mobilenetv2_100,8507.29,120.356,1024,224,0.31,6.68,3.5
spnasnet_100,8342.04,122.741,1024,224,0.35,6.03,4.42
legacy_seresnet18,8310.8,123.202,1024,224,1.82,2.49,11.78
semnasnet_100,8284.16,123.599,1024,224,0.32,6.23,3.89
mnasnet_a1,8283.57,123.607,1024,224,0.32,6.23,3.89
regnetx_008,7852.75,130.39,1024,224,0.81,5.15,7.26
hardcorenas_f,7809.07,131.117,1024,224,0.35,5.57,8.2
hardcorenas_e,7730.97,132.444,1024,224,0.35,5.65,8.07
efficientnet_lite0,7722.75,132.584,1024,224,0.4,6.74,4.65
levit_256d,7689.03,133.165,1024,224,1.4,4.93,26.21
xcit_nano_12_p16_224_dist,7674.8,133.413,1024,224,0.56,4.17,3.05
xcit_nano_12_p16_224,7670.11,133.492,1024,224,0.56,4.17,3.05
resnet18d,7636.48,134.082,1024,224,2.06,3.29,11.71
ghostnet_130,7625.58,134.274,1024,224,0.24,4.6,7.36
tf_efficientnetv2_b0,7614.25,134.473,1024,224,0.73,4.77,7.14
ese_vovnet19b_slim,7588.4,134.932,1024,224,1.69,3.52,3.17
deit_tiny_distilled_patch16_224,7449.3,137.451,1024,224,1.27,6.01,5.91
deit_tiny_patch16_224,7398.73,138.391,1024,224,1.26,5.97,5.72
vit_tiny_patch16_224,7390.78,138.538,1024,224,1.26,5.97,5.72
regnety_008,7366.88,138.989,1024,224,0.81,5.25,6.26
tinynet_a,7358.6,139.145,1024,192,0.35,5.41,6.19
dla46_c,7311.64,140.038,1024,224,0.58,4.5,1.3
fbnetc_100,7303.94,140.187,1024,224,0.4,6.51,5.57
mobilevitv2_050,7248.37,141.262,1024,256,0.48,8.04,1.37
tf_efficientnet_lite0,6816.26,150.218,1024,224,0.4,6.74,4.65
pit_ti_distilled_224,6788.49,150.832,1024,224,0.71,6.23,5.1
pit_ti_224,6762.99,151.401,1024,224,0.7,6.19,4.85
efficientnet_b0,6687.26,153.115,1024,224,0.4,6.75,5.29
visformer_tiny,6618.81,154.698,1024,224,1.27,5.72,10.32
rexnet_100,6608.65,154.937,1024,224,0.41,7.44,4.8
mnasnet_140,6580.58,155.597,1024,224,0.6,7.71,7.12
efficientnet_b1_pruned,6513.48,157.201,1024,240,0.4,6.21,6.33
rexnetr_100,6491.35,157.737,1024,224,0.43,7.72,4.88
mobilenetv2_110d,6395.98,160.089,1024,224,0.45,8.71,4.52
resnet14t,6341.58,161.462,1024,224,1.69,5.8,10.08
regnetz_005,6208.75,164.916,1024,224,0.52,5.86,7.12
dla46x_c,6145.64,166.61,1024,224,0.54,5.66,1.07
nf_regnet_b0,6055.0,169.104,1024,256,0.64,5.58,8.76
tf_efficientnet_b0,5992.76,170.862,1024,224,0.4,6.75,5.29
hrnet_w18_small,5908.15,173.308,1024,224,1.61,5.72,13.19
edgenext_xx_small,5886.07,173.957,1024,288,0.33,4.21,1.33
semnasnet_140,5856.63,174.833,1024,224,0.6,8.87,6.11
resnetblur18,5839.81,175.336,1024,224,2.34,3.39,11.69
ese_vovnet19b_dw,5825.11,175.779,1024,224,1.34,8.25,6.54
dla60x_c,5790.89,176.817,1024,224,0.59,6.01,1.32
mobilenetv2_140,5780.41,177.139,1024,224,0.6,9.57,6.11
skresnet18,5648.81,181.265,1024,224,1.82,3.24,11.96
mobilevit_xxs,5528.18,185.22,1024,256,0.42,8.34,1.27
efficientnet_b0_gn,5401.88,189.551,1024,224,0.42,6.75,5.29
convnext_atto,5364.13,190.886,1024,288,0.91,6.3,3.7
gluon_resnet34_v1b,5344.34,191.593,1024,224,3.67,3.74,21.8
resnet34,5335.05,191.926,1024,224,3.67,3.74,21.8
efficientnet_lite1,5334.12,191.959,1024,240,0.62,10.14,5.42
tv_resnet34,5332.7,192.011,1024,224,3.67,3.74,21.8
vit_base_patch32_224,5287.0,193.67,1024,224,4.41,5.01,88.22
vit_base_patch32_clip_224,5281.4,193.877,1024,224,4.41,5.01,88.22
levit_384,5276.74,194.047,1024,224,2.36,6.26,39.13
pit_xs_distilled_224,5241.4,195.357,1024,224,1.41,7.76,11.0
pit_xs_224,5237.09,195.517,1024,224,1.4,7.71,10.62
selecsls42,5225.99,195.932,1024,224,2.94,4.62,30.35
selecsls42b,5201.55,196.853,1024,224,2.98,4.62,32.46
gernet_m,5124.67,199.807,1024,224,3.02,5.24,21.14
pvt_v2_b0,5122.72,199.882,1024,224,0.57,7.99,3.67
tf_efficientnetv2_b1,5122.21,199.903,1024,240,1.21,7.34,8.14
mixnet_s,5079.84,201.57,1024,224,0.25,6.25,4.13
convnext_atto_ols,5062.64,202.255,1024,288,0.96,6.8,3.7
seresnet34,5028.88,203.611,1024,224,3.67,3.74,21.96
rexnetr_130,5003.96,204.626,1024,224,0.68,9.81,7.61
fbnetv3_b,5003.0,204.666,1024,256,0.55,9.1,8.6
mixer_b32_224,4982.51,205.508,1024,224,3.24,6.29,60.29
xcit_tiny_12_p16_224_dist,4879.26,209.853,1024,224,1.24,6.29,6.72
legacy_seresnet34,4875.12,210.034,1024,224,3.67,3.74,21.96
xcit_tiny_12_p16_224,4870.16,210.244,1024,224,1.24,6.29,6.72
resnet34d,4834.78,211.786,1024,224,3.91,4.54,21.82
tf_efficientnet_lite1,4822.03,212.348,1024,240,0.62,10.14,5.42
resnet26,4794.98,213.545,1024,224,2.36,7.35,16.0
mobilenetv2_120d,4786.27,213.934,1024,224,0.69,11.97,5.83
rexnet_130,4770.1,214.659,1024,224,0.68,9.71,7.56
efficientnet_b0_g16_evos,4743.69,215.854,1024,224,1.01,7.42,8.11
efficientnet_es,4736.89,216.163,1024,224,1.81,8.73,5.44
efficientnet_es_pruned,4735.25,216.239,1024,224,1.81,8.73,5.44
tf_mixnet_s,4735.17,216.242,1024,224,0.25,6.25,4.13
gmlp_ti16_224,4709.0,217.445,1024,224,1.34,7.55,5.87
convnext_femto,4672.08,219.162,1024,288,1.3,7.56,5.22
mobilevitv2_075,4638.17,220.764,1024,256,1.05,12.06,2.87
resmlp_12_224,4601.92,222.504,1024,224,3.01,5.5,15.35
resmlp_12_distilled_224,4597.97,222.695,1024,224,3.01,5.5,15.35
gmixer_12_224,4543.02,225.388,1024,224,2.67,7.26,12.7
fbnetv3_d,4532.2,225.927,1024,256,0.68,11.1,10.31
tf_efficientnet_es,4518.93,226.591,1024,224,1.81,8.73,5.44
selecsls60,4510.1,227.034,1024,224,3.59,5.52,30.67
mixer_s16_224,4509.29,227.075,1024,224,3.79,5.97,18.53
regnetx_016,4507.02,227.189,1024,224,1.62,7.93,9.19
selecsls60b,4490.35,228.033,1024,224,3.63,5.52,32.77
cs3darknet_focus_m,4487.64,228.171,1024,288,2.51,6.19,9.3
dla34,4481.03,228.505,1024,224,3.07,5.02,15.74
crossvit_tiny_240,4476.83,228.722,1024,240,1.57,9.08,7.01
convnext_femto_ols,4473.25,228.904,1024,288,1.35,8.06,5.23
vit_tiny_r_s16_p8_384,4463.13,229.423,1024,384,1.34,6.49,6.36
cs3darknet_m,4452.94,229.949,1024,288,2.63,6.69,9.31
repvgg_b0,4433.11,230.978,1024,224,3.41,6.15,15.82
resnet26d,4354.59,235.143,1024,224,2.6,8.15,16.01
rexnetr_150,4349.97,235.392,1024,224,0.89,11.13,9.78
resnetaa34d,4309.77,237.588,1024,224,4.43,5.07,21.82
efficientnet_b2_pruned,4309.58,237.598,1024,260,0.73,9.13,8.31
darknet17,4296.61,238.316,1024,256,3.26,7.18,14.3
vit_small_patch32_384,4250.58,240.897,1024,384,3.45,8.25,22.92
crossvit_9_240,4201.98,243.683,1024,240,1.85,9.52,8.55
nf_resnet26,4197.39,243.949,1024,224,2.41,7.35,16.0
efficientnet_b0_g8_gn,4190.39,244.357,1024,224,0.66,6.75,6.56
rexnet_150,4186.31,244.594,1024,224,0.9,11.21,9.73
ecaresnet50d_pruned,4182.62,244.81,1024,224,2.53,6.43,19.94
efficientformer_l1,4075.83,251.225,1024,224,1.3,5.53,12.29
poolformer_s12,4050.19,252.815,1024,224,1.82,5.53,11.92
regnety_016,4035.9,253.712,1024,224,1.63,8.04,11.2
efficientnet_lite2,4013.48,255.128,1024,260,0.89,12.9,6.09
crossvit_9_dagger_240,3992.98,256.437,1024,240,1.99,9.97,8.78
efficientnet_cc_b0_8e,3929.29,260.595,1024,224,0.42,9.42,24.01
efficientnet_cc_b0_4e,3918.01,261.346,1024,224,0.41,9.42,13.31
darknet21,3914.26,261.596,1024,256,3.93,7.47,20.86
efficientnet_b1,3876.9,264.116,1024,256,0.77,12.22,7.79
tf_efficientnet_b1,3834.3,267.052,1024,240,0.71,10.88,7.79
resnest14d,3793.21,269.944,1024,224,2.76,7.33,10.61
sedarknet21,3784.73,270.549,1024,256,3.93,7.47,20.95
resnext26ts,3775.5,271.211,1024,256,2.43,10.52,10.3
tf_efficientnetv2_b2,3727.06,274.735,1024,260,1.72,9.84,10.1
convnext_pico,3702.78,276.537,1024,288,2.27,10.08,9.05
edgenext_x_small,3692.42,277.311,1024,288,0.68,7.5,2.34
tf_efficientnet_cc_b0_8e,3691.33,277.395,1024,224,0.42,9.42,24.01
dpn48b,3689.99,277.494,1024,224,1.69,8.92,9.13
eca_resnext26ts,3675.59,278.583,1024,256,2.43,10.52,10.3
seresnext26ts,3670.33,278.98,1024,256,2.43,10.52,10.39
tf_efficientnet_cc_b0_4e,3665.41,279.357,1024,224,0.41,9.42,13.31
tf_efficientnet_lite2,3662.0,279.618,1024,260,0.89,12.9,6.09
nf_ecaresnet26,3619.99,282.862,1024,224,2.41,7.36,16.0
nf_seresnet26,3618.8,282.955,1024,224,2.41,7.36,17.4
gcresnext26ts,3594.7,284.852,1024,256,2.43,10.53,10.48
mobilevitv2_100,3589.19,213.964,768,256,1.84,16.08,4.9
gernet_l,3556.24,287.933,1024,256,4.57,8.0,31.08
legacy_seresnext26_32x4d,3545.88,288.774,1024,224,2.49,9.39,16.79
convnext_pico_ols,3532.27,289.886,1024,288,2.37,10.74,9.06
resnet26t,3503.33,292.28,1024,256,3.35,10.52,16.01
repvgg_a2,3454.82,296.386,1024,224,5.7,6.26,28.21
mixnet_m,3418.52,299.526,1024,224,0.36,8.19,5.01
efficientnet_b3_pruned,3356.7,305.049,1024,300,1.04,11.86,9.86
nf_regnet_b1,3352.23,305.456,1024,288,1.02,9.2,10.22
ecaresnext50t_32x4d,3339.2,306.649,1024,224,2.7,10.09,15.41
ecaresnext26t_32x4d,3337.18,306.833,1024,224,2.7,10.09,15.41
seresnext26tn_32x4d,3327.66,307.711,1024,224,2.7,10.09,16.81
seresnext26t_32x4d,3327.23,307.751,1024,224,2.7,10.09,16.81
seresnext26d_32x4d,3303.57,309.954,1024,224,2.73,10.19,16.81
tf_mixnet_m,3301.19,310.17,1024,224,0.36,8.19,5.01
convit_tiny,3286.62,311.554,1024,224,1.26,7.94,5.71
mobilevit_xs,3278.19,234.265,768,256,1.05,16.33,2.32
pit_s_224,3268.88,313.245,1024,224,2.88,11.56,23.46
pit_s_distilled_224,3266.72,313.452,1024,224,2.9,11.64,24.04
skresnet34,3242.45,315.8,1024,224,3.67,5.13,22.28
eca_botnext26ts_256,3224.24,317.583,1024,256,2.46,11.6,10.59
ecaresnet101d_pruned,3223.88,317.616,1024,224,3.48,7.69,24.88
deit_small_distilled_patch16_224,3220.79,317.922,1024,224,4.63,12.02,22.44
ecaresnetlight,3215.57,318.439,1024,224,4.11,8.42,30.16
deit_small_patch16_224,3209.05,319.085,1024,224,4.61,11.95,22.05
vit_small_patch16_224,3199.98,319.99,1024,224,4.61,11.95,22.05
eca_halonext26ts,3173.71,322.639,1024,256,2.44,11.46,10.76
convnextv2_atto,3162.98,323.733,1024,288,0.91,6.3,3.71
resnetv2_50,3158.28,324.214,1024,224,4.11,11.11,25.55
nf_regnet_b2,3133.63,326.765,1024,272,1.22,9.27,14.31
rexnetr_200,3133.12,245.111,768,224,1.59,15.11,16.52
botnet26t_256,3123.98,327.772,1024,256,3.32,11.98,12.49
coat_lite_tiny,3113.54,328.874,1024,224,1.6,11.65,5.72
vit_small_r26_s32_224,3112.34,329.001,1024,224,3.56,9.85,36.43
bat_resnext26ts,3103.95,329.89,1024,256,2.53,12.51,10.73
halonet26t,3103.39,329.95,1024,256,3.19,11.69,12.48
pvt_v2_b1,3095.14,330.828,1024,224,2.12,15.39,14.01
cspresnet50,3063.22,334.278,1024,256,4.54,11.5,21.62
resnet32ts,3055.79,335.09,1024,256,4.63,11.58,17.96
rexnet_200,3051.5,251.668,768,224,1.56,14.91,16.37
lambda_resnet26t,3046.2,336.144,1024,256,3.02,11.87,10.96
ssl_resnet50,3030.48,337.887,1024,224,4.11,11.11,25.56
gluon_resnet50_v1b,3027.43,338.23,1024,224,4.11,11.11,25.56
tv_resnet50,3027.39,338.232,1024,224,4.11,11.11,25.56
swsl_resnet50,3027.07,338.268,1024,224,4.11,11.11,25.56
resnet50,3025.4,338.455,1024,224,4.11,11.11,25.56
deit3_small_patch16_224_in21ft1k,3023.02,338.721,1024,224,4.61,11.95,22.06
deit3_small_patch16_224,3017.77,339.312,1024,224,4.61,11.95,22.06
tresnet_m,3006.54,340.578,1024,224,5.74,7.31,31.39
resnet33ts,3005.78,340.665,1024,256,4.76,11.66,19.68
vit_small_resnet26d_224,2994.08,341.995,1024,224,5.07,11.12,63.61
resnetv2_50t,2989.06,342.569,1024,224,4.32,11.82,25.57
regnetx_032,2988.15,342.675,1024,224,3.2,11.37,15.3
dpn68b,2981.13,343.481,1024,224,2.35,10.47,12.61
hrnet_w18_small_v2,2978.67,343.765,1024,224,2.62,9.65,15.6
dpn68,2975.29,344.155,1024,224,2.35,10.47,12.61
resnetv2_50d,2971.15,344.633,1024,224,4.35,11.92,25.57
efficientnet_em,2938.12,348.51,1024,240,3.04,14.34,6.9
vit_base_patch32_plus_256,2934.64,348.925,1024,256,7.79,7.76,119.48
coat_lite_mini,2921.75,350.462,1024,224,2.0,12.25,11.01
tf_efficientnet_b2,2919.63,350.718,1024,260,1.02,13.83,9.11
seresnet33ts,2919.51,350.732,1024,256,4.76,11.66,19.78
eca_resnet33ts,2917.21,351.008,1024,256,4.76,11.66,19.68
haloregnetz_b,2890.29,354.276,1024,224,1.97,11.94,11.68
coatnet_pico_rw_224,2884.58,354.98,1024,224,2.05,14.62,10.85
dla60,2883.99,355.049,1024,224,4.26,10.16,22.04
gluon_resnet50_v1c,2872.58,356.463,1024,224,4.35,11.92,25.58
resnet50t,2869.49,356.844,1024,224,4.32,11.82,25.57
gcresnet33ts,2863.36,357.609,1024,256,4.76,11.68,19.88
gluon_resnet50_v1d,2853.24,358.879,1024,224,4.35,11.92,25.58
cspresnet50d,2852.98,358.911,1024,256,4.86,12.55,21.64
resnet50d,2850.55,359.218,1024,224,4.35,11.92,25.58
vovnet39a,2845.31,359.878,1024,224,7.09,6.73,22.6
cspresnet50w,2835.31,361.148,1024,256,5.04,12.19,28.12
vgg11,2827.53,362.143,1024,224,7.61,7.44,132.86
tf_efficientnet_em,2826.28,362.303,1024,240,3.04,14.34,6.9
visformer_small,2818.88,363.251,1024,224,4.88,11.43,40.22
vit_relpos_small_patch16_224,2792.87,366.637,1024,224,4.59,13.05,21.98
vit_relpos_base_patch32_plus_rpn_256,2784.26,367.771,1024,256,7.68,8.01,119.42
vit_srelpos_small_patch16_224,2781.72,368.106,1024,224,4.59,12.16,21.97
resnest26d,2772.97,369.267,1024,224,3.64,9.97,17.07
cs3darknet_focus_l,2770.5,369.596,1024,288,5.9,10.16,21.15
efficientnet_b2a,2767.64,369.979,1024,288,1.12,16.2,9.11
efficientnet_b2,2766.98,370.065,1024,288,1.12,16.2,9.11
ese_vovnet39b,2760.12,370.986,1024,224,7.09,6.74,24.57
legacy_seresnet50,2753.49,371.881,1024,224,3.88,10.6,28.09
densenet121,2749.79,372.378,1024,224,2.87,6.9,7.98
tv_densenet121,2747.16,372.735,1024,224,2.87,6.9,7.98
eca_vovnet39b,2736.53,374.185,1024,224,7.09,6.74,22.6
coatnet_nano_cc_224,2716.19,376.986,1024,224,2.24,15.02,13.76
convnextv2_femto,2710.95,377.714,1024,288,1.3,7.56,5.23
resnetv2_50x1_bit_distilled,2704.93,378.554,1024,224,4.23,11.11,25.55
selecsls84,2697.2,379.64,1024,224,5.9,7.57,50.95
flexivit_small,2693.55,380.153,1024,240,5.35,14.18,22.06
twins_svt_small,2691.25,380.48,1024,224,2.94,13.75,24.06
mixnet_l,2678.25,382.327,1024,224,0.58,10.84,7.33
seresnet50,2674.61,382.848,1024,224,4.11,11.13,28.09
xcit_nano_12_p16_384_dist,2668.39,383.74,1024,384,1.64,12.15,3.05
cs3darknet_l,2649.93,386.412,1024,288,6.16,10.83,21.16
coatnet_nano_rw_224,2633.36,388.844,1024,224,2.41,15.41,15.14
coatnext_nano_rw_224,2627.24,389.75,1024,224,2.47,12.8,14.7
xcit_tiny_24_p16_224_dist,2617.14,391.253,1024,224,2.34,11.82,12.12
densenet121d,2616.98,391.278,1024,224,3.11,7.7,8.0
xcit_tiny_24_p16_224,2614.91,391.584,1024,224,2.34,11.82,12.12
resnet50_gn,2599.07,393.975,1024,224,4.14,11.11,25.56
vit_relpos_small_patch16_rpn_224,2596.73,394.33,1024,224,4.59,13.05,21.97
res2net50_48w_2s,2593.21,394.865,1024,224,4.18,11.72,25.29
mobilevit_s,2587.93,296.749,768,256,2.03,19.94,5.58
convnext_nano,2579.36,396.983,1024,288,4.06,13.84,15.59
tf_mixnet_l,2577.4,397.288,1024,224,0.58,10.84,7.33
resnetaa50d,2573.35,397.912,1024,224,5.39,12.44,25.58
vgg11_bn,2556.04,400.607,1024,224,7.62,7.44,132.87
seresnet50t,2550.33,401.504,1024,224,4.32,11.83,28.1
ecaresnet50d,2544.16,402.478,1024,224,4.35,11.93,25.58
gcvit_xxtiny,2518.13,406.639,1024,224,2.14,15.36,12.0
cs3sedarknet_l,2502.51,409.176,1024,288,6.16,10.83,21.91
resnetrs50,2497.73,409.96,1024,224,4.48,12.14,35.69
mobilevitv2_125,2489.87,308.438,768,256,2.86,20.1,7.48
resnetblur50,2484.87,412.08,1024,224,5.16,12.02,25.56
cspresnext50,2483.24,412.352,1024,256,4.05,15.86,20.57
gluon_resnet50_v1s,2459.02,416.413,1024,224,5.47,13.52,25.68
efficientnet_cc_b1_8e,2458.85,416.443,1024,240,0.75,15.44,39.72
vit_base_resnet26d_224,2458.01,416.584,1024,224,6.97,13.16,101.4
densenetblur121d,2444.58,418.873,1024,224,3.11,7.9,8.0
tv_resnext50_32x4d,2431.41,421.143,1024,224,4.26,14.4,25.03
ssl_resnext50_32x4d,2431.35,421.155,1024,224,4.26,14.4,25.03
swsl_resnext50_32x4d,2430.87,421.236,1024,224,4.26,14.4,25.03
resnext50_32x4d,2429.56,421.462,1024,224,4.26,14.4,25.03
gluon_resnext50_32x4d,2428.35,421.674,1024,224,4.26,14.4,25.03
dla60x,2414.82,424.035,1024,224,3.54,13.8,17.35
efficientnet_lite3,2407.43,212.664,512,300,1.65,21.85,8.2
regnetx_040,2406.98,425.416,1024,224,3.99,12.2,22.12
semobilevit_s,2404.63,319.371,768,256,2.03,19.95,5.74
gcresnext50ts,2402.57,426.196,1024,256,3.75,15.46,15.67
regnety_040s_gn,2385.11,429.317,1024,224,4.03,12.29,20.65
resnetblur50d,2367.52,432.507,1024,224,5.4,12.82,25.58
vovnet57a,2360.79,433.737,1024,224,8.95,7.52,36.64
tf_efficientnet_cc_b1_8e,2357.71,434.307,1024,240,0.75,15.44,39.72
resmlp_24_distilled_224,2351.85,435.39,1024,224,5.96,10.91,30.02
resmlp_24_224,2345.81,436.509,1024,224,5.96,10.91,30.02
res2net50_14w_8s,2341.48,437.317,1024,224,4.21,13.28,25.06
coatnet_rmlp_nano_rw_224,2340.53,437.494,1024,224,2.62,20.34,15.15
sehalonet33ts,2339.44,328.271,768,256,3.55,14.7,13.69
res2net50_26w_4s,2338.49,437.876,1024,224,4.28,12.61,25.7
convnext_nano_ols,2328.37,439.779,1024,288,4.38,15.5,15.65
lambda_resnet26rpt_256,2324.88,165.158,384,256,3.16,11.87,10.99
gmixer_24_224,2324.82,440.451,1024,224,5.28,14.45,24.72
gcresnet50t,2321.78,441.028,1024,256,5.42,14.67,25.9
resnext50d_32x4d,2317.05,441.929,1024,224,4.5,15.2,25.05
resnest50d_1s4x24d,2309.9,443.296,1024,224,4.43,13.57,25.68
seresnetaa50d,2309.78,443.319,1024,224,5.4,12.46,28.11
dla60_res2net,2301.91,444.834,1024,224,4.15,12.34,20.85
vit_base_r26_s32_224,2301.77,444.864,1024,224,6.81,12.36,101.38
twins_pcpvt_small,2290.09,447.132,1024,224,3.83,18.08,24.11
regnetz_b16,2286.62,447.81,1024,288,2.39,16.43,9.72
ese_vovnet57b,2267.23,451.64,1024,224,8.95,7.52,38.61
gluon_inception_v3,2265.31,452.024,1024,299,5.73,8.97,23.83
inception_v3,2260.97,452.888,1024,299,5.73,8.97,23.83
adv_inception_v3,2258.89,453.305,1024,299,5.73,8.97,23.83
tf_inception_v3,2255.73,453.943,1024,299,5.73,8.97,23.83
densenet169,2232.91,458.582,1024,224,3.4,7.3,14.15
tf_efficientnetv2_b3,2223.64,460.493,1024,300,3.04,15.74,14.36
nf_ecaresnet50,2211.52,463.019,1024,224,4.21,11.13,25.56
nf_seresnet50,2207.21,463.921,1024,224,4.21,11.13,28.09
skresnet50,2206.75,464.017,1024,224,4.11,12.5,25.8
edgenext_small,2206.31,464.109,1024,320,1.97,14.16,5.59
seresnext50_32x4d,2197.09,466.058,1024,224,4.26,14.42,27.56
gluon_seresnext50_32x4d,2196.94,466.091,1024,224,4.26,14.42,27.56
xcit_small_12_p16_224_dist,2195.81,466.33,1024,224,4.82,12.58,26.25
legacy_seresnext50_32x4d,2193.34,466.856,1024,224,4.26,14.42,27.56
xcit_small_12_p16_224,2190.16,467.534,1024,224,4.82,12.58,26.25
repvgg_b1g4,2188.83,467.817,1024,224,8.15,10.64,39.97
tf_efficientnet_lite3,2188.37,233.953,512,300,1.65,21.85,8.2
efficientnetv2_rw_t,2170.03,471.87,1024,288,3.19,16.42,13.65
gmlp_s16_224,2164.56,473.061,1024,224,4.42,15.1,19.42
dla60_res2next,2126.26,481.583,1024,224,3.49,13.17,17.03
gc_efficientnetv2_rw_t,2126.09,481.621,1024,288,3.2,16.45,13.68
skresnet50d,2112.57,484.703,1024,224,4.36,13.31,25.82
mobilevitv2_150,2105.0,243.219,512,256,4.09,24.11,10.59
mobilevitv2_150_in22ft1k,2104.51,243.274,512,256,4.09,24.11,10.59
convnextv2_pico,2092.16,489.434,1024,288,2.27,10.08,9.07
poolformer_s24,2090.38,489.851,1024,224,3.41,10.68,21.39
cs3sedarknet_xdw,2090.04,489.929,1024,256,5.97,17.18,21.6
res2next50,2085.23,491.055,1024,224,4.2,13.71,24.67
cspdarknet53,2084.51,491.231,1024,256,6.57,16.81,27.64
fbnetv3_g,2084.48,491.238,1024,288,1.77,21.09,16.62
crossvit_small_240,2074.04,493.709,1024,240,5.63,18.17,26.86
deit3_medium_patch16_224_in21ft1k,2064.27,496.046,1024,224,8.0,15.93,38.85
deit3_medium_patch16_224,2063.34,496.268,1024,224,8.0,15.93,38.85
xcit_nano_12_p8_224_dist,2049.01,499.742,1024,224,2.16,15.71,3.05
xcit_nano_12_p8_224,2044.48,500.848,1024,224,2.16,15.71,3.05
nf_regnet_b3,2035.39,503.085,1024,320,2.05,14.61,18.59
cs3darknet_focus_x,2017.73,507.488,1024,256,8.03,10.69,35.02
vit_relpos_medium_patch16_cls_224,2000.38,511.89,1024,224,8.03,18.24,38.76
lambda_resnet50ts,1991.21,514.246,1024,256,5.07,17.48,21.54
swin_tiny_patch4_window7_224,1978.72,517.495,1024,224,4.51,17.06,28.29
sebotnet33ts_256,1959.75,195.932,384,256,3.89,17.46,13.7
coatnet_0_rw_224,1957.32,523.148,1024,224,4.43,18.73,27.44
ecaresnet26t,1953.32,524.224,1024,320,5.24,16.44,16.01
regnetx_080,1942.5,527.144,1024,224,8.02,14.06,39.57
gcvit_xtiny,1941.57,527.393,1024,224,2.93,20.26,19.98
resnetv2_101,1925.46,531.806,1024,224,7.83,16.23,44.54
regnetx_064,1920.06,533.303,1024,224,6.49,16.37,26.21
mixnet_xl,1918.85,533.64,1024,224,0.93,14.57,11.9
edgenext_small_rw,1912.9,535.3,1024,320,2.46,14.85,7.83
vit_relpos_medium_patch16_224,1907.96,536.687,1024,224,7.97,17.02,38.75
vit_srelpos_medium_patch16_224,1900.57,538.773,1024,224,7.96,16.21,38.74
resnest50d,1896.74,539.858,1024,224,5.4,14.36,27.48
crossvit_15_240,1894.86,540.397,1024,240,5.81,19.77,27.53
vit_base_resnet50d_224,1892.78,540.989,1024,224,8.73,16.92,110.97
gluon_resnet101_v1b,1879.26,544.883,1024,224,7.83,16.23,44.55
tv_resnet101,1878.26,545.172,1024,224,7.83,16.23,44.55
resnet101,1875.25,546.047,1024,224,7.83,16.23,44.55
dla102,1873.79,546.472,1024,224,7.19,14.18,33.27
efficientformer_l3,1868.08,548.142,1024,224,3.93,12.01,31.41
maxvit_rmlp_pico_rw_256,1866.73,411.402,768,256,1.85,24.86,7.52
resnetv2_101d,1855.94,551.727,1024,224,8.07,17.04,44.56
pvt_v2_b2,1835.92,557.745,1024,224,4.05,27.53,25.36
maxvit_pico_rw_256,1829.44,419.787,768,256,1.83,22.3,7.46
vgg13,1820.36,562.512,1024,224,11.31,12.25,133.05
lamhalobotnet50ts_256,1818.57,563.067,1024,256,5.02,18.44,22.57
crossvit_15_dagger_240,1817.96,563.255,1024,240,6.13,20.43,28.21
gluon_resnet101_v1c,1816.14,563.82,1024,224,8.08,17.04,44.57
res2net50_26w_6s,1811.81,565.168,1024,224,6.33,15.28,37.05
gluon_resnet101_v1d,1808.21,566.295,1024,224,8.08,17.04,44.57
swin_s3_tiny_224,1803.67,567.72,1024,224,4.64,19.13,28.33
coatnet_rmlp_0_rw_224,1803.63,567.733,1024,224,4.72,24.89,27.45
vit_relpos_medium_patch16_rpn_224,1770.72,578.284,1024,224,7.97,17.02,38.73
halonet50ts,1765.73,579.917,1024,256,5.3,19.2,22.73
repvgg_b1,1760.92,581.5,1024,224,13.16,10.64,57.42
coatnet_bn_0_rw_224,1753.99,583.799,1024,224,4.67,22.04,27.44
wide_resnet50_2,1747.87,585.844,1024,224,11.43,14.4,68.88
efficientnet_b3,1741.21,294.036,512,320,2.01,26.52,12.23
efficientnet_b3a,1740.84,294.1,512,320,2.01,26.52,12.23
densenet201,1738.22,589.096,1024,224,4.34,7.85,20.01
coatnet_0_224,1727.45,296.376,512,224,4.58,24.01,25.04
darknetaa53,1721.33,594.876,1024,288,10.08,15.68,36.02
tf_efficientnet_b3,1720.61,297.558,512,300,1.87,23.83,12.23
cait_xxs24_224,1720.1,595.301,1024,224,2.53,20.29,11.96
vit_large_patch32_224,1718.53,595.845,1024,224,15.41,13.32,327.9
mobilevitv2_175,1697.71,301.572,512,256,5.54,28.13,14.25
mobilevitv2_175_in22ft1k,1697.51,301.606,512,256,5.54,28.13,14.25
xcit_tiny_12_p16_384_dist,1694.92,604.145,1024,384,3.64,18.26,6.72
pvt_v2_b2_li,1694.45,604.311,1024,224,3.91,27.6,22.55
coat_lite_small,1694.41,604.328,1024,224,3.96,22.09,19.84
resnetaa101d,1692.59,604.976,1024,224,9.12,17.56,44.57
legacy_seresnet101,1686.93,607.005,1024,224,7.61,15.74,49.33
tresnet_v2_l,1685.52,607.515,1024,224,8.81,16.34,46.17
hrnet_w18,1679.12,609.832,1024,224,4.32,16.31,21.3
vit_medium_patch16_gap_240,1667.0,614.264,1024,240,9.22,18.81,44.4
vit_tiny_patch16_384,1660.88,616.528,1024,384,4.7,25.39,5.79
regnetv_040,1659.81,616.926,1024,288,6.6,20.3,20.64
convnext_tiny_hnf,1659.73,616.951,1024,288,7.39,22.21,28.59
seresnet101,1655.13,618.666,1024,224,7.84,16.27,49.33
vit_base_patch32_384,1651.29,620.109,1024,384,13.06,16.5,88.3
vit_base_patch32_clip_384,1649.72,620.7,1024,384,13.06,16.5,88.3
regnety_040,1647.66,621.47,1024,288,6.61,20.3,20.65
regnety_032,1645.25,622.383,1024,288,5.29,18.61,19.44
gluon_resnet101_v1s,1642.29,623.505,1024,224,9.19,18.64,44.67
vgg13_bn,1634.19,626.596,1024,224,11.33,12.25,133.05
resnetaa50,1631.05,627.803,1024,288,8.52,19.24,25.56
mixer_b16_224_miil,1628.71,628.706,1024,224,12.62,14.53,59.88
mixer_b16_224,1627.79,629.061,1024,224,12.62,14.53,59.88
convnext_tiny,1626.95,629.384,1024,288,7.39,22.21,28.59
nf_resnet101,1620.77,631.785,1024,224,8.01,16.23,44.55
swinv2_cr_tiny_224,1618.15,632.807,1024,224,4.66,28.45,28.33
ecaresnet101d,1609.33,636.276,1024,224,8.08,17.07,44.57
twins_pcpvt_base,1605.41,637.831,1024,224,6.68,25.25,43.83
dla102x,1601.78,639.274,1024,224,5.89,19.42,26.31
ese_vovnet39b_evos,1601.47,639.4,1024,224,7.07,6.74,24.58
darknet53,1597.03,641.177,1024,288,11.78,15.68,41.61
resnetblur101d,1596.24,641.494,1024,224,9.12,17.94,44.57
resnet51q,1592.08,643.172,1024,288,8.07,20.94,35.7
swinv2_cr_tiny_ns_224,1591.39,643.448,1024,224,4.66,28.45,28.33
mixer_l32_224,1583.03,646.85,1024,224,11.27,19.86,206.94
resmlp_36_distilled_224,1577.86,648.967,1024,224,8.91,16.33,44.69
resmlp_36_224,1577.4,649.158,1024,224,8.91,16.33,44.69
resnetv2_50d_gn,1561.87,655.61,1024,288,7.24,19.7,25.57
botnet50ts_256,1556.81,246.643,384,256,5.54,22.23,22.74
nf_resnet50,1548.83,661.132,1024,288,6.88,18.37,25.56
resnetv2_50d_frn,1547.35,661.764,1024,224,4.33,11.92,25.59
halo2botnet50ts_256,1546.64,496.545,768,256,5.02,21.78,22.64
mvitv2_tiny,1534.63,667.247,1024,224,4.7,21.16,24.17
gluon_resnext101_32x4d,1505.04,680.366,1024,224,8.01,21.23,44.18
swsl_resnext101_32x4d,1504.46,680.63,1024,224,8.01,21.23,44.18
cs3darknet_x,1504.38,680.665,1024,288,10.6,14.36,35.05
ssl_resnext101_32x4d,1503.93,680.869,1024,224,8.01,21.23,44.18
resnext101_32x4d,1503.63,681.005,1024,224,8.01,21.23,44.18
resnest50d_4s2x40d,1497.58,683.755,1024,224,4.4,17.94,30.42
convnextv2_nano,1488.75,515.858,768,288,4.06,13.84,15.62
skresnext50_32x4d,1478.83,692.427,1024,224,4.5,17.18,27.48
mobilevitv2_200,1478.44,519.454,768,256,7.22,32.15,18.45
tresnet_l,1477.44,693.076,1024,224,10.88,11.9,55.99
mobilevitv2_200_in22ft1k,1477.37,519.83,768,256,7.22,32.15,18.45
vgg16,1475.59,693.946,1024,224,15.47,13.56,138.36
regnetz_c16,1475.58,693.953,1024,320,3.92,25.88,13.46
resnetv2_50d_evob,1468.61,697.244,1024,224,4.33,11.92,25.59
vit_medium_patch16_gap_256,1467.03,697.996,1024,256,10.59,22.15,38.86
res2net50_26w_8s,1466.52,698.239,1024,224,8.37,17.95,48.4
sequencer2d_s,1465.84,698.562,1024,224,4.96,11.31,27.65
eca_nfnet_l0,1461.61,700.586,1024,288,7.12,17.29,24.14
nfnet_l0,1460.27,701.228,1024,288,7.13,17.29,35.07
cs3sedarknet_x,1435.72,713.217,1024,288,10.6,14.37,35.4
resnet61q,1434.01,714.068,1024,288,9.87,21.52,36.85
res2net101_26w_4s,1424.71,718.728,1024,224,8.1,18.45,45.21
repvgg_b2g4,1415.15,723.581,1024,224,12.63,12.9,61.76
nest_tiny,1413.2,543.434,768,224,5.83,25.48,17.06
poolformer_s36,1408.65,726.922,1024,224,5.0,15.82,30.86
maxvit_rmlp_nano_rw_256,1404.06,546.971,768,256,4.47,31.92,15.5
convit_small,1397.72,732.608,1024,224,5.76,17.87,27.78
jx_nest_tiny,1387.89,553.347,768,224,5.83,25.48,17.06
maxvit_nano_rw_256,1378.18,557.246,768,256,4.46,30.28,15.45
nf_ecaresnet101,1373.28,745.649,1024,224,8.01,16.27,44.55
nf_seresnet101,1369.04,747.958,1024,224,8.02,16.27,49.33
gluon_seresnext101_32x4d,1358.35,753.84,1024,224,8.02,21.26,48.96
legacy_seresnext101_32x4d,1357.27,754.442,1024,224,8.02,21.26,48.96
efficientnet_b3_gn,1357.0,282.964,384,320,2.14,28.83,11.73
nfnet_f0,1356.65,754.786,1024,256,12.62,18.05,71.49
seresnext101_32x4d,1356.0,755.148,1024,224,8.02,21.26,48.96
resnetv2_152,1353.28,756.668,1024,224,11.55,22.56,60.19
xception,1353.17,567.542,768,299,8.4,35.83,22.86
twins_svt_base,1350.54,758.199,1024,224,8.59,26.33,56.07
crossvit_18_240,1343.82,761.996,1024,240,9.05,26.26,43.27
ese_vovnet99b_iabn,1343.72,762.049,1024,224,16.49,11.27,63.2
maxxvit_rmlp_nano_rw_256,1341.45,763.341,1024,256,4.37,26.05,16.78
regnetx_120,1339.05,764.708,1024,224,12.13,21.37,46.11
vgg16_bn,1336.79,765.998,1024,224,15.5,13.56,138.37
dpn92,1330.6,769.562,1024,224,6.54,18.21,37.67
tv_resnet152,1329.75,770.054,1024,224,11.56,22.56,60.19
gcvit_tiny,1328.61,770.718,1024,224,4.79,29.82,28.22
gluon_resnet152_v1b,1328.2,770.954,1024,224,11.56,22.56,60.19
resnet152,1327.13,771.578,1024,224,11.56,22.56,60.19
ese_vovnet99b,1316.93,777.554,1024,224,16.51,11.27,63.2
pvt_v2_b3,1316.31,777.917,1024,224,6.92,37.7,45.24
xcit_tiny_12_p8_224_dist,1300.55,787.348,1024,224,4.81,23.6,6.71
xcit_tiny_12_p8_224,1299.96,787.704,1024,224,4.81,23.6,6.71
crossvit_18_dagger_240,1298.96,788.312,1024,240,9.5,27.03,44.27
hrnet_w32,1297.82,789.002,1024,224,8.97,22.02,41.23
gluon_resnet152_v1c,1296.47,789.825,1024,224,11.8,23.36,60.21
resnetv2_152d,1296.37,789.881,1024,224,11.8,23.36,60.2
gluon_resnet152_v1d,1293.21,791.811,1024,224,11.8,23.36,60.21
vit_small_resnet50d_s16_224,1288.35,794.801,1024,224,13.48,24.82,57.53
cs3edgenet_x,1281.15,799.266,1024,288,14.59,16.36,47.82
edgenext_base,1272.74,804.548,1024,320,6.01,24.32,18.51
regnety_120,1268.38,807.318,1024,224,12.14,21.38,51.82
dla169,1258.34,813.753,1024,224,11.6,20.2,53.39
hrnet_w30,1252.2,817.74,1024,224,8.15,21.21,37.71
xception41p,1249.06,409.896,512,299,9.25,39.86,26.91
maxxvitv2_nano_rw_256,1248.81,819.967,1024,256,6.26,23.05,23.7
ecaresnet50t,1243.91,823.198,1024,320,8.82,24.13,25.57
vgg19,1237.03,827.774,1024,224,19.63,14.86,143.67
swin_small_patch4_window7_224,1228.67,833.406,1024,224,8.77,27.47,49.61
efficientnet_el_pruned,1220.93,838.69,1024,300,8.0,30.7,10.59
densenet161,1220.41,839.05,1024,224,7.79,11.06,28.68
efficientnet_el,1218.76,840.187,1024,300,8.0,30.7,10.59
deit_base_distilled_patch16_224,1211.4,845.292,1024,224,17.68,24.05,87.34
vit_base_patch16_224,1209.0,846.969,1024,224,17.58,23.9,86.57
vit_base_patch16_224_miil,1208.72,847.163,1024,224,17.59,23.91,94.4
deit_base_patch16_224,1208.56,847.275,1024,224,17.58,23.9,86.57
vit_base_patch16_clip_224,1205.77,849.236,1024,224,17.58,23.9,86.57
gluon_resnet152_v1s,1205.41,849.488,1024,224,12.92,24.96,60.32
coatnet_rmlp_1_rw_224,1201.89,851.979,1024,224,7.85,35.47,41.69
maxvit_tiny_rw_224,1200.3,853.107,1024,224,5.11,33.11,29.06
mixnet_xxl,1193.04,643.721,768,224,2.04,23.43,23.96
tf_efficientnet_el,1192.11,858.967,1024,300,8.0,30.7,10.59
swinv2_tiny_window8_256,1191.01,859.761,1024,256,5.96,24.57,28.35
volo_d1_224,1190.57,860.079,1024,224,6.94,24.43,26.63
repvgg_b2,1183.91,864.916,1024,224,20.45,12.9,89.02
legacy_seresnet152,1181.09,866.978,1024,224,11.33,22.08,66.82
xcit_small_24_p16_224_dist,1175.31,871.245,1024,224,9.1,23.64,47.67
xcit_small_24_p16_224,1174.76,871.656,1024,224,9.1,23.64,47.67
inception_v4,1168.76,876.127,1024,299,12.28,15.09,42.68
seresnet152,1166.02,878.19,1024,224,11.57,22.61,66.82
twins_pcpvt_large,1163.18,880.331,1024,224,9.84,35.82,60.99
deit3_base_patch16_224,1159.4,883.201,1024,224,17.58,23.9,86.59
deit3_base_patch16_224_in21ft1k,1159.14,883.404,1024,224,17.58,23.9,86.59
cait_xxs36_224,1156.4,885.493,1024,224,3.77,30.34,17.3
vit_base_patch32_clip_448,1154.9,886.645,1024,448,17.93,23.9,88.34
regnetx_160,1153.07,888.048,1024,224,15.99,25.52,54.28
dm_nfnet_f0,1152.75,888.293,1024,256,12.62,18.05,71.49
sequencer2d_m,1147.71,892.201,1024,224,6.55,14.26,38.31
repvgg_b3g4,1145.87,893.631,1024,224,17.89,15.1,83.83
mvitv2_small_cls,1144.7,894.542,1024,224,7.04,28.17,34.87
mvitv2_small,1143.83,895.224,1024,224,7.0,28.08,34.87
efficientnet_lite4,1139.64,336.935,384,380,4.04,45.66,13.01
tnt_s_patch16_224,1135.12,902.091,1024,224,5.24,24.37,23.76
convmixer_1024_20_ks9_p14,1130.85,905.497,1024,224,5.55,5.51,24.38
vgg19_bn,1127.16,908.464,1024,224,19.66,14.86,143.68
vit_relpos_base_patch16_clsgap_224,1124.58,910.547,1024,224,17.6,25.12,86.43
vit_relpos_base_patch16_cls_224,1122.76,912.026,1024,224,17.6,25.12,86.43
coatnet_rmlp_1_rw2_224,1119.61,914.591,1024,224,8.11,40.13,41.72
beit_base_patch16_224,1109.32,923.073,1024,224,17.58,23.9,86.53
xception41,1107.6,462.251,512,299,9.28,39.86,26.97
tresnet_xl,1106.51,925.423,1024,224,15.17,15.34,78.44
beitv2_base_patch16_224,1106.05,925.798,1024,224,17.58,23.9,86.53
coat_tiny,1099.16,931.604,1024,224,4.35,27.2,5.5
vit_base_patch16_gap_224,1085.51,943.323,1024,224,17.49,25.59,86.57
maxvit_tiny_tf_224,1081.57,710.062,768,224,5.6,35.78,30.92
vit_relpos_base_patch16_224,1078.21,949.713,1024,224,17.51,24.97,86.43
nf_regnet_b4,1075.82,951.823,1024,384,4.7,28.61,30.21
coatnet_1_rw_224,1074.48,953.005,1024,224,8.04,34.6,41.72
dla102x2,1070.83,956.252,1024,224,9.34,29.91,41.28
pit_b_224,1066.8,479.928,512,224,12.42,32.94,73.76
pit_b_distilled_224,1063.31,481.504,512,224,12.5,33.07,74.79
tf_efficientnet_lite4,1058.68,362.703,384,380,4.04,45.66,13.01
efficientnetv2_s,1057.28,968.508,1024,384,8.44,35.77,21.46
vit_large_r50_s32_224,1034.79,989.556,1024,224,19.58,24.41,328.99
vit_small_patch16_36x1_224,1032.1,992.142,1024,224,13.71,35.69,64.67
efficientnet_b3_g8_gn,1031.26,496.465,512,320,3.2,28.83,14.25
tf_efficientnetv2_s,1029.13,995.002,1024,384,8.44,35.77,21.46
flexivit_base,1028.55,995.558,1024,240,20.29,28.36,86.59
vit_base_patch16_rpn_224,1016.66,1007.208,1024,224,17.49,23.75,86.54
vit_small_r26_s32_384,1011.11,1012.73,1024,384,10.43,29.85,36.47
vit_small_patch16_18x2_224,1005.34,1018.547,1024,224,13.71,35.69,64.67
swinv2_cr_small_224,1000.71,1023.259,1024,224,9.07,50.27,49.7
efficientnetv2_rw_s,995.91,1028.19,1024,384,8.72,38.03,23.94
wide_resnet101_2,995.32,1028.801,1024,224,22.8,21.23,126.89
swinv2_cr_small_ns_224,989.25,1035.114,1024,224,9.08,50.27,49.7
vit_relpos_base_patch16_rpn_224,986.84,1037.641,1024,224,17.51,24.97,86.41
coatnet_1_224,984.69,519.944,512,224,8.7,39.0,42.23
resnet200,983.36,1041.314,1024,224,15.07,32.19,64.67
dpn98,982.09,1042.657,1024,224,11.73,25.2,61.57
convnext_small,981.97,1042.782,1024,288,14.39,35.65,50.22
cs3se_edgenet_x,975.89,1049.279,1024,320,18.01,20.21,50.72
regnety_080,969.67,1056.01,1024,288,13.22,29.69,39.18
poolformer_m36,966.97,1058.965,1024,224,8.8,22.02,56.17
resnest101e,963.69,1062.57,1024,256,13.38,28.66,48.28
regnetz_b16_evos,955.65,803.632,768,288,2.36,16.43,9.74
twins_svt_large,954.95,1072.291,1024,224,15.15,35.1,99.27
pvt_v2_b4,952.02,1075.594,1024,224,10.14,53.74,62.56
gluon_resnext101_64x4d,944.48,1084.183,1024,224,15.52,31.21,83.46
regnetv_064,944.32,1084.367,1024,288,10.55,27.11,30.58
regnety_064,944.18,1084.526,1024,288,10.56,27.11,30.58
maxvit_rmlp_tiny_rw_256,941.64,815.588,768,256,6.77,46.92,29.15
regnetz_d8,936.16,1093.814,1024,320,6.19,37.08,23.37
resnetrs101,936.12,1093.858,1024,288,13.56,28.53,63.62
regnetz_d32,933.58,1096.833,1024,320,9.33,37.08,27.58
ig_resnext101_32x8d,930.9,1099.997,1024,224,16.48,31.21,88.79
swsl_resnext101_32x8d,930.28,1100.725,1024,224,16.48,31.21,88.79
resnext101_32x8d,929.98,1101.084,1024,224,16.48,31.21,88.79
ssl_resnext101_32x8d,929.0,1102.24,1024,224,16.48,31.21,88.79
convnextv2_tiny,925.13,553.423,512,288,7.39,22.21,28.64
convnextv2_small,924.53,1107.57,1024,224,8.71,21.56,50.32
maxvit_tiny_rw_256,921.72,833.209,768,256,6.74,44.35,29.07
inception_resnet_v2,917.69,1115.834,1024,299,13.18,25.06,55.84
ens_adv_inception_resnet_v2,917.66,1115.871,1024,299,13.18,25.06,55.84
maxxvit_rmlp_tiny_rw_256,914.74,1119.428,1024,256,6.66,39.76,29.64
xcit_tiny_24_p16_384_dist,912.61,1122.045,1024,384,6.87,34.29,12.12
cait_s24_224,908.65,1126.929,1024,224,9.35,40.58,46.92
pvt_v2_b5,904.89,1131.615,1024,224,11.76,50.92,81.96
nest_small,902.63,850.834,768,224,10.35,40.04,38.35
repvgg_b3,901.73,1135.583,1024,224,29.16,15.1,123.09
maxvit_tiny_pm_256,896.67,1141.994,1024,256,6.61,47.9,30.09
xception65p,896.53,571.079,512,299,13.91,52.48,39.82
swin_s3_small_224,896.35,856.792,768,224,9.43,37.84,49.74
jx_nest_small,892.32,860.663,768,224,10.35,40.04,38.35
efficientnet_b4,890.89,431.018,384,384,4.51,50.04,19.34
gmlp_b16_224,885.75,1156.072,1024,224,15.78,30.21,73.08
gluon_seresnext101_64x4d,885.23,1156.747,1024,224,15.53,31.25,88.23
hrnet_w40,881.9,1161.12,1024,224,12.75,25.29,57.56
efficientformer_l7,877.43,1167.027,1024,224,10.17,24.45,82.23
coat_mini,874.29,1171.227,1024,224,6.82,33.68,10.34
resnet101d,871.81,1174.559,1024,320,16.48,34.77,44.57
swin_base_patch4_window7_224,870.1,1176.867,1024,224,15.47,36.63,87.77
regnetz_040,868.17,884.605,768,320,6.35,37.78,27.12
regnetz_040h,862.76,890.151,768,320,6.43,37.94,28.94
mobilevitv2_150_384_in22ft1k,848.7,301.627,256,384,9.2,54.25,10.59
resnetv2_50d_evos,844.34,909.573,768,288,7.15,19.7,25.59
tf_efficientnet_b4,838.16,458.136,384,380,4.49,49.49,19.34
crossvit_base_240,835.31,919.411,768,240,21.22,36.33,105.03
vit_base_r50_s16_224,821.15,1247.01,1024,224,21.67,35.31,114.69
xcit_medium_24_p16_224_dist,819.59,1249.397,1024,224,16.13,31.71,84.4
xcit_medium_24_p16_224,818.73,1250.697,1024,224,16.13,31.71,84.4
gcvit_small,807.46,1268.151,1024,224,8.57,41.61,51.09
gluon_xception65,806.21,635.055,512,299,13.96,52.48,39.92
xception65,800.01,639.983,512,299,13.96,52.48,39.92
mvitv2_base,799.31,1281.092,1024,224,10.16,40.5,51.47
hrnet_w44,789.29,1297.348,1024,224,14.94,26.92,67.06
vit_base_patch16_plus_240,780.68,1311.665,1024,240,27.41,33.08,117.56
hrnet_w48,780.39,1312.147,1024,224,17.34,28.56,77.47
swinv2_tiny_window16_256,778.19,657.926,512,256,6.68,39.02,28.35
tresnet_m_448,775.99,1319.596,1024,448,22.94,29.21,31.39
xcit_small_12_p16_384_dist,760.88,1345.804,1024,384,14.14,36.51,26.25
vit_small_patch16_384,750.95,1022.685,768,384,15.52,50.78,22.2
maxvit_rmlp_small_rw_224,745.49,1373.585,1024,224,10.75,49.3,64.9
sequencer2d_l,742.48,1379.149,1024,224,9.74,22.12,54.3
swinv2_small_window8_256,738.39,1386.788,1024,256,11.58,40.14,49.73
swin_s3_base_224,730.45,1401.854,1024,224,13.69,48.26,71.13
poolformer_m48,729.44,1403.808,1024,224,11.59,29.17,73.47
densenet264d_iabn,727.43,1407.671,1024,224,13.47,14.0,72.74
vit_relpos_base_patch16_plus_240,723.43,1415.468,1024,240,27.3,34.33,117.38
dpn131,722.72,1416.854,1024,224,16.09,32.97,79.25
tnt_b_patch16_224,722.12,1418.026,1024,224,14.09,39.01,65.41
deit3_small_patch16_384,717.36,1070.572,768,384,15.52,50.78,22.21
deit3_small_patch16_384_in21ft1k,716.76,1071.477,768,384,15.52,50.78,22.21
swinv2_cr_base_224,715.64,1430.874,1024,224,15.86,59.66,87.88
eca_nfnet_l1,713.15,1435.867,1024,320,14.92,34.42,41.41
coatnet_2_rw_224,709.88,721.237,512,224,15.09,49.22,73.87
swinv2_cr_base_ns_224,709.69,1442.871,1024,224,15.86,59.66,87.88
coatnet_rmlp_2_rw_224,708.85,722.285,512,224,15.18,54.78,73.88
convit_base,706.65,1449.076,1024,224,17.52,31.77,86.54
mobilevitv2_175_384_in22ft1k,703.41,363.928,256,384,12.47,63.29,14.25
maxvit_small_tf_224,701.58,729.767,512,224,11.66,53.17,68.93
densenet264,701.03,1460.686,1024,224,12.95,12.8,72.69
ecaresnet200d,694.19,1475.094,1024,256,20.0,43.15,64.69
resnetv2_50x1_bitm,691.29,740.624,512,448,16.62,44.46,25.55
seresnet200d,691.25,1481.355,1024,256,20.01,43.15,71.86
xcit_tiny_24_p8_224,684.73,1495.467,1024,224,9.21,45.39,12.11
xcit_tiny_24_p8_224_dist,684.22,1496.573,1024,224,9.21,45.39,12.11
convnext_base,682.42,1500.518,1024,288,25.43,47.53,88.59
volo_d2_224,663.51,1543.3,1024,224,14.34,41.34,58.68
coatnet_2_224,660.84,581.062,384,224,16.5,52.67,74.68
legacy_senet154,654.15,1565.387,1024,224,20.77,38.69,115.09
gluon_senet154,654.04,1565.641,1024,224,20.77,38.69,115.09
senet154,653.94,1565.866,1024,224,20.77,38.69,115.09
xcit_nano_12_p8_384_dist,646.53,1583.823,1024,384,6.34,46.08,3.05
dpn107,646.38,1584.202,1024,224,18.38,33.46,86.92
nest_base,640.55,799.298,512,224,17.96,53.39,67.72
jx_nest_base,633.53,808.151,512,224,17.96,53.39,67.72
mobilevitv2_200_384_in22ft1k,626.31,408.731,256,384,16.24,72.34,18.45
xception71,619.72,826.163,512,299,18.09,69.92,42.34
hrnet_w64,618.15,1656.539,1024,224,28.97,35.09,128.06
resnet152d,618.09,1656.699,1024,320,24.08,47.67,60.21
regnetz_c16_evos,604.19,847.399,512,320,3.86,25.88,13.49
gcvit_base,594.61,1722.135,1024,224,14.87,55.48,90.32
regnety_160,594.3,1292.258,768,288,26.37,38.07,83.59
maxxvit_rmlp_small_rw_256,588.15,1741.023,1024,256,14.67,58.38,66.01
xcit_small_12_p8_224,582.04,1759.324,1024,224,18.69,47.21,26.21
xcit_small_12_p8_224_dist,581.74,1760.224,1024,224,18.69,47.21,26.21
maxvit_rmlp_small_rw_256,575.72,1333.976,768,256,14.15,66.09,64.9
regnetx_320,551.07,1393.631,768,224,31.81,36.3,107.81
seresnet152d,547.51,1870.27,1024,320,24.09,47.72,66.84
resnetrs152,544.33,1881.196,1024,320,24.34,48.14,86.62
vit_large_patch32_384,543.23,1884.997,1024,384,45.31,43.86,306.63
halonet_h1,540.47,473.65,256,256,3.0,51.17,8.1
seresnet269d,540.42,1894.818,1024,256,26.59,53.6,113.67
swinv2_base_window8_256,529.22,1451.182,768,256,20.37,52.59,87.92
maxxvitv2_rmlp_base_rw_224,523.43,1956.308,1024,224,24.2,62.77,116.09
resnext101_64x4d,521.77,1962.525,1024,288,25.66,51.59,83.46
regnetz_e8,521.5,1472.647,768,320,15.46,63.94,57.7
mixer_l16_224,518.26,1975.807,1024,224,44.6,41.69,208.2
vit_medium_patch16_gap_384,508.63,1006.611,512,384,26.08,67.54,39.03
swin_large_patch4_window7_224,501.11,1532.586,768,224,34.53,54.94,196.53
regnety_320,490.98,2085.591,1024,224,32.34,30.26,145.05
swinv2_small_window16_256,487.64,1049.932,512,256,12.82,66.29,49.73
seresnext101_32x8d,483.23,2119.074,1024,288,27.24,51.63,93.57
vit_small_patch8_224,478.05,1071.009,512,224,22.44,80.84,21.67
ig_resnext101_32x16d,477.64,2143.862,1024,224,36.27,51.18,194.03
swsl_resnext101_32x16d,476.69,2148.145,1024,224,36.27,51.18,194.03
ssl_resnext101_32x16d,476.06,2150.954,1024,224,36.27,51.18,194.03
seresnext101d_32x8d,475.05,2155.547,1024,288,27.64,52.95,93.59
nf_regnet_b5,470.14,1089.029,512,456,11.7,61.95,49.74
xcit_large_24_p16_224_dist,468.86,2184.017,1024,224,35.86,47.27,189.1
xcit_large_24_p16_224,468.75,2184.529,1024,224,35.86,47.27,189.1
volo_d3_224,463.72,2208.199,1024,224,20.78,60.09,86.33
nfnet_f1,463.52,2209.163,1024,320,35.97,46.77,132.63
efficientnet_b5,460.91,555.412,256,448,9.59,93.56,30.39
resnet200d,453.15,2259.739,1024,320,31.25,67.33,64.69
efficientnetv2_m,451.89,2266.018,1024,416,18.6,67.5,54.14
seresnextaa101d_32x8d,447.26,2289.498,1024,288,28.51,56.44,93.59
efficientnetv2_rw_m,437.1,1757.005,768,416,21.49,79.62,53.24
swinv2_cr_large_224,422.08,1819.551,768,224,35.1,78.42,196.68
coatnet_rmlp_3_rw_224,421.87,910.226,384,224,33.56,79.47,165.15
xcit_tiny_12_p8_384_dist,421.04,2432.044,1024,384,14.13,69.14,6.71
swinv2_cr_tiny_384,419.77,609.847,256,384,15.34,161.01,28.33
maxvit_rmlp_base_rw_224,419.03,1832.808,768,224,23.15,92.64,116.14
resnetv2_152x2_bit_teacher,418.89,2444.553,1024,224,46.95,45.11,236.34
resnetv2_101x1_bitm,418.36,1223.813,512,448,31.65,64.93,44.54
dm_nfnet_f1,409.02,1877.643,768,320,35.97,46.77,132.63
xcit_small_24_p16_384_dist,407.47,2513.062,1024,384,26.72,68.58,47.67
coatnet_3_rw_224,404.39,633.033,256,224,33.44,73.83,181.81
tf_efficientnet_b5,403.59,634.298,256,456,10.46,98.86,30.39
convnextv2_base,402.92,1270.715,512,288,25.43,47.53,88.72
resnetrs200,396.11,2585.123,1024,320,31.51,67.81,93.21
tresnet_l_448,395.6,2588.481,1024,448,43.5,47.56,55.99
eva_large_patch14_196,391.22,2617.408,1024,196,61.57,63.52,304.14
vit_large_patch16_224,389.92,2626.132,1024,224,61.6,63.52,304.33
regnetz_d8_evos,389.86,1969.937,768,320,7.03,38.92,23.46
maxvit_base_tf_224,387.71,1320.545,512,224,24.04,95.01,119.47
coatnet_3_224,387.35,660.882,256,224,36.56,79.01,166.97
crossvit_15_dagger_408,386.57,662.227,256,408,21.45,95.05,28.5
vit_base_patch16_18x2_224,384.3,2664.545,1024,224,52.51,71.38,256.73
deit3_large_patch16_224,376.93,2716.643,1024,224,61.6,63.52,304.37
deit3_large_patch16_224_in21ft1k,376.54,2719.504,1024,224,61.6,63.52,304.37
tf_efficientnetv2_m,374.38,2051.373,768,480,24.76,89.84,54.14
convnext_large,371.39,1378.579,512,288,56.87,71.29,197.77
beitv2_large_patch16_224,360.12,2843.465,1024,224,61.6,63.52,304.43
beit_large_patch16_224,359.86,2845.558,1024,224,61.6,63.52,304.43
swinv2_base_window12to16_192to256_22kft1k,359.31,1068.705,384,256,22.02,84.71,87.92
swinv2_base_window16_256,359.09,1069.342,384,256,22.02,84.71,87.92
eca_nfnet_l2,347.1,2212.621,768,384,30.05,68.28,56.72
flexivit_large,333.31,3072.173,1024,240,70.99,75.39,304.36
vit_large_r50_s32_384,332.86,3076.333,1024,384,57.43,76.52,329.09
maxxvitv2_rmlp_large_rw_224,330.79,3095.576,1024,224,44.14,87.15,215.42
resnest200e,317.25,3227.754,1024,320,35.69,82.78,70.2
maxvit_tiny_tf_384,317.22,807.002,256,384,17.53,123.42,30.98
convmixer_768_32,309.28,3310.892,1024,224,19.55,25.95,21.11
deit_base_patch16_384,306.13,1254.335,384,384,55.54,101.56,86.86
vit_base_patch16_384,306.13,1254.349,384,384,55.54,101.56,86.86
vit_base_patch16_clip_384,305.56,1256.673,384,384,55.54,101.56,86.86
xcit_small_24_p8_224_dist,305.18,3355.41,1024,224,35.81,90.78,47.63
deit_base_distilled_patch16_384,304.96,1259.16,384,384,55.65,101.82,87.63
xcit_small_24_p8_224,304.86,3358.887,1024,224,35.81,90.78,47.63
nasnetalarge,300.31,1278.679,384,331,23.89,90.56,88.75
volo_d1_384,299.05,1712.072,512,384,22.75,108.55,26.78
volo_d4_224,295.86,3461.069,1024,224,44.34,80.22,192.96
deit3_base_patch16_384,294.03,1305.985,384,384,55.54,101.56,86.88
deit3_base_patch16_384_in21ft1k,293.78,1307.085,384,384,55.54,101.56,86.88
tresnet_xl_448,292.43,2626.294,768,448,60.65,61.31,78.44
pnasnet5large,285.95,1342.894,384,331,25.04,92.89,86.06
vit_large_patch14_224,285.66,3584.705,1024,224,81.08,88.79,304.2
vit_large_patch14_clip_224,285.43,3587.599,1024,224,81.08,88.79,304.2
crossvit_18_dagger_408,283.82,901.967,256,408,32.47,124.87,44.61
xcit_medium_24_p16_384_dist,282.22,3628.317,1024,384,47.39,91.64,84.4
cait_xxs24_384,275.38,3718.492,1024,384,9.63,122.66,12.03
regnety_640,271.79,2825.663,768,224,64.16,42.5,281.38
maxvit_large_tf_224,268.97,1427.67,384,224,43.68,127.35,211.79
nfnet_f2,263.0,3893.59,1024,352,63.22,79.06,193.78
beit_base_patch16_384,260.66,1473.146,384,384,55.54,101.56,86.74
swinv2_cr_small_384,258.79,989.214,256,384,29.7,298.03,49.7
ecaresnet269d,257.79,3972.16,1024,352,50.25,101.25,102.09
resnetrs270,249.11,4110.633,1024,352,51.13,105.48,129.86
mvitv2_large,248.64,2059.181,512,224,43.87,112.02,217.99
efficientnet_b6,246.42,519.432,128,528,19.4,167.39,43.04
convnext_xlarge,241.35,2121.412,512,288,100.8,95.05,350.2
convnextv2_large,238.64,1072.708,256,288,56.87,71.29,197.96
tf_efficientnet_b6,236.4,541.434,128,528,19.4,167.39,43.04
swin_base_patch4_window12_384,235.04,816.885,192,384,47.19,134.78,87.9
dm_nfnet_f2,234.34,3277.279,768,352,63.22,79.06,193.78
coatnet_4_224,228.52,1120.23,256,224,62.48,129.26,275.43
vit_base_r50_s16_384,227.31,1689.303,384,384,67.43,135.03,98.95
efficientnetv2_l,221.97,2306.653,512,480,56.4,157.99,118.52
xcit_tiny_24_p8_384_dist,221.23,4628.611,1024,384,27.05,132.95,12.11
ig_resnext101_32x32d,220.61,2320.857,512,224,87.29,91.12,468.53
swinv2_large_window12to16_192to256_22kft1k,219.46,1166.485,256,256,47.81,121.53,196.74
tf_efficientnetv2_l,219.35,2334.183,512,480,56.4,157.99,118.52
resmlp_big_24_224,214.31,4778.166,1024,224,100.23,87.31,129.14
resmlp_big_24_224_in22ft1k,214.13,4782.043,1024,224,100.23,87.31,129.14
resmlp_big_24_distilled_224,214.04,4784.169,1024,224,100.23,87.31,129.14
xcit_medium_24_p8_224_dist,210.1,4873.763,1024,224,63.53,121.23,84.32
xcit_medium_24_p8_224,210.01,4875.864,1024,224,63.53,121.23,84.32
maxvit_small_tf_384,208.79,919.556,192,384,35.87,183.65,69.02
vit_base_patch8_224,199.59,1282.637,256,224,78.22,161.69,86.58
eca_nfnet_l3,199.58,2565.434,512,448,52.55,118.4,72.04
volo_d5_224,196.25,5217.924,1024,224,72.4,118.11,295.46
xcit_small_12_p8_384_dist,194.27,2635.521,512,384,54.92,138.29,26.21
cait_xs24_384,192.73,3984.863,768,384,19.28,183.98,26.67
swinv2_cr_base_384,184.92,1384.392,256,384,50.57,333.68,87.88
cait_xxs36_384,184.35,5554.56,1024,384,14.35,183.7,17.37
swinv2_cr_huge_224,183.61,2091.395,384,224,115.97,121.08,657.83
convnext_xxlarge,183.01,2098.268,384,224,151.66,95.29,846.47
coatnet_rmlp_2_rw_384,178.88,715.532,128,384,47.69,209.43,73.88
convmixer_1536_20,173.51,5901.752,1024,224,48.68,33.03,51.63
volo_d2_384,168.46,1519.603,256,384,46.17,184.51,58.87
resnetrs350,168.28,6085.136,1024,384,77.59,154.74,163.96
xcit_large_24_p16_384_dist,160.71,4778.847,768,384,105.35,137.17,189.1
resnetv2_152x2_bit_teacher_384,159.55,1604.488,256,384,136.16,132.56,236.34
maxvit_xlarge_tf_224,155.79,1643.178,256,224,97.49,191.02,474.95
maxvit_tiny_tf_512,155.64,822.373,128,512,33.49,257.59,31.05
regnety_1280,155.18,2474.502,384,224,127.66,71.58,644.81
vit_huge_patch14_224,154.03,6647.897,1024,224,167.43,139.43,658.75
vit_huge_patch14_clip_224,153.92,6652.944,1024,224,167.4,139.41,632.05
maxxvitv2_rmlp_base_rw_384,153.34,1669.502,256,384,72.98,213.74,116.09
efficientnetv2_xl,152.49,3357.61,512,512,93.85,247.32,208.12
tf_efficientnetv2_xl,151.4,2536.254,384,512,93.85,247.32,208.12
deit3_huge_patch14_224_in21ft1k,149.08,6868.834,1024,224,167.4,139.41,632.13
deit3_huge_patch14_224,149.01,6871.974,1024,224,167.4,139.41,632.13
cait_s24_384,148.46,3448.684,512,384,32.17,245.31,47.06
resnest269e,147.61,3468.584,512,416,77.69,171.98,110.93
nfnet_f3,147.43,3472.717,512,416,115.58,141.78,254.92
efficientnet_b7,142.41,674.084,96,600,38.33,289.94,66.35
resnetv2_50x3_bitm,138.27,1388.564,192,448,145.7,133.37,217.32
tf_efficientnet_b7,137.89,696.181,96,600,38.33,289.94,66.35
swin_large_patch4_window12_384,137.6,930.229,128,384,104.08,202.16,196.74
ig_resnext101_32x48d,132.29,2902.628,384,224,153.57,131.06,828.41
dm_nfnet_f3,127.59,4012.898,512,416,115.58,141.78,254.92
coatnet_5_224,125.18,1022.512,128,224,145.49,194.24,687.47
maxvit_rmlp_base_rw_384,121.26,2111.079,256,384,70.97,318.95,116.14
xcit_large_24_p8_224,119.97,6401.598,768,224,141.23,181.56,188.93
xcit_large_24_p8_224_dist,119.94,6403.17,768,224,141.23,181.56,188.93
resnetrs420,119.93,6403.598,768,416,108.45,213.79,191.89
resnetv2_152x2_bitm,117.33,2181.801,256,448,184.99,180.43,236.34
maxvit_base_tf_384,113.69,1688.826,192,384,73.8,332.9,119.65
swinv2_cr_large_384,113.07,1132.03,128,384,108.95,404.96,196.68
eva_large_patch14_336,102.65,2493.904,256,336,191.1,270.24,304.53
vit_large_patch14_clip_336,102.47,2498.286,256,336,191.11,270.24,304.53
vit_large_patch16_384,102.37,2500.639,256,384,191.21,270.24,304.72
xcit_small_24_p8_384_dist,102.36,5001.728,512,384,105.24,265.91,47.63
eva_giant_patch14_224,101.75,10063.521,1024,224,267.18,192.64,1012.56
vit_giant_patch14_224,100.42,7648.057,768,224,267.18,192.64,1012.61
vit_giant_patch14_clip_224,100.32,7655.265,768,224,267.18,192.64,1012.65
cait_s36_384,99.37,5152.338,512,384,47.99,367.4,68.37
deit3_large_patch16_384,99.34,2577.037,256,384,191.21,270.24,304.76
deit3_large_patch16_384_in21ft1k,99.27,2578.907,256,384,191.21,270.24,304.76
regnety_2560,97.99,2612.623,256,224,257.07,87.48,826.14
maxvit_small_tf_512,97.85,981.11,96,512,67.26,383.77,69.13
swinv2_base_window12to24_192to384_22kft1k,95.95,666.98,64,384,55.25,280.36,87.92
efficientnet_b8,95.3,1007.298,96,672,63.48,442.89,87.41
tf_efficientnet_b8,92.65,1036.1,96,672,63.48,442.89,87.41
beit_large_patch16_384,88.55,2890.891,256,384,191.21,270.24,305.0
resnetv2_101x3_bitm,83.1,2310.491,192,448,280.33,194.78,387.93
maxvit_large_tf_384,80.34,1593.284,128,384,132.55,445.84,212.03
nfnet_f4,79.54,4827.723,384,512,216.26,262.26,316.07
volo_d3_448,73.5,2612.274,192,448,96.33,446.83,86.63
dm_nfnet_f4,71.41,3584.699,256,512,216.26,262.26,316.07
xcit_medium_24_p8_384_dist,70.91,5415.294,384,384,186.67,354.73,84.32
swinv2_large_window12to24_192to384_22kft1k,60.84,788.97,48,384,116.15,407.83,196.74
vit_gigantic_patch14_clip_224,60.15,8511.823,512,224,483.96,275.37,1844.91
vit_gigantic_patch14_224,60.11,8517.291,512,224,483.95,275.37,1844.44
nfnet_f5,58.02,4412.387,256,544,290.97,349.71,377.21
vit_huge_patch14_clip_336,57.29,4468.831,256,336,390.97,407.54,632.46
convnextv2_huge,56.06,1712.576,96,384,337.96,232.35,660.29
volo_d4_448,54.47,2349.801,128,448,197.13,527.35,193.41
tf_efficientnet_l2,54.12,1182.593,64,475,172.11,609.89,480.31
maxvit_base_tf_512,52.65,1823.292,96,512,138.02,703.99,119.88
swinv2_cr_giant_224,52.12,2455.882,128,224,483.85,309.15,2598.76
dm_nfnet_f5,50.7,5049.339,256,544,290.97,349.71,377.21
swinv2_cr_huge_384,48.86,1309.971,64,384,352.04,583.18,657.94
maxvit_xlarge_tf_384,46.24,2076.289,96,384,292.78,668.76,475.32
nfnet_f6,44.3,5778.548,256,576,378.69,452.2,438.36
xcit_large_24_p8_384_dist,40.2,6368.127,256,384,415.0,531.82,188.93
eva_giant_patch14_336,39.77,6436.237,256,336,620.64,550.67,1013.01
dm_nfnet_f6,39.62,6461.626,256,576,378.69,452.2,438.36
maxvit_large_tf_512,38.67,1654.908,64,512,244.75,942.15,212.33
volo_d5_448,37.56,3408.043,128,448,315.06,737.92,295.91
beit_large_patch16_512,35.36,2715.28,96,512,362.24,656.39,305.67
nfnet_f7,34.74,7370.0,256,608,480.39,570.85,499.5
cait_m36_384,32.36,7912.123,256,384,173.11,734.81,271.22
resnetv2_152x4_bitm,30.0,4266.89,128,480,844.84,414.26,936.53
volo_d5_512,26.35,4857.602,128,512,425.09,1105.37,296.09
maxvit_xlarge_tf_512,23.12,2076.455,48,512,534.14,1413.22,475.77
efficientnet_l2,21.26,1505.032,32,800,479.12,1707.39,480.31
swinv2_cr_giant_384,15.03,2129.6,32,384,1450.71,1394.86,2598.76
cait_m48_448,13.69,9353.048,128,448,329.41,1708.23,356.46
eva_giant_patch14_560,10.36,4631.037,48,560,1906.76,2577.17,1014.45
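A minimal sketch of working with the benchmark rows above using pandas; the filename below is a placeholder (substitute whichever results CSV these rows were saved as), and the column names follow the file's header (model, infer_samples_per_sec, infer_step_time, infer_batch_size, infer_img_size, infer_gmacs, infer_macts, param_count):

import pandas as pd

# Placeholder path -- point this at the actual benchmark results CSV.
df = pd.read_csv("benchmark-infer.csv")

# Rough efficiency measure: inference throughput per million parameters.
df["samples_per_mparam"] = df["infer_samples_per_sec"] / df["param_count"]

# Ten most throughput-efficient models at their benchmarked image size.
cols = ["model", "infer_img_size", "infer_samples_per_sec", "param_count"]
print(df.nlargest(10, "samples_per_mparam")[cols])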
930 efficientnet_l2 21.26 1505.032 32 800 479.12 1707.39 480.31
931 swinv2_cr_giant_384 15.03 2129.6 32 384 1450.71 1394.86 2598.76
932 cait_m48_448 13.69 9353.048 128 448 329.41 1708.23 356.46
933 eva_giant_patch14_560 10.36 4631.037 48 560 1906.76 2577.17 1014.45
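The listing above closes out one benchmark run; the CSV file added below covers the same models and columns from a separate run, so the two can be joined on the model name to measure relative throughput. A minimal sketch of that comparison with pandas, assuming both result files are saved locally under the placeholder names used below:

```python
# Minimal sketch: compare two benchmark result files like the ones in this diff.
# Both file names are placeholders -- substitute the actual CSVs from the compare.
import pandas as pd

run_a = pd.read_csv("benchmark-run-a.csv")  # hypothetical path
run_b = pd.read_csv("benchmark-run-b.csv")  # hypothetical path

# Join on model name and compute the throughput ratio between the two runs.
merged = run_a.merge(run_b, on="model", suffixes=("_a", "_b"))
merged["speedup"] = merged["infer_samples_per_sec_b"] / merged["infer_samples_per_sec_a"]
print(merged[["model", "speedup"]].sort_values("speedup", ascending=False).head(10))
```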

@@ -0,0 +1,930 @@
model,infer_samples_per_sec,infer_step_time,infer_batch_size,infer_img_size,infer_gmacs,infer_macts,param_count
tinynet_e,72737.62,14.068,1024,106,0.03,0.69,2.04
mobilenetv3_small_050,54822.3,18.668,1024,224,0.03,0.92,1.59
lcnet_035,53629.35,19.084,1024,224,0.03,1.04,1.64
lcnet_050,45492.41,22.499,1024,224,0.05,1.26,1.88
mobilenetv3_small_075,39215.51,26.102,1024,224,0.05,1.3,2.04
tinynet_d,37346.61,27.409,1024,152,0.05,1.42,2.34
mobilenetv3_small_100,36280.34,28.214,1024,224,0.06,1.42,2.54
tf_mobilenetv3_small_minimal_100,31726.33,32.265,1024,224,0.06,1.41,2.04
tf_mobilenetv3_small_075,31503.43,32.494,1024,224,0.05,1.3,2.04
lcnet_075,29817.69,34.332,1024,224,0.1,1.99,2.36
tf_mobilenetv3_small_100,29444.91,34.767,1024,224,0.06,1.42,2.54
mnasnet_small,25354.86,40.376,1024,224,0.07,2.16,2.03
lcnet_100,24134.76,42.417,1024,224,0.16,2.52,2.95
regnetx_002,23983.4,42.686,1024,224,0.2,2.16,2.68
levit_128s,22675.73,45.148,1024,224,0.31,1.88,7.78
regnety_002,21709.37,47.158,1024,224,0.2,2.17,3.16
mobilenetv2_035,21673.44,47.236,1024,224,0.07,2.86,1.68
mnasnet_050,20010.27,51.163,1024,224,0.11,3.07,2.22
ghostnet_050,18932.82,54.075,1024,224,0.05,1.77,2.59
tinynet_c,18428.42,55.556,1024,184,0.11,2.87,2.46
semnasnet_050,17215.18,59.471,1024,224,0.11,3.44,2.08
mobilenetv2_050,17194.94,59.542,1024,224,0.1,3.64,1.97
cs3darknet_focus_s,16189.76,63.24,1024,256,0.69,2.7,3.27
lcnet_150,15557.15,65.811,1024,224,0.34,3.79,4.5
cs3darknet_s,15369.47,66.615,1024,256,0.72,2.97,3.28
levit_128,15337.67,66.754,1024,224,0.41,2.71,9.21
gernet_s,15288.68,66.966,1024,224,0.75,2.65,8.17
mobilenetv3_large_075,14216.3,72.019,1024,224,0.16,4.0,3.99
mixer_s32_224,14182.92,72.188,1024,224,1.0,2.28,19.1
vit_tiny_r_s16_p8_224,14125.39,72.482,1024,224,0.44,2.06,6.34
resnet10t,14112.07,72.551,1024,224,1.1,2.43,5.44
vit_small_patch32_224,13799.47,74.195,1024,224,1.15,2.5,22.88
regnetx_004,13610.2,75.225,1024,224,0.4,3.14,5.16
levit_192,13524.14,75.706,1024,224,0.66,3.2,10.95
mobilenetv3_rw,12956.58,79.021,1024,224,0.23,4.41,5.48
hardcorenas_a,12803.61,79.966,1024,224,0.23,4.38,5.26
mobilenetv3_large_100,12749.93,80.304,1024,224,0.23,4.41,5.48
mnasnet_075,12532.36,81.697,1024,224,0.23,4.77,3.17
tf_mobilenetv3_large_075,12186.51,84.017,1024,224,0.16,4.0,3.99
tinynet_b,12083.18,84.735,1024,188,0.21,4.44,3.73
regnety_004,11918.36,85.906,1024,224,0.41,3.89,4.34
tf_mobilenetv3_large_minimal_100,11715.94,87.392,1024,224,0.22,4.4,3.92
hardcorenas_c,11548.05,88.662,1024,224,0.28,5.01,5.52
hardcorenas_b,11510.71,88.949,1024,224,0.26,5.09,5.18
ese_vovnet19b_slim_dw,11501.95,89.018,1024,224,0.4,5.28,1.9
ghostnet_100,11332.61,90.348,1024,224,0.15,3.55,5.18
mnasnet_100,11138.43,91.923,1024,224,0.33,5.46,4.38
gluon_resnet18_v1b,11098.78,92.252,1024,224,1.82,2.48,11.69
resnet18,11083.1,92.383,1024,224,1.82,2.48,11.69
swsl_resnet18,11062.48,92.555,1024,224,1.82,2.48,11.69
ssl_resnet18,11061.11,92.565,1024,224,1.82,2.48,11.69
tf_mobilenetv3_large_100,11018.56,92.922,1024,224,0.23,4.41,5.48
mnasnet_b1,10993.58,93.135,1024,224,0.33,5.46,4.38
hardcorenas_d,10910.47,93.843,1024,224,0.3,4.93,7.5
semnasnet_075,10898.09,93.951,1024,224,0.23,5.54,2.91
mobilenetv2_075,10893.76,93.988,1024,224,0.22,5.86,2.64
seresnet18,10385.56,98.588,1024,224,1.82,2.49,11.78
legacy_seresnet18,10064.41,101.734,1024,224,1.82,2.49,11.78
spnasnet_100,10009.21,102.296,1024,224,0.35,6.03,4.42
tf_efficientnetv2_b0,9930.95,103.1,1024,224,0.73,4.77,7.14
levit_256,9858.1,103.863,1024,224,1.13,4.23,18.89
tinynet_a,9720.11,105.337,1024,192,0.35,5.41,6.19
hardcorenas_f,9714.91,105.393,1024,224,0.35,5.57,8.2
semnasnet_100,9623.78,106.393,1024,224,0.32,6.23,3.89
mnasnet_a1,9623.77,106.393,1024,224,0.32,6.23,3.89
mobilenetv2_100,9598.91,106.667,1024,224,0.31,6.68,3.5
hardcorenas_e,9571.87,106.966,1024,224,0.35,5.65,8.07
dla46_c,9568.4,107.007,1024,224,0.58,4.5,1.3
efficientnet_lite0,9361.14,109.377,1024,224,0.4,6.74,4.65
fbnetc_100,9352.03,109.484,1024,224,0.4,6.51,5.57
resnet18d,9334.83,109.687,1024,224,2.06,3.29,11.71
ese_vovnet19b_slim,9109.47,112.4,1024,224,1.69,3.52,3.17
regnety_006,9097.63,112.542,1024,224,0.61,4.33,6.06
regnetz_005,8607.49,118.955,1024,224,0.52,5.86,7.12
xcit_nano_12_p16_224_dist,8577.2,119.375,1024,224,0.56,4.17,3.05
xcit_nano_12_p16_224,8554.61,119.689,1024,224,0.56,4.17,3.05
levit_256d,8382.88,122.143,1024,224,1.4,4.93,26.21
regnetx_006,8379.52,122.192,1024,224,0.61,3.98,6.2
ghostnet_130,8278.59,123.681,1024,224,0.24,4.6,7.36
tf_efficientnet_lite0,8080.51,126.714,1024,224,0.4,6.74,4.65
efficientnet_b0,7965.17,128.548,1024,224,0.4,6.75,5.29
mnasnet_140,7779.42,131.618,1024,224,0.6,7.71,7.12
deit_tiny_distilled_patch16_224,7467.68,137.113,1024,224,1.27,6.01,5.91
rexnetr_100,7464.12,137.179,1024,224,0.43,7.72,4.88
deit_tiny_patch16_224,7430.15,137.806,1024,224,1.26,5.97,5.72
resnet14t,7429.68,137.815,1024,224,1.69,5.8,10.08
vit_tiny_patch16_224,7424.93,137.902,1024,224,1.26,5.97,5.72
regnetx_008,7394.88,138.463,1024,224,0.81,5.15,7.26
mobilenetv2_110d,7247.12,141.287,1024,224,0.45,8.71,4.52
hrnet_w18_small,7232.93,141.561,1024,224,1.61,5.72,13.19
tf_efficientnet_b0,7016.18,145.938,1024,224,0.4,6.75,5.29
regnety_008,6938.46,147.571,1024,224,0.81,5.25,6.26
mobilevitv2_050,6848.87,149.503,1024,256,0.48,8.04,1.37
pit_ti_distilled_224,6811.68,150.317,1024,224,0.71,6.23,5.1
pit_ti_224,6784.24,150.927,1024,224,0.7,6.19,4.85
gernet_m,6679.85,153.286,1024,224,3.02,5.24,21.14
efficientnet_b1_pruned,6642.37,154.15,1024,240,0.4,6.21,6.33
resnet34,6496.42,157.614,1024,224,3.67,3.74,21.8
gluon_resnet34_v1b,6494.61,157.658,1024,224,3.67,3.74,21.8
tv_resnet34,6481.01,157.989,1024,224,3.67,3.74,21.8
tf_efficientnetv2_b1,6476.52,158.098,1024,240,1.21,7.34,8.14
semnasnet_140,6454.5,158.637,1024,224,0.6,8.87,6.11
nf_regnet_b0,6452.24,158.693,1024,256,0.64,5.58,8.76
ese_vovnet19b_dw,6335.13,161.627,1024,224,1.34,8.25,6.54
mobilenetv2_140,6271.56,163.266,1024,224,0.6,9.57,6.11
rexnet_100,6226.48,164.447,1024,224,0.41,7.44,4.8
efficientnet_lite1,6187.91,165.472,1024,240,0.62,10.14,5.42
efficientnet_es_pruned,6115.4,167.434,1024,224,1.81,8.73,5.44
efficientnet_es,6115.12,167.443,1024,224,1.81,8.73,5.44
visformer_tiny,6103.09,167.772,1024,224,1.27,5.72,10.32
seresnet34,6058.13,169.019,1024,224,3.67,3.74,21.96
fbnetv3_b,6018.76,170.124,1024,256,0.55,9.1,8.6
selecsls42,5953.76,171.98,1024,224,2.94,4.62,30.35
selecsls42b,5921.2,172.924,1024,224,2.98,4.62,32.46
resnet26,5895.21,173.69,1024,224,2.36,7.35,16.0
edgenext_xx_small,5893.72,173.732,1024,288,0.33,4.21,1.33
levit_384,5880.4,174.126,1024,224,2.36,6.26,39.13
resnet34d,5865.98,174.555,1024,224,3.91,4.54,21.82
legacy_seresnet34,5850.24,175.025,1024,224,3.67,3.74,21.96
dla34,5827.3,175.712,1024,224,3.07,5.02,15.74
tf_efficientnet_es,5781.29,177.112,1024,224,1.81,8.73,5.44
cs3darknet_focus_m,5721.39,178.967,1024,288,2.51,6.19,9.3
resnetblur18,5636.65,181.657,1024,224,2.34,3.39,11.69
rexnetr_130,5590.0,183.173,1024,224,0.68,9.81,7.61
mobilevit_xxs,5524.87,185.333,1024,256,0.42,8.34,1.27
tf_efficientnet_lite1,5524.68,185.339,1024,240,0.62,10.14,5.42
cs3darknet_m,5478.07,186.916,1024,288,2.63,6.69,9.31
convnext_atto,5460.54,187.516,1024,288,0.91,6.3,3.7
xcit_tiny_12_p16_224_dist,5457.72,187.611,1024,224,1.24,6.29,6.72
xcit_tiny_12_p16_224,5456.63,187.649,1024,224,1.24,6.29,6.72
skresnet18,5413.1,189.159,1024,224,1.82,3.24,11.96
darknet17,5401.37,189.571,1024,256,3.26,7.18,14.3
mixnet_s,5392.58,189.878,1024,224,0.25,6.25,4.13
resmlp_12_224,5366.15,190.814,1024,224,3.01,5.5,15.35
resmlp_12_distilled_224,5364.91,190.857,1024,224,3.01,5.5,15.35
convnext_atto_ols,5288.94,193.6,1024,288,0.96,6.8,3.7
vit_base_patch32_clip_224,5280.68,193.903,1024,224,4.41,5.01,88.22
vit_base_patch32_224,5280.52,193.908,1024,224,4.41,5.01,88.22
pit_xs_distilled_224,5272.13,194.218,1024,224,1.41,7.76,11.0
pit_xs_224,5271.0,194.259,1024,224,1.4,7.71,10.62
repvgg_b0,5252.66,194.939,1024,224,3.41,6.15,15.82
mixer_b32_224,5221.71,196.094,1024,224,3.24,6.29,60.29
pvt_v2_b0,5210.31,196.521,1024,224,0.57,7.99,3.67
resnetaa34d,5171.78,197.986,1024,224,4.43,5.07,21.82
selecsls60,5160.83,198.407,1024,224,3.59,5.52,30.67
selecsls60b,5119.51,200.008,1024,224,3.63,5.52,32.77
mobilenetv2_120d,5111.95,200.304,1024,224,0.69,11.97,5.83
resnet26d,5108.26,200.449,1024,224,2.6,8.15,16.01
gmixer_12_224,5064.97,202.162,1024,224,2.67,7.26,12.7
gmlp_ti16_224,5007.93,204.464,1024,224,1.34,7.55,5.87
mixer_s16_224,4998.69,204.842,1024,224,3.79,5.97,18.53
tf_mixnet_s,4989.18,205.231,1024,224,0.25,6.25,4.13
efficientnet_b0_g16_evos,4930.67,207.667,1024,224,1.01,7.42,8.11
rexnetr_150,4900.22,208.959,1024,224,0.89,11.13,9.78
fbnetv3_d,4881.14,209.776,1024,256,0.68,11.1,10.31
darknet21,4850.41,211.105,1024,256,3.93,7.47,20.86
nf_resnet26,4816.48,212.591,1024,224,2.41,7.35,16.0
efficientnet_lite2,4781.65,214.14,1024,260,0.89,12.9,6.09
convnext_femto,4749.12,215.607,1024,288,1.3,7.56,5.22
tf_efficientnetv2_b2,4718.26,217.018,1024,260,1.72,9.84,10.1
sedarknet21,4656.51,219.895,1024,256,3.93,7.47,20.95
dla46x_c,4636.77,220.831,1024,224,0.54,5.66,1.07
convnext_femto_ols,4618.33,221.714,1024,288,1.35,8.06,5.23
resnext26ts,4603.25,222.441,1024,256,2.43,10.52,10.3
efficientformer_l1,4566.14,224.248,1024,224,1.3,5.53,12.29
dpn48b,4506.78,227.201,1024,224,1.69,8.92,9.13
crossvit_tiny_240,4481.69,228.473,1024,240,1.57,9.08,7.01
dla60x_c,4459.27,229.622,1024,224,0.59,6.01,1.32
eca_resnext26ts,4456.63,229.759,1024,256,2.43,10.52,10.3
seresnext26ts,4453.99,229.896,1024,256,2.43,10.52,10.39
legacy_seresnext26_32x4d,4441.15,230.558,1024,224,2.49,9.39,16.79
gernet_l,4396.56,232.898,1024,256,4.57,8.0,31.08
mobilevitv2_075,4393.87,233.041,1024,256,1.05,12.06,2.87
gcresnext26ts,4384.92,233.516,1024,256,2.43,10.53,10.48
tf_efficientnet_b1,4370.6,234.282,1024,240,0.71,10.88,7.79
tf_efficientnet_lite2,4293.9,238.467,1024,260,0.89,12.9,6.09
rexnet_130,4262.16,240.243,1024,224,0.68,9.71,7.56
efficientnet_b1,4239.44,241.53,1024,256,0.77,12.22,7.79
vit_small_patch32_384,4239.1,241.55,1024,384,3.45,8.25,22.92
crossvit_9_240,4212.37,243.082,1024,240,1.85,9.52,8.55
crossvit_9_dagger_240,4095.03,250.049,1024,240,1.99,9.97,8.78
nf_ecaresnet26,4091.86,250.24,1024,224,2.41,7.36,16.0
nf_seresnet26,4088.47,250.449,1024,224,2.41,7.36,17.4
efficientnet_cc_b0_8e,4076.51,251.183,1024,224,0.42,9.42,24.01
efficientnet_cc_b0_4e,4073.3,251.382,1024,224,0.41,9.42,13.31
ecaresnet50d_pruned,4055.39,252.492,1024,224,2.53,6.43,19.94
efficientnet_b2_pruned,4030.92,254.025,1024,260,0.73,9.13,8.31
ecaresnext50t_32x4d,4018.73,254.796,1024,224,2.7,10.09,15.41
ecaresnext26t_32x4d,4017.09,254.9,1024,224,2.7,10.09,15.41
seresnext26t_32x4d,4014.43,255.069,1024,224,2.7,10.09,16.81
seresnext26tn_32x4d,4014.36,255.074,1024,224,2.7,10.09,16.81
repvgg_a2,3987.84,256.77,1024,224,5.7,6.26,28.21
poolformer_s12,3982.67,257.103,1024,224,1.82,5.53,11.92
seresnext26d_32x4d,3979.57,257.303,1024,224,2.73,10.19,16.81
vit_tiny_r_s16_p8_384,3963.05,258.374,1024,384,1.34,6.49,6.36
resnet26t,3939.46,259.923,1024,256,3.35,10.52,16.01
nf_regnet_b1,3911.64,261.772,1024,288,1.02,9.2,10.22
rexnet_150,3881.93,263.775,1024,224,0.9,11.21,9.73
nf_regnet_b2,3879.78,263.921,1024,272,1.22,9.27,14.31
resnetv2_50,3865.49,264.896,1024,224,4.11,11.11,25.55
regnetx_016,3852.41,265.794,1024,224,1.62,7.93,9.19
tf_efficientnet_cc_b0_4e,3812.08,268.608,1024,224,0.41,9.42,13.31
tf_efficientnet_cc_b0_8e,3803.67,269.202,1024,224,0.42,9.42,24.01
convnext_pico,3747.49,273.239,1024,288,2.27,10.08,9.05
ecaresnetlight,3744.45,273.459,1024,224,4.11,8.42,30.16
dpn68,3724.59,274.917,1024,224,2.35,10.47,12.61
edgenext_x_small,3714.71,275.646,1024,288,0.68,7.5,2.34
gluon_resnet50_v1b,3672.76,278.798,1024,224,4.11,11.11,25.56
ssl_resnet50,3671.85,278.866,1024,224,4.11,11.11,25.56
efficientnet_em,3671.25,278.913,1024,240,3.04,14.34,6.9
resnet50,3668.58,279.116,1024,224,4.11,11.11,25.56
swsl_resnet50,3668.32,279.136,1024,224,4.11,11.11,25.56
tv_resnet50,3667.14,279.225,1024,224,4.11,11.11,25.56
dpn68b,3667.07,279.229,1024,224,2.35,10.47,12.61
rexnetr_200,3659.45,279.811,1024,224,1.59,15.11,16.52
convnext_pico_ols,3651.34,280.434,1024,288,2.37,10.74,9.06
botnet26t_256,3594.28,284.883,1024,256,3.32,11.98,12.49
bat_resnext26ts,3569.91,286.828,1024,256,2.53,12.51,10.73
resnetv2_50t,3547.32,288.657,1024,224,4.32,11.82,25.57
mixnet_m,3537.26,289.477,1024,224,0.36,8.19,5.01
regnety_016,3531.88,289.919,1024,224,1.63,8.04,11.2
tf_efficientnet_em,3529.62,290.106,1024,240,3.04,14.34,6.9
resnetv2_50d,3525.02,290.482,1024,224,4.35,11.92,25.57
halonet26t,3515.15,291.299,1024,256,3.19,11.69,12.48
resnet32ts,3492.62,293.179,1024,256,4.63,11.58,17.96
hrnet_w18_small_v2,3482.81,294.001,1024,224,2.62,9.65,15.6
gluon_resnet50_v1c,3481.59,294.107,1024,224,4.35,11.92,25.58
dla60,3466.91,295.351,1024,224,4.26,10.16,22.04
resnet33ts,3460.78,295.875,1024,256,4.76,11.66,19.68
tf_efficientnet_b2,3402.3,300.962,1024,260,1.02,13.83,9.11
convit_tiny,3399.61,301.199,1024,224,1.26,7.94,5.71
resnet50t,3373.72,303.51,1024,224,4.32,11.82,25.57
tf_mixnet_m,3366.38,304.167,1024,224,0.36,8.19,5.01
efficientnet_b3_pruned,3360.1,304.74,1024,300,1.04,11.86,9.86
seresnet33ts,3354.27,305.27,1024,256,4.76,11.66,19.78
resnet50d,3351.47,305.527,1024,224,4.35,11.92,25.58
eca_resnet33ts,3350.95,305.574,1024,256,4.76,11.66,19.68
vit_small_resnet26d_224,3346.77,305.954,1024,224,5.07,11.12,63.61
cs3darknet_focus_l,3335.18,307.018,1024,288,5.9,10.16,21.15
gluon_resnet50_v1d,3334.65,307.068,1024,224,4.35,11.92,25.58
mobilevitv2_100,3324.63,307.994,1024,256,1.84,16.08,4.9
vovnet39a,3320.12,308.408,1024,224,7.09,6.73,22.6
legacy_seresnet50,3312.33,309.135,1024,224,3.88,10.6,28.09
efficientnet_b0_gn,3307.86,309.554,1024,224,0.42,6.75,5.29
gcresnet33ts,3307.01,309.633,1024,256,4.76,11.68,19.88
pit_s_distilled_224,3301.25,310.173,1024,224,2.9,11.64,24.04
pit_s_224,3299.97,310.295,1024,224,2.88,11.56,23.46
mobilevit_xs,3252.28,314.844,1024,256,1.05,16.33,2.32
deit_small_distilled_patch16_224,3233.6,316.663,1024,224,4.63,12.02,22.44
efficientnet_b2a,3223.97,317.608,1024,288,1.12,16.2,9.11
efficientnet_b2,3223.9,317.615,1024,288,1.12,16.2,9.11
deit_small_patch16_224,3218.99,318.1,1024,224,4.61,11.95,22.05
vit_small_patch16_224,3218.38,318.16,1024,224,4.61,11.95,22.05
cs3darknet_l,3210.26,318.965,1024,288,6.16,10.83,21.16
ese_vovnet39b,3206.21,319.369,1024,224,7.09,6.74,24.57
eca_vovnet39b,3203.77,319.612,1024,224,7.09,6.74,22.6
convnextv2_atto,3196.73,320.315,1024,288,0.91,6.3,3.71
coatnet_pico_rw_224,3189.82,321.008,1024,224,2.05,14.62,10.85
seresnet50,3181.57,321.841,1024,224,4.11,11.13,28.09
pvt_v2_b1,3147.37,325.339,1024,224,2.12,15.39,14.01
coat_lite_tiny,3146.41,325.439,1024,224,1.6,11.65,5.72
res2net50_48w_2s,3127.52,327.404,1024,224,4.18,11.72,25.29
eca_botnext26ts_256,3112.32,329.003,1024,256,2.46,11.6,10.59
ecaresnet101d_pruned,3103.16,329.973,1024,224,3.48,7.69,24.88
efficientnet_b0_g8_gn,3073.2,333.192,1024,224,0.66,6.75,6.56
ssl_resnext50_32x4d,3071.68,333.356,1024,224,4.26,14.4,25.03
dla60x,3071.64,333.359,1024,224,3.54,13.8,17.35
swsl_resnext50_32x4d,3070.7,333.464,1024,224,4.26,14.4,25.03
tv_resnext50_32x4d,3069.81,333.56,1024,224,4.26,14.4,25.03
resnext50_32x4d,3069.72,333.57,1024,224,4.26,14.4,25.03
gluon_resnext50_32x4d,3068.47,333.704,1024,224,4.26,14.4,25.03
vit_small_r26_s32_224,3061.92,334.417,1024,224,3.56,9.85,36.43
skresnet34,3055.95,335.073,1024,224,3.67,5.13,22.28
deit3_small_patch16_224_in21ft1k,3048.82,335.855,1024,224,4.61,11.95,22.06
deit3_small_patch16_224,3047.23,336.031,1024,224,4.61,11.95,22.06
eca_halonext26ts,3035.71,337.305,1024,256,2.44,11.46,10.76
haloregnetz_b,3032.47,337.665,1024,224,1.97,11.94,11.68
vit_relpos_base_patch32_plus_rpn_256,3026.45,338.338,1024,256,7.68,8.01,119.42
vit_relpos_small_patch16_rpn_224,3019.95,339.067,1024,224,4.59,13.05,21.97
vit_relpos_small_patch16_224,3008.26,340.383,1024,224,4.59,13.05,21.98
vit_srelpos_small_patch16_224,3000.96,341.213,1024,224,4.59,12.16,21.97
xcit_nano_12_p16_384_dist,3000.48,341.266,1024,384,1.64,12.15,3.05
cs3sedarknet_l,2995.41,341.845,1024,288,6.16,10.83,21.91
resnetaa50d,2993.03,342.116,1024,224,5.39,12.44,25.58
vgg11,2983.47,85.796,256,224,7.61,7.44,132.86
selecsls84,2973.16,344.402,1024,224,5.9,7.57,50.95
resnetrs50,2963.42,345.535,1024,224,4.48,12.14,35.69
seresnet50t,2957.12,346.271,1024,224,4.32,11.83,28.1
resnest14d,2954.69,346.556,1024,224,2.76,7.33,10.61
gluon_resnet50_v1s,2953.65,346.677,1024,224,5.47,13.52,25.68
coat_lite_mini,2952.61,346.799,1024,224,2.0,12.25,11.01
ecaresnet50d,2945.96,347.583,1024,224,4.35,11.93,25.58
densenet121,2933.45,349.064,1024,224,2.87,6.9,7.98
tv_densenet121,2929.69,349.514,1024,224,2.87,6.9,7.98
vit_base_patch32_plus_256,2929.65,349.519,1024,256,7.79,7.76,119.48
rexnet_200,2927.94,349.723,1024,224,1.56,14.91,16.37
xcit_tiny_24_p16_224_dist,2927.0,349.834,1024,224,2.34,11.82,12.12
xcit_tiny_24_p16_224,2921.97,350.436,1024,224,2.34,11.82,12.12
coatnet_nano_cc_224,2867.38,357.108,1024,224,2.24,15.02,13.76
gcresnext50ts,2857.34,358.363,1024,256,3.75,15.46,15.67
lambda_resnet26rpt_256,2853.55,358.839,1024,256,3.16,11.87,10.99
resnext50d_32x4d,2845.08,359.908,1024,224,4.5,15.2,25.05
mixnet_l,2828.6,361.996,1024,224,0.58,10.84,7.33
densenet121d,2824.08,362.584,1024,224,3.11,7.7,8.0
efficientnet_lite3,2821.84,362.87,1024,300,1.65,21.85,8.2
cspresnet50,2793.65,366.534,1024,256,4.54,11.5,21.62
coatnet_nano_rw_224,2781.93,368.077,1024,224,2.41,15.41,15.14
vgg11_bn,2760.38,370.949,1024,224,7.62,7.44,132.87
vovnet57a,2755.77,371.572,1024,224,8.95,7.52,36.64
resmlp_24_224,2750.33,372.306,1024,224,5.96,10.91,30.02
resmlp_24_distilled_224,2740.33,373.665,1024,224,5.96,10.91,30.02
convnextv2_femto,2735.91,374.269,1024,288,1.3,7.56,5.23
flexivit_small,2735.78,374.287,1024,240,5.35,14.18,22.06
gcresnet50t,2732.04,374.8,1024,256,5.42,14.67,25.9
legacy_seresnext50_32x4d,2722.84,376.065,1024,224,4.26,14.42,27.56
seresnext50_32x4d,2721.47,376.256,1024,224,4.26,14.42,27.56
gluon_seresnext50_32x4d,2720.58,376.379,1024,224,4.26,14.42,27.56
visformer_small,2719.93,376.468,1024,224,4.88,11.43,40.22
twins_svt_small,2713.39,377.374,1024,224,2.94,13.75,24.06
resnetv2_50x1_bit_distilled,2708.81,378.014,1024,224,4.23,11.11,25.55
res2net50_14w_8s,2692.9,380.248,1024,224,4.21,13.28,25.06
resnetblur50,2685.97,381.228,1024,224,5.16,12.02,25.56
vit_base_resnet26d_224,2684.6,381.421,1024,224,6.97,13.16,101.4
tf_mixnet_l,2680.8,381.958,1024,224,0.58,10.84,7.33
seresnetaa50d,2658.93,385.106,1024,224,5.4,12.46,28.11
dla60_res2net,2656.16,385.506,1024,224,4.15,12.34,20.85
cspresnet50d,2655.05,385.668,1024,256,4.86,12.55,21.64
coatnext_nano_rw_224,2655.0,385.674,1024,224,2.47,12.8,14.7
ese_vovnet57b,2654.33,385.773,1024,224,8.95,7.52,38.61
tf_efficientnetv2_b3,2654.14,385.8,1024,300,3.04,15.74,14.36
cspresnet50w,2641.68,387.621,1024,256,5.04,12.19,28.12
res2net50_26w_4s,2629.64,389.395,1024,224,4.28,12.61,25.7
regnetz_b16,2626.71,389.828,1024,288,2.39,16.43,9.72
convnext_nano,2611.78,392.059,1024,288,4.06,13.84,15.59
efficientnetv2_rw_t,2601.49,393.609,1024,288,3.19,16.42,13.65
fbnetv3_g,2595.29,394.549,1024,288,1.77,21.09,16.62
gmixer_24_224,2595.15,394.571,1024,224,5.28,14.45,24.72
mobilevit_s,2586.09,395.952,1024,256,2.03,19.94,5.58
coatnet_rmlp_nano_rw_224,2569.7,398.478,1024,224,2.62,20.34,15.15
gcvit_xxtiny,2561.41,399.768,1024,224,2.14,15.36,12.0
tf_efficientnet_lite3,2530.94,404.582,1024,300,1.65,21.85,8.2
efficientnet_cc_b1_8e,2530.65,404.628,1024,240,0.75,15.44,39.72
densenetblur121d,2522.66,405.908,1024,224,3.11,7.9,8.0
resnetblur50d,2509.45,408.045,1024,224,5.4,12.82,25.58
nf_ecaresnet50,2490.39,411.168,1024,224,4.21,11.13,25.56
inception_v3,2485.21,412.025,1024,299,5.73,8.97,23.83
nf_seresnet50,2482.66,412.449,1024,224,4.21,11.13,28.09
tf_inception_v3,2481.38,412.658,1024,299,5.73,8.97,23.83
gc_efficientnetv2_rw_t,2480.59,412.793,1024,288,3.2,16.45,13.68
adv_inception_v3,2479.41,412.983,1024,299,5.73,8.97,23.83
repvgg_b1g4,2473.34,414.003,1024,224,8.15,10.64,39.97
mobilevitv2_125,2472.28,414.18,1024,256,2.86,20.1,7.48
gluon_inception_v3,2468.42,414.827,1024,299,5.73,8.97,23.83
nf_regnet_b3,2461.52,415.991,1024,320,2.05,14.61,18.59
xcit_small_12_p16_224_dist,2446.89,418.478,1024,224,4.82,12.58,26.25
xcit_small_12_p16_224,2446.42,418.558,1024,224,4.82,12.58,26.25
cspresnext50,2438.96,419.836,1024,256,4.05,15.86,20.57
convnext_nano_ols,2435.0,420.521,1024,288,4.38,15.5,15.65
regnetx_032,2429.42,421.489,1024,224,3.2,11.37,15.3
densenet169,2426.29,422.031,1024,224,3.4,7.3,14.15
sehalonet33ts,2419.4,423.234,1024,256,3.55,14.7,13.69
tf_efficientnet_cc_b1_8e,2406.19,425.557,1024,240,0.75,15.44,39.72
semobilevit_s,2402.02,426.294,1024,256,2.03,19.95,5.74
resnetv2_101,2330.6,439.36,1024,224,7.83,16.23,44.54
twins_pcpvt_small,2312.72,442.754,1024,224,3.83,18.08,24.11
xcit_nano_12_p8_224_dist,2295.5,446.077,1024,224,2.16,15.71,3.05
xcit_nano_12_p8_224,2292.87,446.587,1024,224,2.16,15.71,3.05
gmlp_s16_224,2290.73,447.007,1024,224,4.42,15.1,19.42
cs3darknet_focus_x,2287.2,447.697,1024,256,8.03,10.69,35.02
vit_base_r26_s32_224,2275.25,450.047,1024,224,6.81,12.36,101.38
gluon_resnet101_v1b,2260.37,453.01,1024,224,7.83,16.23,44.55
tv_resnet101,2258.59,453.368,1024,224,7.83,16.23,44.55
resnet101,2258.28,453.43,1024,224,7.83,16.23,44.55
skresnet50,2234.62,458.23,1024,224,4.11,12.5,25.8
ecaresnet26t,2232.29,458.709,1024,320,5.24,16.44,16.01
edgenext_small,2226.69,459.86,1024,320,1.97,14.16,5.59
dla102,2219.96,461.255,1024,224,7.19,14.18,33.27
res2next50,2214.71,462.347,1024,224,4.2,13.71,24.67
dla60_res2next,2210.67,463.194,1024,224,3.49,13.17,17.03
resnetv2_101d,2203.82,464.633,1024,224,8.07,17.04,44.56
gluon_resnet101_v1c,2194.65,466.578,1024,224,8.08,17.04,44.57
resnest26d,2170.04,471.869,1024,224,3.64,9.97,17.07
vgg13,2149.71,476.331,1024,224,11.31,12.25,133.05
gluon_resnet101_v1d,2137.49,479.053,1024,224,8.08,17.04,44.57
skresnet50d,2115.22,484.098,1024,224,4.36,13.31,25.82
convnextv2_pico,2108.5,485.64,1024,288,2.27,10.08,9.07
vit_base_resnet50d_224,2101.17,487.333,1024,224,8.73,16.92,110.97
coatnet_0_rw_224,2082.49,491.706,1024,224,4.43,18.73,27.44
crossvit_small_240,2081.5,491.94,1024,240,5.63,18.17,26.86
deit3_medium_patch16_224_in21ft1k,2076.53,493.118,1024,224,8.0,15.93,38.85
deit3_medium_patch16_224,2072.34,494.116,1024,224,8.0,15.93,38.85
mobilevitv2_150,2071.36,494.349,1024,256,4.09,24.11,10.59
mobilevitv2_150_in22ft1k,2070.3,494.603,1024,256,4.09,24.11,10.59
sebotnet33ts_256,2067.91,247.581,512,256,3.89,17.46,13.7
wide_resnet50_2,2057.08,497.78,1024,224,11.43,14.4,68.88
vit_relpos_medium_patch16_rpn_224,2044.85,500.757,1024,224,7.97,17.02,38.73
efficientformer_l3,2041.79,501.507,1024,224,3.93,12.01,31.41
poolformer_s24,2040.35,501.863,1024,224,3.41,10.68,21.39
vit_relpos_medium_patch16_224,2037.47,502.572,1024,224,7.97,17.02,38.75
cspdarknet53,2035.94,502.949,1024,256,6.57,16.81,27.64
resnet51q,2034.41,503.329,1024,288,8.07,20.94,35.7
vit_srelpos_medium_patch16_224,2033.15,503.638,1024,224,7.96,16.21,38.74
maxvit_rmlp_pico_rw_256,2008.78,509.748,1024,256,1.85,24.86,7.52
vit_relpos_medium_patch16_cls_224,2007.24,510.141,1024,224,8.03,18.24,38.76
dla102x,2006.55,510.315,1024,224,5.89,19.42,26.31
legacy_seresnet101,2003.12,511.188,1024,224,7.61,15.74,49.33
swin_tiny_patch4_window7_224,1995.14,513.235,1024,224,4.51,17.06,28.29
repvgg_b1,1985.42,515.747,1024,224,13.16,10.64,57.42
resnetaa101d,1982.98,516.381,1024,224,9.12,17.56,44.57
coatnet_rmlp_0_rw_224,1981.75,516.703,1024,224,4.72,24.89,27.45
tf_efficientnet_b3,1975.92,518.226,1024,300,1.87,23.83,12.23
gcvit_xtiny,1969.68,519.869,1024,224,2.93,20.26,19.98
hrnet_w18,1967.17,520.531,1024,224,4.32,16.31,21.3
gluon_resnet101_v1s,1965.68,520.926,1024,224,9.19,18.64,44.67
maxvit_pico_rw_256,1965.38,521.006,1024,256,1.83,22.3,7.46
resnetaa50,1958.15,522.93,1024,288,8.52,19.24,25.56
seresnet101,1954.63,523.871,1024,224,7.84,16.27,49.33
efficientnet_b3,1949.54,525.239,1024,320,2.01,26.52,12.23
efficientnet_b3a,1949.11,525.356,1024,320,2.01,26.52,12.23
edgenext_small_rw,1932.68,529.816,1024,320,2.46,14.85,7.83
regnetx_040,1932.62,529.839,1024,224,3.99,12.2,22.12
cs3sedarknet_xdw,1925.4,531.825,1024,256,5.97,17.18,21.6
coatnet_bn_0_rw_224,1920.71,533.123,1024,224,4.67,22.04,27.44
xcit_tiny_12_p16_384_dist,1911.65,535.652,1024,384,3.64,18.26,6.72
ssl_resnext101_32x4d,1910.73,535.909,1024,224,8.01,21.23,44.18
swsl_resnext101_32x4d,1910.43,535.993,1024,224,8.01,21.23,44.18
resnext101_32x4d,1909.99,536.115,1024,224,8.01,21.23,44.18
gluon_resnext101_32x4d,1909.34,536.298,1024,224,8.01,21.23,44.18
darknet53,1903.77,537.866,1024,288,11.78,15.68,41.61
darknetaa53,1898.12,539.468,1024,288,10.08,15.68,36.02
crossvit_15_240,1892.46,541.083,1024,240,5.81,19.77,27.53
halonet50ts,1881.53,544.226,1024,256,5.3,19.2,22.73
vgg13_bn,1879.72,544.749,1024,224,11.33,12.25,133.05
mixnet_xl,1872.46,546.86,1024,224,0.93,14.57,11.9
res2net50_26w_6s,1870.88,547.321,1024,224,6.33,15.28,37.05
ecaresnet101d,1869.88,547.616,1024,224,8.08,17.07,44.57
densenet201,1869.57,547.706,1024,224,4.34,7.85,20.01
nf_resnet101,1858.48,550.976,1024,224,8.01,16.23,44.55
coatnet_0_224,1857.28,275.661,512,224,4.58,24.01,25.04
pvt_v2_b2,1854.85,552.053,1024,224,4.05,27.53,25.36
crossvit_15_dagger_240,1850.69,553.295,1024,240,6.13,20.43,28.21
resmlp_36_224,1846.41,554.574,1024,224,8.91,16.33,44.69
resmlp_36_distilled_224,1845.04,554.99,1024,224,8.91,16.33,44.69
resnet61q,1841.84,555.954,1024,288,9.87,21.52,36.85
swin_s3_tiny_224,1817.5,563.398,1024,224,4.64,19.13,28.33
cait_xxs24_224,1796.55,569.968,1024,224,2.53,20.29,11.96
cs3darknet_x,1789.33,572.268,1024,288,10.6,14.36,35.05
vit_medium_patch16_gap_240,1785.54,573.481,1024,240,9.22,18.81,44.4
nf_resnet50,1784.84,573.708,1024,288,6.88,18.37,25.56
resnet50_gn,1764.31,580.385,1024,224,4.14,11.11,25.56
mixer_b16_224_miil,1761.45,581.327,1024,224,12.62,14.53,59.88
mixer_b16_224,1759.76,581.885,1024,224,12.62,14.53,59.88
resnetblur101d,1757.96,582.482,1024,224,9.12,17.94,44.57
eca_nfnet_l0,1726.58,593.068,1024,288,7.12,17.29,24.14
nfnet_l0,1721.83,594.705,1024,288,7.13,17.29,35.07
vit_large_patch32_224,1717.59,596.169,1024,224,15.41,13.32,327.9
vgg16,1717.44,596.224,1024,224,15.47,13.56,138.36
regnetz_c16,1710.89,598.505,1024,320,3.92,25.88,13.46
pvt_v2_b2_li,1709.89,598.855,1024,224,3.91,27.6,22.55
resnest50d_1s4x24d,1705.52,600.391,1024,224,4.43,13.57,25.68
coat_lite_small,1704.55,600.733,1024,224,3.96,22.09,19.84
resnetv2_50d_frn,1697.1,603.368,1024,224,4.33,11.92,25.59
cs3sedarknet_x,1689.8,605.975,1024,288,10.6,14.37,35.4
seresnext101_32x4d,1687.65,606.747,1024,224,8.02,21.26,48.96
gluon_seresnext101_32x4d,1687.1,606.945,1024,224,8.02,21.26,48.96
legacy_seresnext101_32x4d,1684.69,607.813,1024,224,8.02,21.26,48.96
regnetv_040,1682.92,608.454,1024,288,6.6,20.3,20.64
mobilevitv2_175,1677.66,457.769,768,256,5.54,28.13,14.25
regnety_040,1677.03,610.59,1024,288,6.61,20.3,20.65
mobilevitv2_175_in22ft1k,1677.0,457.949,768,256,5.54,28.13,14.25
convnext_tiny_hnf,1676.16,610.908,1024,288,7.39,22.21,28.59
res2net101_26w_4s,1675.37,611.195,1024,224,8.1,18.45,45.21
vit_tiny_patch16_384,1665.76,614.72,1024,384,4.7,25.39,5.79
sequencer2d_s,1661.32,616.362,1024,224,4.96,11.31,27.65
ese_vovnet39b_evos,1661.21,616.404,1024,224,7.07,6.74,24.58
vit_base_patch32_384,1649.27,620.868,1024,384,13.06,16.5,88.3
vit_base_patch32_clip_384,1648.64,621.105,1024,384,13.06,16.5,88.3
mixer_l32_224,1645.23,622.393,1024,224,11.27,19.86,206.94
convnext_tiny,1642.14,623.562,1024,288,7.39,22.21,28.59
botnet50ts_256,1639.64,312.25,512,256,5.54,22.23,22.74
swinv2_cr_tiny_224,1630.02,628.199,1024,224,4.66,28.45,28.33
resnetv2_50d_evob,1627.44,629.196,1024,224,4.33,11.92,25.59
twins_pcpvt_base,1615.12,633.996,1024,224,6.68,25.25,43.83
resnetv2_152,1614.43,634.268,1024,224,11.55,22.56,60.19
hrnet_w32,1605.06,637.96,1024,224,8.97,22.02,41.23
swinv2_cr_tiny_ns_224,1600.43,639.811,1024,224,4.66,28.45,28.33
xception41p,1598.79,480.351,768,299,9.25,39.86,26.91
tv_resnet152,1582.54,647.049,1024,224,11.56,22.56,60.19
gluon_resnet152_v1b,1581.57,647.444,1024,224,11.56,22.56,60.19
resnet152,1581.02,647.671,1024,224,11.56,22.56,60.19
xception,1579.88,648.138,1024,299,8.4,35.83,22.86
halo2botnet50ts_256,1572.75,651.076,1024,256,5.02,21.78,22.64
res2net50_26w_8s,1568.85,652.695,1024,224,8.37,17.95,48.4
vit_medium_patch16_gap_256,1564.22,654.626,1024,256,10.59,22.15,38.86
resnetv2_152d,1557.03,657.648,1024,224,11.8,23.36,60.2
efficientnet_el_pruned,1555.14,658.449,1024,300,8.0,30.7,10.59
maxvit_rmlp_nano_rw_256,1551.85,659.845,1024,256,4.47,31.92,15.5
regnetx_064,1550.52,660.413,1024,224,6.49,16.37,26.21
efficientnet_el,1549.97,660.646,1024,300,8.0,30.7,10.59
gluon_resnet152_v1c,1548.96,661.078,1024,224,11.8,23.36,60.21
nf_ecaresnet101,1546.58,662.091,1024,224,8.01,16.27,44.55
nf_seresnet101,1539.38,665.191,1024,224,8.02,16.27,49.33
mvitv2_tiny,1537.54,665.985,1024,224,4.7,21.16,24.17
nfnet_f0,1525.01,671.456,1024,256,12.62,18.05,71.49
vgg16_bn,1523.86,671.963,1024,224,15.5,13.56,138.37
cs3edgenet_x,1521.21,673.136,1024,288,14.59,16.36,47.82
gluon_resnet152_v1d,1520.11,673.621,1024,224,11.8,23.36,60.21
maxvit_nano_rw_256,1517.43,674.812,1024,256,4.46,30.28,15.45
tf_efficientnet_el,1506.16,679.862,1024,300,8.0,30.7,10.59
convnextv2_nano,1500.71,511.746,768,288,4.06,13.84,15.62
resnest50d,1492.63,686.022,1024,224,5.4,14.36,27.48
ese_vovnet99b,1489.17,687.617,1024,224,16.51,11.27,63.2
dla169,1471.11,696.059,1024,224,11.6,20.2,53.39
regnety_032,1467.85,697.604,1024,288,5.29,18.61,19.44
skresnext50_32x4d,1463.28,699.785,1024,224,4.5,17.18,27.48
xcit_tiny_12_p8_224_dist,1458.7,701.981,1024,224,4.81,23.6,6.71
xcit_tiny_12_p8_224,1458.23,702.211,1024,224,4.81,23.6,6.71
convit_small,1457.54,702.541,1024,224,5.76,17.87,27.78
mobilevitv2_200_in22ft1k,1456.59,527.247,768,256,7.22,32.15,18.45
mobilevitv2_200,1456.02,527.451,768,256,7.22,32.15,18.45
ecaresnet50t,1438.32,711.929,1024,320,8.82,24.13,25.57
gluon_resnet152_v1s,1432.22,714.961,1024,224,12.92,24.96,60.32
nest_tiny,1415.33,542.618,768,224,5.83,25.48,17.06
regnety_040s_gn,1412.65,724.867,1024,224,4.03,12.29,20.65
vgg19,1393.71,183.67,256,224,19.63,14.86,143.67
jx_nest_tiny,1389.62,552.657,768,224,5.83,25.48,17.06
legacy_seresnet152,1383.83,739.96,1024,224,11.33,22.08,66.82
densenet161,1376.52,743.891,1024,224,7.79,11.06,28.68
poolformer_s36,1370.67,747.069,1024,224,5.0,15.82,30.86
vit_small_resnet50d_s16_224,1367.59,748.748,1024,224,13.48,24.82,57.53
twins_svt_base,1362.65,751.463,1024,224,8.59,26.33,56.07
seresnet152,1361.7,751.99,1024,224,11.57,22.61,66.82
xception41,1356.44,566.173,768,299,9.28,39.86,26.97
maxvit_tiny_rw_224,1350.45,758.254,1024,224,5.11,33.11,29.06
crossvit_18_240,1348.85,759.154,1024,240,9.05,26.26,43.27
maxxvit_rmlp_nano_rw_256,1347.73,759.767,1024,256,4.37,26.05,16.78
efficientnet_lite4,1343.74,571.528,768,380,4.04,45.66,13.01
gcvit_tiny,1339.65,764.364,1024,224,4.79,29.82,28.22
pvt_v2_b3,1325.92,772.282,1024,224,6.92,37.7,45.24
crossvit_18_dagger_240,1313.78,779.419,1024,240,9.5,27.03,44.27
volo_d1_224,1312.37,780.255,1024,224,6.94,24.43,26.63
xcit_small_24_p16_224_dist,1307.3,783.278,1024,224,9.1,23.64,47.67
tresnet_m,1305.71,784.234,1024,224,5.74,7.31,31.39
inception_v4,1305.41,784.412,1024,299,12.28,15.09,42.68
repvgg_b2,1305.22,784.529,1024,224,20.45,12.9,89.02
xcit_small_24_p16_224,1303.71,785.433,1024,224,9.1,23.64,47.67
sequencer2d_m,1295.72,790.281,1024,224,6.55,14.26,38.31
edgenext_base,1283.77,797.633,1024,320,6.01,24.32,18.51
hrnet_w30,1280.53,799.653,1024,224,8.15,21.21,37.71
dm_nfnet_f0,1275.46,802.834,1024,256,12.62,18.05,71.49
coatnet_rmlp_1_rw_224,1268.37,807.322,1024,224,7.85,35.47,41.69
maxxvitv2_nano_rw_256,1259.7,812.877,1024,256,6.26,23.05,23.7
efficientnetv2_s,1254.49,816.255,1024,384,8.44,35.77,21.46
vgg19_bn,1246.52,205.36,256,224,19.66,14.86,143.68
nf_regnet_b4,1235.79,828.604,1024,384,4.7,28.61,30.21
swin_small_patch4_window7_224,1235.74,828.641,1024,224,8.77,27.47,49.61
tf_efficientnet_lite4,1232.22,623.25,768,380,4.04,45.66,13.01
regnetz_d32,1223.51,836.919,1024,320,9.33,37.08,27.58
mixnet_xxl,1219.27,629.871,768,224,2.04,23.43,23.96
tf_efficientnetv2_s,1219.16,839.906,1024,384,8.44,35.77,21.46
deit_base_patch16_224,1213.08,844.121,1024,224,17.58,23.9,86.57
deit_base_distilled_patch16_224,1212.98,844.19,1024,224,17.68,24.05,87.34
vit_base_patch16_clip_224,1211.82,844.996,1024,224,17.58,23.9,86.57
vit_base_patch16_224_miil,1211.26,845.389,1024,224,17.59,23.91,94.4
dpn92,1210.45,845.948,1024,224,6.54,18.21,37.67
vit_base_patch16_224,1210.28,846.074,1024,224,17.58,23.9,86.57
coatnet_rmlp_1_rw2_224,1208.65,847.215,1024,224,8.11,40.13,41.72
cait_xxs36_224,1205.51,849.419,1024,224,3.77,30.34,17.3
maxvit_tiny_tf_224,1200.3,639.828,768,224,5.6,35.78,30.92
swinv2_tiny_window8_256,1200.06,853.274,1024,256,5.96,24.57,28.35
efficientnetv2_rw_s,1199.87,853.413,1024,384,8.72,38.03,23.94
dla102x2,1198.52,854.374,1024,224,9.34,29.91,41.28
regnetx_160,1195.08,856.833,1024,224,15.99,25.52,54.28
dpn98,1183.92,864.908,1024,224,11.73,25.2,61.57
vit_base_patch16_rpn_224,1180.39,867.498,1024,224,17.49,23.75,86.54
twins_pcpvt_large,1168.64,876.22,1024,224,9.84,35.82,60.99
deit3_base_patch16_224,1164.77,879.134,1024,224,17.58,23.9,86.59
deit3_base_patch16_224_in21ft1k,1164.5,879.334,1024,224,17.58,23.9,86.59
regnetz_d8,1163.64,879.982,1024,320,6.19,37.08,23.37
swsl_resnext101_32x8d,1158.15,884.156,1024,224,16.48,31.21,88.79
resnext101_32x8d,1158.05,884.232,1024,224,16.48,31.21,88.79
ssl_resnext101_32x8d,1158.02,884.255,1024,224,16.48,31.21,88.79
wide_resnet101_2,1157.66,884.531,1024,224,22.8,21.23,126.89
ig_resnext101_32x8d,1157.3,884.8,1024,224,16.48,31.21,88.79
coatnet_1_rw_224,1155.72,886.014,1024,224,8.04,34.6,41.72
vit_base_patch16_gap_224,1154.73,886.777,1024,224,17.49,25.59,86.57
vit_base_patch32_clip_448,1154.21,887.173,1024,448,17.93,23.9,88.34
resnet200,1149.71,890.646,1024,224,15.07,32.19,64.67
mvitv2_small,1146.92,892.812,1024,224,7.0,28.08,34.87
xception65p,1145.07,670.686,768,299,13.91,52.48,39.82
cs3se_edgenet_x,1143.17,895.738,1024,320,18.01,20.21,50.72
vit_relpos_base_patch16_rpn_224,1143.15,895.76,1024,224,17.51,24.97,86.41
vit_relpos_base_patch16_224,1141.31,897.204,1024,224,17.51,24.97,86.43
tnt_s_patch16_224,1135.32,901.935,1024,224,5.24,24.37,23.76
resnetrs101,1134.67,902.454,1024,288,13.56,28.53,63.62
vit_relpos_base_patch16_clsgap_224,1128.94,907.03,1024,224,17.6,25.12,86.43
vit_relpos_base_patch16_cls_224,1126.78,908.771,1024,224,17.6,25.12,86.43
inception_resnet_v2,1126.73,908.809,1024,299,13.18,25.06,55.84
ens_adv_inception_resnet_v2,1125.41,909.877,1024,299,13.18,25.06,55.84
beit_base_patch16_224,1112.26,920.631,1024,224,17.58,23.9,86.53
coat_tiny,1108.72,923.572,1024,224,4.35,27.2,5.5
beitv2_base_patch16_224,1108.55,923.711,1024,224,17.58,23.9,86.53
mvitv2_small_cls,1101.66,929.491,1024,224,7.04,28.17,34.87
resnetv2_50d_gn,1092.35,937.413,1024,288,7.24,19.7,25.57
pit_b_distilled_224,1078.48,474.731,512,224,12.5,33.07,74.79
pit_b_224,1075.34,476.117,512,224,12.42,32.94,73.76
hrnet_w40,1059.78,966.217,1024,224,12.75,25.29,57.56
coatnet_1_224,1045.17,489.859,512,224,8.7,39.0,42.23
resnet101d,1039.88,984.712,1024,320,16.48,34.77,44.57
flexivit_base,1037.21,987.248,1024,240,20.29,28.36,86.59
gluon_resnext101_64x4d,1034.86,989.491,1024,224,15.52,31.21,83.46
vit_small_patch16_36x1_224,1033.13,991.146,1024,224,13.71,35.69,64.67
vit_large_r50_s32_224,1030.67,993.517,1024,224,19.58,24.41,328.99
maxvit_rmlp_tiny_rw_256,1029.25,746.162,768,256,6.77,46.92,29.15
xcit_tiny_24_p16_384_dist,1027.64,996.444,1024,384,6.87,34.29,12.12
efficientnet_b4,1014.08,504.879,512,384,4.51,50.04,19.34
maxvit_tiny_rw_256,1008.0,1015.861,1024,256,6.74,44.35,29.07
vit_small_patch16_18x2_224,1006.7,1017.169,1024,224,13.71,35.69,64.67
swinv2_cr_small_224,1005.28,1018.603,1024,224,9.07,50.27,49.7
regnetx_080,1004.51,1019.384,1024,224,8.02,14.06,39.57
repvgg_b3,994.23,1029.925,1024,224,29.16,15.1,123.09
swinv2_cr_small_ns_224,993.75,1030.424,1024,224,9.08,50.27,49.7
repvgg_b2g4,988.97,1035.405,1024,224,12.63,12.9,61.76
convnext_small,988.3,1036.113,1024,288,14.39,35.65,50.22
gluon_xception65,987.82,777.458,768,299,13.96,52.48,39.92
vit_small_r26_s32_384,982.68,1042.031,1024,384,10.43,29.85,36.47
xception65,978.83,784.597,768,299,13.96,52.48,39.92
regnetz_040,975.77,787.056,768,320,6.35,37.78,27.12
regnetz_040h,971.51,790.512,768,320,6.43,37.94,28.94
gluon_seresnext101_64x4d,965.3,1060.794,1024,224,15.53,31.25,88.23
maxvit_tiny_pm_256,964.03,1062.189,1024,256,6.61,47.9,30.09
efficientformer_l7,962.55,1063.825,1024,224,10.17,24.45,82.23
twins_svt_large,962.19,1064.229,1024,224,15.15,35.1,99.27
tf_efficientnet_b4,957.62,534.646,512,380,4.49,49.49,19.34
pvt_v2_b4,957.38,1069.569,1024,224,10.14,53.74,62.56
poolformer_m36,954.91,1072.334,1024,224,8.8,22.02,56.17
cait_s24_224,954.44,1072.866,1024,224,9.35,40.58,46.92
regnetz_b16_evos,950.47,808.013,768,288,2.36,16.43,9.74
resnest50d_4s2x40d,938.07,1091.586,1024,224,4.4,17.94,30.42
hrnet_w48,936.07,1093.917,1024,224,17.34,28.56,77.47
gmlp_b16_224,930.95,1099.935,1024,224,15.78,30.21,73.08
convnextv2_tiny,930.82,550.041,512,288,7.39,22.21,28.64
convnextv2_small,928.68,1102.629,1024,224,8.71,21.56,50.32
maxxvit_rmlp_tiny_rw_256,918.72,1114.583,1024,256,6.66,39.76,29.64
mobilevitv2_150_384_in22ft1k,915.49,419.435,384,384,9.2,54.25,10.59
pvt_v2_b5,909.79,1125.516,1024,224,11.76,50.92,81.96
nest_small,903.21,850.284,768,224,10.35,40.04,38.35
swin_s3_small_224,899.98,853.339,768,224,9.43,37.84,49.74
xcit_medium_24_p16_224_dist,898.61,1139.525,1024,224,16.13,31.71,84.4
xcit_medium_24_p16_224,898.6,1139.542,1024,224,16.13,31.71,84.4
jx_nest_small,892.03,860.939,768,224,10.35,40.04,38.35
coat_mini,880.8,1162.569,1024,224,6.82,33.68,10.34
swin_base_patch4_window7_224,875.38,1169.764,1024,224,15.47,36.63,87.77
dpn131,865.2,1183.527,1024,224,16.09,32.97,79.25
resnetv2_50d_evos,854.82,1197.895,1024,288,7.15,19.7,25.59
xcit_small_12_p16_384_dist,853.54,1199.694,1024,384,14.14,36.51,26.25
sequencer2d_l,839.78,1219.347,1024,224,9.74,22.12,54.3
crossvit_base_240,839.43,914.892,768,240,21.22,36.33,105.03
hrnet_w44,821.37,1246.671,1024,224,14.94,26.92,67.06
eca_nfnet_l1,818.87,1250.489,1024,320,14.92,34.42,41.41
vit_base_r50_s16_224,817.55,1252.502,1024,224,21.67,35.31,114.69
maxvit_rmlp_small_rw_224,816.34,1254.368,1024,224,10.75,49.3,64.9
gcvit_small,815.24,1256.055,1024,224,8.57,41.61,51.09
regnety_080,811.28,1262.191,1024,288,13.22,29.69,39.18
densenet264,804.85,1272.268,1024,224,12.95,12.8,72.69
mvitv2_base,804.14,1273.395,1024,224,10.16,40.5,51.47
repvgg_b3g4,802.85,1275.443,1024,224,17.89,15.1,83.83
vit_base_patch16_plus_240,782.25,1309.022,1024,240,27.41,33.08,117.56
swinv2_tiny_window16_256,781.61,655.045,512,256,6.68,39.02,28.35
maxvit_small_tf_224,777.04,658.899,512,224,11.66,53.17,68.93
xcit_tiny_24_p8_224,771.1,1327.958,1024,224,9.21,45.39,12.11
xcit_tiny_24_p8_224_dist,770.21,1329.496,1024,224,9.21,45.39,12.11
coatnet_2_rw_224,763.52,670.562,512,224,15.09,49.22,73.87
vit_relpos_base_patch16_plus_240,763.4,1341.361,1024,240,27.3,34.33,117.38
efficientnet_b3_gn,763.0,671.023,512,320,2.14,28.83,11.73
coatnet_rmlp_2_rw_224,759.73,673.906,512,224,15.18,54.78,73.88
vit_small_patch16_384,753.82,1018.79,768,384,15.52,50.78,22.2
hrnet_w64,750.36,1364.663,1024,224,28.97,35.09,128.06
xception71,749.7,1024.396,768,299,18.09,69.92,42.34
resnet152d,742.37,1379.356,1024,320,24.08,47.67,60.21
swinv2_small_window8_256,741.95,1380.134,1024,256,11.58,40.14,49.73
mobilevitv2_175_384_in22ft1k,739.09,519.544,384,384,12.47,63.29,14.25
ecaresnet200d,736.17,1390.959,1024,256,20.0,43.15,64.69
seresnet200d,733.28,1396.444,1024,256,20.01,43.15,71.86
swin_s3_base_224,733.27,1396.459,1024,224,13.69,48.26,71.13
convit_base,731.09,1400.636,1024,224,17.52,31.77,86.54
resnest101e,726.65,1409.184,1024,256,13.38,28.66,48.28
deit3_small_patch16_384,726.49,1057.125,768,384,15.52,50.78,22.21
deit3_small_patch16_384_in21ft1k,726.32,1057.368,768,384,15.52,50.78,22.21
volo_d2_224,722.61,1417.079,1024,224,14.34,41.34,58.68
tnt_b_patch16_224,721.24,1419.762,1024,224,14.09,39.01,65.41
xcit_nano_12_p8_384_dist,720.41,1421.4,1024,384,6.34,46.08,3.05
swinv2_cr_base_224,719.23,1423.721,1024,224,15.86,59.66,87.88
poolformer_m48,719.07,1424.046,1024,224,11.59,29.17,73.47
coatnet_2_224,715.36,715.711,512,224,16.5,52.67,74.68
swinv2_cr_base_ns_224,712.96,1436.239,1024,224,15.86,59.66,87.88
dpn107,691.0,1481.897,1024,224,18.38,33.46,86.92
convnext_base,687.14,1490.219,1024,288,25.43,47.53,88.59
resnetv2_50x1_bitm,684.31,374.087,256,448,16.62,44.46,25.55
efficientnet_b3_g8_gn,664.63,770.341,512,320,3.2,28.83,14.25
regnety_064,657.71,1556.911,1024,288,10.56,27.11,30.58
regnetv_064,652.6,1569.096,1024,288,10.55,27.11,30.58
xcit_small_12_p8_224,651.3,1572.214,1024,224,18.69,47.21,26.21
xcit_small_12_p8_224_dist,651.08,1572.755,1024,224,18.69,47.21,26.21
resnetrs152,649.95,1575.501,1024,320,24.34,48.14,86.62
mobilevitv2_200_384_in22ft1k,647.42,395.4,256,384,16.24,72.34,18.45
seresnet152d,645.69,1585.88,1024,320,24.09,47.72,66.84
tresnet_l,644.38,1589.105,1024,224,10.88,11.9,55.99
tresnet_v2_l,642.3,1594.246,1024,224,8.81,16.34,46.17
nest_base,640.98,798.76,512,224,17.96,53.39,67.72
regnetx_120,640.37,1599.07,1024,224,12.13,21.37,46.11
seresnext101_32x8d,639.53,1601.159,1024,288,27.24,51.63,93.57
regnetz_e8,639.43,1601.423,1024,320,15.46,63.94,57.7
ese_vovnet99b_iabn,636.1,1609.798,1024,224,16.49,11.27,63.2
jx_nest_base,634.61,806.787,512,224,17.96,53.39,67.72
regnety_120,625.75,1636.422,1024,224,12.14,21.38,51.82
efficientnetv2_m,624.53,1639.618,1024,416,18.6,67.5,54.14
seresnext101d_32x8d,621.55,1647.466,1024,288,27.64,52.95,93.59
resnext101_64x4d,619.77,1652.21,1024,288,25.66,51.59,83.46
swsl_resnext101_32x16d,612.21,1672.624,1024,224,36.27,51.18,194.03
ig_resnext101_32x16d,611.98,1673.243,1024,224,36.27,51.18,194.03
maxvit_rmlp_small_rw_256,611.67,1255.571,768,256,14.15,66.09,64.9
ssl_resnext101_32x16d,611.31,1675.063,1024,224,36.27,51.18,194.03
regnety_320,605.31,1691.684,1024,224,32.34,30.26,145.05
gcvit_base,602.42,1699.782,1024,224,14.87,55.48,90.32
regnetz_c16_evos,596.93,857.706,512,320,3.86,25.88,13.49
maxxvit_rmlp_small_rw_256,590.18,1735.046,1024,256,14.67,58.38,66.01
legacy_senet154,585.86,1747.854,1024,224,20.77,38.69,115.09
senet154,585.53,1748.836,1024,224,20.77,38.69,115.09
seresnextaa101d_32x8d,585.08,1750.175,1024,288,28.51,56.44,93.59
gluon_senet154,584.86,1750.843,1024,224,20.77,38.69,115.09
convmixer_768_32,581.95,1759.577,1024,224,19.55,25.95,21.11
seresnet269d,574.5,1782.4,1024,256,26.59,53.6,113.67
nf_regnet_b5,565.36,905.602,512,456,11.7,61.95,49.74
mixer_l16_224,553.66,1849.49,1024,224,44.6,41.69,208.2
resnet200d,545.14,1878.401,1024,320,31.25,67.33,64.69
nfnet_f1,544.28,1881.353,1024,320,35.97,46.77,132.63
vit_large_patch32_384,543.45,1884.237,1024,384,45.31,43.86,306.63
efficientnetv2_rw_m,543.37,1884.512,1024,416,21.49,79.62,53.24
vit_medium_patch16_gap_384,539.24,949.475,512,384,26.08,67.54,39.03
efficientnet_b5,533.21,960.212,512,448,9.59,93.56,30.39
swinv2_base_window8_256,531.81,1925.495,1024,256,20.37,52.59,87.92
maxxvitv2_rmlp_base_rw_224,525.72,1947.791,1024,224,24.2,62.77,116.09
xcit_large_24_p16_224_dist,509.19,2011.039,1024,224,35.86,47.27,189.1
xcit_large_24_p16_224,509.15,2011.169,1024,224,35.86,47.27,189.1
swin_large_patch4_window7_224,504.4,1522.593,768,224,34.53,54.94,196.53
halonet_h1,503.39,508.543,256,256,3.0,51.17,8.1
volo_d3_224,502.58,2037.467,1024,224,20.78,60.09,86.33
swinv2_small_window16_256,488.97,1047.084,512,256,12.82,66.29,49.73
tresnet_xl,481.58,2126.301,1024,224,15.17,15.34,78.44
vit_small_patch8_224,479.11,1068.641,512,224,22.44,80.84,21.67
tf_efficientnet_b5,476.47,805.919,384,456,10.46,98.86,30.39
maxvit_rmlp_base_rw_224,472.06,2169.196,1024,224,23.15,92.64,116.14
resnetrs200,471.68,2170.964,1024,320,31.51,67.81,93.21
xcit_tiny_12_p8_384_dist,471.45,2172.002,1024,384,14.13,69.14,6.71
dm_nfnet_f1,461.24,2220.087,1024,320,35.97,46.77,132.63
tf_efficientnetv2_m,458.93,1673.426,768,480,24.76,89.84,54.14
xcit_small_24_p16_384_dist,457.16,2239.891,1024,384,26.72,68.58,47.67
coatnet_rmlp_3_rw_224,439.5,582.463,256,224,33.56,79.47,165.15
maxvit_base_tf_224,430.05,1190.542,512,224,24.04,95.01,119.47
swinv2_cr_large_224,423.86,1811.887,768,224,35.1,78.42,196.68
resnetv2_152x2_bit_teacher,423.36,2418.743,1024,224,46.95,45.11,236.34
swinv2_cr_tiny_384,423.1,907.565,384,384,15.34,161.01,28.33
coatnet_3_rw_224,421.95,606.701,256,224,33.44,73.83,181.81
resnetv2_101x1_bitm,419.35,610.453,256,448,31.65,64.93,44.54
coatnet_3_224,405.07,631.982,256,224,36.56,79.01,166.97
convnextv2_base,403.59,1268.593,512,288,25.43,47.53,88.72
eca_nfnet_l2,401.73,2548.946,1024,384,30.05,68.28,56.72
regnetz_d8_evos,394.39,1947.294,768,320,7.03,38.92,23.46
convmixer_1024_20_ks9_p14,393.5,2602.254,1024,224,5.55,5.51,24.38
eva_large_patch14_196,392.3,2610.234,1024,196,61.57,63.52,304.14
crossvit_15_dagger_408,390.72,655.182,256,408,21.45,95.05,28.5
vit_large_patch16_224,390.66,2621.182,1024,224,61.6,63.52,304.33
vit_base_patch16_18x2_224,384.38,2663.987,1024,224,52.51,71.38,256.73
deit3_large_patch16_224_in21ft1k,377.58,2711.976,1024,224,61.6,63.52,304.37
deit3_large_patch16_224,377.53,2712.348,1024,224,61.6,63.52,304.37
convnext_large,373.02,2058.836,768,288,56.87,71.29,197.77
beit_large_patch16_224,360.62,2839.572,1024,224,61.6,63.52,304.43
beitv2_large_patch16_224,360.58,2839.86,1024,224,61.6,63.52,304.43
swinv2_base_window12to16_192to256_22kft1k,360.56,1065.006,384,256,22.02,84.71,87.92
swinv2_base_window16_256,360.23,1065.959,384,256,22.02,84.71,87.92
regnety_160,353.5,2172.566,768,288,26.37,38.07,83.59
nasnetalarge,345.63,1111.004,384,331,23.89,90.56,88.75
maxvit_tiny_tf_384,344.01,744.157,256,384,17.53,123.42,30.98
xcit_small_24_p8_224,342.37,2990.915,1024,224,35.81,90.78,47.63
xcit_small_24_p8_224_dist,342.26,2991.817,1024,224,35.81,90.78,47.63
flexivit_large,335.35,3053.52,1024,240,70.99,75.39,304.36
maxxvitv2_rmlp_large_rw_224,332.33,3081.271,1024,224,44.14,87.15,215.42
vit_large_r50_s32_384,329.8,3104.921,1024,384,57.43,76.52,329.09
pnasnet5large,328.89,1167.534,384,331,25.04,92.89,86.06
tresnet_m_448,325.8,3143.01,1024,448,22.94,29.21,31.39
volo_d1_384,323.04,1584.906,512,384,22.75,108.55,26.78
volo_d4_224,318.96,3210.439,1024,224,44.34,80.22,192.96
xcit_medium_24_p16_384_dist,312.74,3274.268,1024,384,47.39,91.64,84.4
nfnet_f2,310.6,3296.869,1024,352,63.22,79.06,193.78
vit_base_patch16_384,307.09,1250.42,384,384,55.54,101.56,86.86
deit_base_patch16_384,306.8,1251.599,384,384,55.54,101.56,86.86
vit_base_patch16_clip_384,306.29,1253.685,384,384,55.54,101.56,86.86
deit_base_distilled_patch16_384,305.48,1257.017,384,384,55.65,101.82,87.63
ecaresnet269d,305.06,3356.684,1024,352,50.25,101.25,102.09
maxvit_large_tf_224,301.43,1273.908,384,224,43.68,127.35,211.79
deit3_base_patch16_384_in21ft1k,298.01,1288.526,384,384,55.54,101.56,86.88
deit3_base_patch16_384,297.88,1289.093,384,384,55.54,101.56,86.88
resnetrs270,296.97,3448.186,1024,352,51.13,105.48,129.86
regnetx_320,289.44,2653.413,768,224,31.81,36.3,107.81
efficientnet_b6,287.31,890.997,256,528,19.4,167.39,43.04
vit_large_patch14_224,286.23,3577.501,1024,224,81.08,88.79,304.2
vit_large_patch14_clip_224,285.99,3580.5,1024,224,81.08,88.79,304.2
crossvit_18_dagger_408,285.18,673.248,192,408,32.47,124.87,44.61
cait_xxs24_384,281.48,3637.936,1024,384,9.63,122.66,12.03
ig_resnext101_32x32d,275.12,1860.956,512,224,87.29,91.12,468.53
tf_efficientnet_b6,274.07,700.545,192,528,19.4,167.39,43.04
dm_nfnet_f2,264.79,2900.408,768,352,63.22,79.06,193.78
beit_base_patch16_384,261.27,1469.733,384,384,55.54,101.56,86.74
efficientnetv2_l,260.33,1966.694,512,480,56.4,157.99,118.52
swinv2_cr_small_384,259.75,985.56,256,384,29.7,298.03,49.7
tf_efficientnetv2_l,257.29,1989.923,512,480,56.4,157.99,118.52
resnest200e,254.36,1006.453,256,320,35.69,82.78,70.2
mvitv2_large,249.99,2048.061,512,224,43.87,112.02,217.99
xcit_tiny_24_p8_384_dist,248.25,4124.916,1024,384,27.05,132.95,12.11
convnext_xlarge,242.63,2110.182,512,288,100.8,95.05,350.2
resmlp_big_24_224_in22ft1k,241.9,4233.056,1024,224,100.23,87.31,129.14
resmlp_big_24_224,241.74,4235.988,1024,224,100.23,87.31,129.14
resmlp_big_24_distilled_224,241.44,4241.249,1024,224,100.23,87.31,129.14
convnextv2_large,239.52,1068.782,256,288,56.87,71.29,197.96
coatnet_4_224,238.62,1072.827,256,224,62.48,129.26,275.43
swin_base_patch4_window12_384,236.12,813.144,192,384,47.19,134.78,87.9
xcit_medium_24_p8_224_dist,233.5,3289.007,768,224,63.53,121.23,84.32
xcit_medium_24_p8_224,233.5,3289.104,768,224,63.53,121.23,84.32
eca_nfnet_l3,229.87,2227.284,512,448,52.55,118.4,72.04
vit_base_r50_s16_384,226.32,1696.687,384,384,67.43,135.03,98.95
maxvit_small_tf_384,224.01,857.105,192,384,35.87,183.65,69.02
xcit_small_12_p8_384_dist,221.54,1733.28,384,384,54.92,138.29,26.21
swinv2_large_window12to16_192to256_22kft1k,220.1,1163.101,256,256,47.81,121.53,196.74
volo_d5_224,210.88,4855.76,1024,224,72.4,118.11,295.46
vit_base_patch8_224,199.67,1282.079,256,224,78.22,161.69,86.58
cait_xs24_384,197.64,3885.811,768,384,19.28,183.98,26.67
resnetrs350,196.19,5219.377,1024,384,77.59,154.74,163.96
cait_xxs36_384,188.27,5439.03,1024,384,14.35,183.7,17.37
swinv2_cr_base_384,185.68,1378.725,256,384,50.57,333.68,87.88
coatnet_rmlp_2_rw_384,184.84,1038.746,192,384,47.69,209.43,73.88
swinv2_cr_huge_224,184.09,2085.934,384,224,115.97,121.08,657.83
convnext_xxlarge,183.68,2787.486,512,224,151.66,95.29,846.47
volo_d2_384,180.56,2126.753,384,384,46.17,184.51,58.87
xcit_large_24_p16_384_dist,176.39,5805.281,1024,384,105.35,137.17,189.1
regnety_640,174.81,4393.396,768,224,64.16,42.5,281.38
1 model infer_samples_per_sec infer_step_time infer_batch_size infer_img_size infer_gmacs infer_macts param_count
2 tinynet_e 72737.62 14.068 1024 106 0.03 0.69 2.04
3 mobilenetv3_small_050 54822.3 18.668 1024 224 0.03 0.92 1.59
4 lcnet_035 53629.35 19.084 1024 224 0.03 1.04 1.64
5 lcnet_050 45492.41 22.499 1024 224 0.05 1.26 1.88
6 mobilenetv3_small_075 39215.51 26.102 1024 224 0.05 1.3 2.04
7 tinynet_d 37346.61 27.409 1024 152 0.05 1.42 2.34
8 mobilenetv3_small_100 36280.34 28.214 1024 224 0.06 1.42 2.54
9 tf_mobilenetv3_small_minimal_100 31726.33 32.265 1024 224 0.06 1.41 2.04
10 tf_mobilenetv3_small_075 31503.43 32.494 1024 224 0.05 1.3 2.04
11 lcnet_075 29817.69 34.332 1024 224 0.1 1.99 2.36
12 tf_mobilenetv3_small_100 29444.91 34.767 1024 224 0.06 1.42 2.54
13 mnasnet_small 25354.86 40.376 1024 224 0.07 2.16 2.03
14 lcnet_100 24134.76 42.417 1024 224 0.16 2.52 2.95
15 regnetx_002 23983.4 42.686 1024 224 0.2 2.16 2.68
16 levit_128s 22675.73 45.148 1024 224 0.31 1.88 7.78
17 regnety_002 21709.37 47.158 1024 224 0.2 2.17 3.16
18 mobilenetv2_035 21673.44 47.236 1024 224 0.07 2.86 1.68
19 mnasnet_050 20010.27 51.163 1024 224 0.11 3.07 2.22
20 ghostnet_050 18932.82 54.075 1024 224 0.05 1.77 2.59
21 tinynet_c 18428.42 55.556 1024 184 0.11 2.87 2.46
22 semnasnet_050 17215.18 59.471 1024 224 0.11 3.44 2.08
23 mobilenetv2_050 17194.94 59.542 1024 224 0.1 3.64 1.97
24 cs3darknet_focus_s 16189.76 63.24 1024 256 0.69 2.7 3.27
25 lcnet_150 15557.15 65.811 1024 224 0.34 3.79 4.5
26 cs3darknet_s 15369.47 66.615 1024 256 0.72 2.97 3.28
27 levit_128 15337.67 66.754 1024 224 0.41 2.71 9.21
28 gernet_s 15288.68 66.966 1024 224 0.75 2.65 8.17
29 mobilenetv3_large_075 14216.3 72.019 1024 224 0.16 4.0 3.99
30 mixer_s32_224 14182.92 72.188 1024 224 1.0 2.28 19.1
31 vit_tiny_r_s16_p8_224 14125.39 72.482 1024 224 0.44 2.06 6.34
32 resnet10t 14112.07 72.551 1024 224 1.1 2.43 5.44
33 vit_small_patch32_224 13799.47 74.195 1024 224 1.15 2.5 22.88
34 regnetx_004 13610.2 75.225 1024 224 0.4 3.14 5.16
35 levit_192 13524.14 75.706 1024 224 0.66 3.2 10.95
36 mobilenetv3_rw 12956.58 79.021 1024 224 0.23 4.41 5.48
37 hardcorenas_a 12803.61 79.966 1024 224 0.23 4.38 5.26
38 mobilenetv3_large_100 12749.93 80.304 1024 224 0.23 4.41 5.48
39 mnasnet_075 12532.36 81.697 1024 224 0.23 4.77 3.17
40 tf_mobilenetv3_large_075 12186.51 84.017 1024 224 0.16 4.0 3.99
41 tinynet_b 12083.18 84.735 1024 188 0.21 4.44 3.73
42 regnety_004 11918.36 85.906 1024 224 0.41 3.89 4.34
43 tf_mobilenetv3_large_minimal_100 11715.94 87.392 1024 224 0.22 4.4 3.92
44 hardcorenas_c 11548.05 88.662 1024 224 0.28 5.01 5.52
45 hardcorenas_b 11510.71 88.949 1024 224 0.26 5.09 5.18
46 ese_vovnet19b_slim_dw 11501.95 89.018 1024 224 0.4 5.28 1.9
47 ghostnet_100 11332.61 90.348 1024 224 0.15 3.55 5.18
48 mnasnet_100 11138.43 91.923 1024 224 0.33 5.46 4.38
49 gluon_resnet18_v1b 11098.78 92.252 1024 224 1.82 2.48 11.69
50 resnet18 11083.1 92.383 1024 224 1.82 2.48 11.69
51 swsl_resnet18 11062.48 92.555 1024 224 1.82 2.48 11.69
52 ssl_resnet18 11061.11 92.565 1024 224 1.82 2.48 11.69
53 tf_mobilenetv3_large_100 11018.56 92.922 1024 224 0.23 4.41 5.48
54 mnasnet_b1 10993.58 93.135 1024 224 0.33 5.46 4.38
55 hardcorenas_d 10910.47 93.843 1024 224 0.3 4.93 7.5
56 semnasnet_075 10898.09 93.951 1024 224 0.23 5.54 2.91
57 mobilenetv2_075 10893.76 93.988 1024 224 0.22 5.86 2.64
58 seresnet18 10385.56 98.588 1024 224 1.82 2.49 11.78
59 legacy_seresnet18 10064.41 101.734 1024 224 1.82 2.49 11.78
60 spnasnet_100 10009.21 102.296 1024 224 0.35 6.03 4.42
61 tf_efficientnetv2_b0 9930.95 103.1 1024 224 0.73 4.77 7.14
62 levit_256 9858.1 103.863 1024 224 1.13 4.23 18.89
63 tinynet_a 9720.11 105.337 1024 192 0.35 5.41 6.19
64 hardcorenas_f 9714.91 105.393 1024 224 0.35 5.57 8.2
65 semnasnet_100 9623.78 106.393 1024 224 0.32 6.23 3.89
66 mnasnet_a1 9623.77 106.393 1024 224 0.32 6.23 3.89
67 mobilenetv2_100 9598.91 106.667 1024 224 0.31 6.68 3.5
68 hardcorenas_e 9571.87 106.966 1024 224 0.35 5.65 8.07
69 dla46_c 9568.4 107.007 1024 224 0.58 4.5 1.3
70 efficientnet_lite0 9361.14 109.377 1024 224 0.4 6.74 4.65
71 fbnetc_100 9352.03 109.484 1024 224 0.4 6.51 5.57
72 resnet18d 9334.83 109.687 1024 224 2.06 3.29 11.71
73 ese_vovnet19b_slim 9109.47 112.4 1024 224 1.69 3.52 3.17
74 regnety_006 9097.63 112.542 1024 224 0.61 4.33 6.06
75 regnetz_005 8607.49 118.955 1024 224 0.52 5.86 7.12
76 xcit_nano_12_p16_224_dist 8577.2 119.375 1024 224 0.56 4.17 3.05
77 xcit_nano_12_p16_224 8554.61 119.689 1024 224 0.56 4.17 3.05
78 levit_256d 8382.88 122.143 1024 224 1.4 4.93 26.21
79 regnetx_006 8379.52 122.192 1024 224 0.61 3.98 6.2
80 ghostnet_130 8278.59 123.681 1024 224 0.24 4.6 7.36
81 tf_efficientnet_lite0 8080.51 126.714 1024 224 0.4 6.74 4.65
82 efficientnet_b0 7965.17 128.548 1024 224 0.4 6.75 5.29
83 mnasnet_140 7779.42 131.618 1024 224 0.6 7.71 7.12
84 deit_tiny_distilled_patch16_224 7467.68 137.113 1024 224 1.27 6.01 5.91
85 rexnetr_100 7464.12 137.179 1024 224 0.43 7.72 4.88
86 deit_tiny_patch16_224 7430.15 137.806 1024 224 1.26 5.97 5.72
87 resnet14t 7429.68 137.815 1024 224 1.69 5.8 10.08
88 vit_tiny_patch16_224 7424.93 137.902 1024 224 1.26 5.97 5.72
89 regnetx_008 7394.88 138.463 1024 224 0.81 5.15 7.26
90 mobilenetv2_110d 7247.12 141.287 1024 224 0.45 8.71 4.52
91 hrnet_w18_small 7232.93 141.561 1024 224 1.61 5.72 13.19
92 tf_efficientnet_b0 7016.18 145.938 1024 224 0.4 6.75 5.29
93 regnety_008 6938.46 147.571 1024 224 0.81 5.25 6.26
94 mobilevitv2_050 6848.87 149.503 1024 256 0.48 8.04 1.37
95 pit_ti_distilled_224 6811.68 150.317 1024 224 0.71 6.23 5.1
96 pit_ti_224 6784.24 150.927 1024 224 0.7 6.19 4.85
97 gernet_m 6679.85 153.286 1024 224 3.02 5.24 21.14
98 efficientnet_b1_pruned 6642.37 154.15 1024 240 0.4 6.21 6.33
99 resnet34 6496.42 157.614 1024 224 3.67 3.74 21.8
100 gluon_resnet34_v1b 6494.61 157.658 1024 224 3.67 3.74 21.8
101 tv_resnet34 6481.01 157.989 1024 224 3.67 3.74 21.8
102 tf_efficientnetv2_b1 6476.52 158.098 1024 240 1.21 7.34 8.14
103 semnasnet_140 6454.5 158.637 1024 224 0.6 8.87 6.11
104 nf_regnet_b0 6452.24 158.693 1024 256 0.64 5.58 8.76
105 ese_vovnet19b_dw 6335.13 161.627 1024 224 1.34 8.25 6.54
106 mobilenetv2_140 6271.56 163.266 1024 224 0.6 9.57 6.11
107 rexnet_100 6226.48 164.447 1024 224 0.41 7.44 4.8
108 efficientnet_lite1 6187.91 165.472 1024 240 0.62 10.14 5.42
109 efficientnet_es_pruned 6115.4 167.434 1024 224 1.81 8.73 5.44
110 efficientnet_es 6115.12 167.443 1024 224 1.81 8.73 5.44
111 visformer_tiny 6103.09 167.772 1024 224 1.27 5.72 10.32
112 seresnet34 6058.13 169.019 1024 224 3.67 3.74 21.96
113 fbnetv3_b 6018.76 170.124 1024 256 0.55 9.1 8.6
114 selecsls42 5953.76 171.98 1024 224 2.94 4.62 30.35
115 selecsls42b 5921.2 172.924 1024 224 2.98 4.62 32.46
116 resnet26 5895.21 173.69 1024 224 2.36 7.35 16.0
117 edgenext_xx_small 5893.72 173.732 1024 288 0.33 4.21 1.33
118 levit_384 5880.4 174.126 1024 224 2.36 6.26 39.13
119 resnet34d 5865.98 174.555 1024 224 3.91 4.54 21.82
120 legacy_seresnet34 5850.24 175.025 1024 224 3.67 3.74 21.96
121 dla34 5827.3 175.712 1024 224 3.07 5.02 15.74
122 tf_efficientnet_es 5781.29 177.112 1024 224 1.81 8.73 5.44
123 cs3darknet_focus_m 5721.39 178.967 1024 288 2.51 6.19 9.3
124 resnetblur18 5636.65 181.657 1024 224 2.34 3.39 11.69
125 rexnetr_130 5590.0 183.173 1024 224 0.68 9.81 7.61
126 mobilevit_xxs 5524.87 185.333 1024 256 0.42 8.34 1.27
127 tf_efficientnet_lite1 5524.68 185.339 1024 240 0.62 10.14 5.42
128 cs3darknet_m 5478.07 186.916 1024 288 2.63 6.69 9.31
129 convnext_atto 5460.54 187.516 1024 288 0.91 6.3 3.7
130 xcit_tiny_12_p16_224_dist 5457.72 187.611 1024 224 1.24 6.29 6.72
131 xcit_tiny_12_p16_224 5456.63 187.649 1024 224 1.24 6.29 6.72
132 skresnet18 5413.1 189.159 1024 224 1.82 3.24 11.96
133 darknet17 5401.37 189.571 1024 256 3.26 7.18 14.3
134 mixnet_s 5392.58 189.878 1024 224 0.25 6.25 4.13
135 resmlp_12_224 5366.15 190.814 1024 224 3.01 5.5 15.35
136 resmlp_12_distilled_224 5364.91 190.857 1024 224 3.01 5.5 15.35
137 convnext_atto_ols 5288.94 193.6 1024 288 0.96 6.8 3.7
138 vit_base_patch32_clip_224 5280.68 193.903 1024 224 4.41 5.01 88.22
139 vit_base_patch32_224 5280.52 193.908 1024 224 4.41 5.01 88.22
140 pit_xs_distilled_224 5272.13 194.218 1024 224 1.41 7.76 11.0
141 pit_xs_224 5271.0 194.259 1024 224 1.4 7.71 10.62
142 repvgg_b0 5252.66 194.939 1024 224 3.41 6.15 15.82
143 mixer_b32_224 5221.71 196.094 1024 224 3.24 6.29 60.29
144 pvt_v2_b0 5210.31 196.521 1024 224 0.57 7.99 3.67
145 resnetaa34d 5171.78 197.986 1024 224 4.43 5.07 21.82
146 selecsls60 5160.83 198.407 1024 224 3.59 5.52 30.67
147 selecsls60b 5119.51 200.008 1024 224 3.63 5.52 32.77
148 mobilenetv2_120d 5111.95 200.304 1024 224 0.69 11.97 5.83
149 resnet26d 5108.26 200.449 1024 224 2.6 8.15 16.01
150 gmixer_12_224 5064.97 202.162 1024 224 2.67 7.26 12.7
151 gmlp_ti16_224 5007.93 204.464 1024 224 1.34 7.55 5.87
152 mixer_s16_224 4998.69 204.842 1024 224 3.79 5.97 18.53
153 tf_mixnet_s 4989.18 205.231 1024 224 0.25 6.25 4.13
154 efficientnet_b0_g16_evos 4930.67 207.667 1024 224 1.01 7.42 8.11
155 rexnetr_150 4900.22 208.959 1024 224 0.89 11.13 9.78
156 fbnetv3_d 4881.14 209.776 1024 256 0.68 11.1 10.31
157 darknet21 4850.41 211.105 1024 256 3.93 7.47 20.86
158 nf_resnet26 4816.48 212.591 1024 224 2.41 7.35 16.0
159 efficientnet_lite2 4781.65 214.14 1024 260 0.89 12.9 6.09
160 convnext_femto 4749.12 215.607 1024 288 1.3 7.56 5.22
161 tf_efficientnetv2_b2 4718.26 217.018 1024 260 1.72 9.84 10.1
162 sedarknet21 4656.51 219.895 1024 256 3.93 7.47 20.95
163 dla46x_c 4636.77 220.831 1024 224 0.54 5.66 1.07
164 convnext_femto_ols 4618.33 221.714 1024 288 1.35 8.06 5.23
165 resnext26ts 4603.25 222.441 1024 256 2.43 10.52 10.3
166 efficientformer_l1 4566.14 224.248 1024 224 1.3 5.53 12.29
167 dpn48b 4506.78 227.201 1024 224 1.69 8.92 9.13
168 crossvit_tiny_240 4481.69 228.473 1024 240 1.57 9.08 7.01
169 dla60x_c 4459.27 229.622 1024 224 0.59 6.01 1.32
170 eca_resnext26ts 4456.63 229.759 1024 256 2.43 10.52 10.3
171 seresnext26ts 4453.99 229.896 1024 256 2.43 10.52 10.39
172 legacy_seresnext26_32x4d 4441.15 230.558 1024 224 2.49 9.39 16.79
173 gernet_l 4396.56 232.898 1024 256 4.57 8.0 31.08
174 mobilevitv2_075 4393.87 233.041 1024 256 1.05 12.06 2.87
175 gcresnext26ts 4384.92 233.516 1024 256 2.43 10.53 10.48
176 tf_efficientnet_b1 4370.6 234.282 1024 240 0.71 10.88 7.79
177 tf_efficientnet_lite2 4293.9 238.467 1024 260 0.89 12.9 6.09
178 rexnet_130 4262.16 240.243 1024 224 0.68 9.71 7.56
179 efficientnet_b1 4239.44 241.53 1024 256 0.77 12.22 7.79
180 vit_small_patch32_384 4239.1 241.55 1024 384 3.45 8.25 22.92
181 crossvit_9_240 4212.37 243.082 1024 240 1.85 9.52 8.55
182 crossvit_9_dagger_240 4095.03 250.049 1024 240 1.99 9.97 8.78
183 nf_ecaresnet26 4091.86 250.24 1024 224 2.41 7.36 16.0
184 nf_seresnet26 4088.47 250.449 1024 224 2.41 7.36 17.4
185 efficientnet_cc_b0_8e 4076.51 251.183 1024 224 0.42 9.42 24.01
186 efficientnet_cc_b0_4e 4073.3 251.382 1024 224 0.41 9.42 13.31
187 ecaresnet50d_pruned 4055.39 252.492 1024 224 2.53 6.43 19.94
188 efficientnet_b2_pruned 4030.92 254.025 1024 260 0.73 9.13 8.31
189 ecaresnext50t_32x4d 4018.73 254.796 1024 224 2.7 10.09 15.41
190 ecaresnext26t_32x4d 4017.09 254.9 1024 224 2.7 10.09 15.41
191 seresnext26t_32x4d 4014.43 255.069 1024 224 2.7 10.09 16.81
192 seresnext26tn_32x4d 4014.36 255.074 1024 224 2.7 10.09 16.81
193 repvgg_a2 3987.84 256.77 1024 224 5.7 6.26 28.21
194 poolformer_s12 3982.67 257.103 1024 224 1.82 5.53 11.92
195 seresnext26d_32x4d 3979.57 257.303 1024 224 2.73 10.19 16.81
196 vit_tiny_r_s16_p8_384 3963.05 258.374 1024 384 1.34 6.49 6.36
197 resnet26t 3939.46 259.923 1024 256 3.35 10.52 16.01
198 nf_regnet_b1 3911.64 261.772 1024 288 1.02 9.2 10.22
199 rexnet_150 3881.93 263.775 1024 224 0.9 11.21 9.73
200 nf_regnet_b2 3879.78 263.921 1024 272 1.22 9.27 14.31
201 resnetv2_50 3865.49 264.896 1024 224 4.11 11.11 25.55
202 regnetx_016 3852.41 265.794 1024 224 1.62 7.93 9.19
203 tf_efficientnet_cc_b0_4e 3812.08 268.608 1024 224 0.41 9.42 13.31
204 tf_efficientnet_cc_b0_8e 3803.67 269.202 1024 224 0.42 9.42 24.01
205 convnext_pico 3747.49 273.239 1024 288 2.27 10.08 9.05
206 ecaresnetlight 3744.45 273.459 1024 224 4.11 8.42 30.16
207 dpn68 3724.59 274.917 1024 224 2.35 10.47 12.61
208 edgenext_x_small 3714.71 275.646 1024 288 0.68 7.5 2.34
209 gluon_resnet50_v1b 3672.76 278.798 1024 224 4.11 11.11 25.56
210 ssl_resnet50 3671.85 278.866 1024 224 4.11 11.11 25.56
211 efficientnet_em 3671.25 278.913 1024 240 3.04 14.34 6.9
212 resnet50 3668.58 279.116 1024 224 4.11 11.11 25.56
213 swsl_resnet50 3668.32 279.136 1024 224 4.11 11.11 25.56
214 tv_resnet50 3667.14 279.225 1024 224 4.11 11.11 25.56
215 dpn68b 3667.07 279.229 1024 224 2.35 10.47 12.61
216 rexnetr_200 3659.45 279.811 1024 224 1.59 15.11 16.52
217 convnext_pico_ols 3651.34 280.434 1024 288 2.37 10.74 9.06
218 botnet26t_256 3594.28 284.883 1024 256 3.32 11.98 12.49
219 bat_resnext26ts 3569.91 286.828 1024 256 2.53 12.51 10.73
220 resnetv2_50t 3547.32 288.657 1024 224 4.32 11.82 25.57
221 mixnet_m 3537.26 289.477 1024 224 0.36 8.19 5.01
222 regnety_016 3531.88 289.919 1024 224 1.63 8.04 11.2
223 tf_efficientnet_em 3529.62 290.106 1024 240 3.04 14.34 6.9
224 resnetv2_50d 3525.02 290.482 1024 224 4.35 11.92 25.57
225 halonet26t 3515.15 291.299 1024 256 3.19 11.69 12.48
226 resnet32ts 3492.62 293.179 1024 256 4.63 11.58 17.96
227 hrnet_w18_small_v2 3482.81 294.001 1024 224 2.62 9.65 15.6
228 gluon_resnet50_v1c 3481.59 294.107 1024 224 4.35 11.92 25.58
229 dla60 3466.91 295.351 1024 224 4.26 10.16 22.04
230 resnet33ts 3460.78 295.875 1024 256 4.76 11.66 19.68
231 tf_efficientnet_b2 3402.3 300.962 1024 260 1.02 13.83 9.11
232 convit_tiny 3399.61 301.199 1024 224 1.26 7.94 5.71
233 resnet50t 3373.72 303.51 1024 224 4.32 11.82 25.57
234 tf_mixnet_m 3366.38 304.167 1024 224 0.36 8.19 5.01
235 efficientnet_b3_pruned 3360.1 304.74 1024 300 1.04 11.86 9.86
236 seresnet33ts 3354.27 305.27 1024 256 4.76 11.66 19.78
237 resnet50d 3351.47 305.527 1024 224 4.35 11.92 25.58
238 eca_resnet33ts 3350.95 305.574 1024 256 4.76 11.66 19.68
239 vit_small_resnet26d_224 3346.77 305.954 1024 224 5.07 11.12 63.61
240 cs3darknet_focus_l 3335.18 307.018 1024 288 5.9 10.16 21.15
241 gluon_resnet50_v1d 3334.65 307.068 1024 224 4.35 11.92 25.58
242 mobilevitv2_100 3324.63 307.994 1024 256 1.84 16.08 4.9
243 vovnet39a 3320.12 308.408 1024 224 7.09 6.73 22.6
244 legacy_seresnet50 3312.33 309.135 1024 224 3.88 10.6 28.09
245 efficientnet_b0_gn 3307.86 309.554 1024 224 0.42 6.75 5.29
246 gcresnet33ts 3307.01 309.633 1024 256 4.76 11.68 19.88
247 pit_s_distilled_224 3301.25 310.173 1024 224 2.9 11.64 24.04
248 pit_s_224 3299.97 310.295 1024 224 2.88 11.56 23.46
249 mobilevit_xs 3252.28 314.844 1024 256 1.05 16.33 2.32
250 deit_small_distilled_patch16_224 3233.6 316.663 1024 224 4.63 12.02 22.44
251 efficientnet_b2a 3223.97 317.608 1024 288 1.12 16.2 9.11
252 efficientnet_b2 3223.9 317.615 1024 288 1.12 16.2 9.11
253 deit_small_patch16_224 3218.99 318.1 1024 224 4.61 11.95 22.05
254 vit_small_patch16_224 3218.38 318.16 1024 224 4.61 11.95 22.05
255 cs3darknet_l 3210.26 318.965 1024 288 6.16 10.83 21.16
256 ese_vovnet39b 3206.21 319.369 1024 224 7.09 6.74 24.57
257 eca_vovnet39b 3203.77 319.612 1024 224 7.09 6.74 22.6
258 convnextv2_atto 3196.73 320.315 1024 288 0.91 6.3 3.71
259 coatnet_pico_rw_224 3189.82 321.008 1024 224 2.05 14.62 10.85
260 seresnet50 3181.57 321.841 1024 224 4.11 11.13 28.09
261 pvt_v2_b1 3147.37 325.339 1024 224 2.12 15.39 14.01
262 coat_lite_tiny 3146.41 325.439 1024 224 1.6 11.65 5.72
263 res2net50_48w_2s 3127.52 327.404 1024 224 4.18 11.72 25.29
264 eca_botnext26ts_256 3112.32 329.003 1024 256 2.46 11.6 10.59
265 ecaresnet101d_pruned 3103.16 329.973 1024 224 3.48 7.69 24.88
266 efficientnet_b0_g8_gn 3073.2 333.192 1024 224 0.66 6.75 6.56
267 ssl_resnext50_32x4d 3071.68 333.356 1024 224 4.26 14.4 25.03
268 dla60x 3071.64 333.359 1024 224 3.54 13.8 17.35
269 swsl_resnext50_32x4d 3070.7 333.464 1024 224 4.26 14.4 25.03
270 tv_resnext50_32x4d 3069.81 333.56 1024 224 4.26 14.4 25.03
271 resnext50_32x4d 3069.72 333.57 1024 224 4.26 14.4 25.03
272 gluon_resnext50_32x4d 3068.47 333.704 1024 224 4.26 14.4 25.03
273 vit_small_r26_s32_224 3061.92 334.417 1024 224 3.56 9.85 36.43
274 skresnet34 3055.95 335.073 1024 224 3.67 5.13 22.28
275 deit3_small_patch16_224_in21ft1k 3048.82 335.855 1024 224 4.61 11.95 22.06
276 deit3_small_patch16_224 3047.23 336.031 1024 224 4.61 11.95 22.06
277 eca_halonext26ts 3035.71 337.305 1024 256 2.44 11.46 10.76
278 haloregnetz_b 3032.47 337.665 1024 224 1.97 11.94 11.68
279 vit_relpos_base_patch32_plus_rpn_256 3026.45 338.338 1024 256 7.68 8.01 119.42
280 vit_relpos_small_patch16_rpn_224 3019.95 339.067 1024 224 4.59 13.05 21.97
281 vit_relpos_small_patch16_224 3008.26 340.383 1024 224 4.59 13.05 21.98
282 vit_srelpos_small_patch16_224 3000.96 341.213 1024 224 4.59 12.16 21.97
283 xcit_nano_12_p16_384_dist 3000.48 341.266 1024 384 1.64 12.15 3.05
284 cs3sedarknet_l 2995.41 341.845 1024 288 6.16 10.83 21.91
285 resnetaa50d 2993.03 342.116 1024 224 5.39 12.44 25.58
286 vgg11 2983.47 85.796 256 224 7.61 7.44 132.86
287 selecsls84 2973.16 344.402 1024 224 5.9 7.57 50.95
288 resnetrs50 2963.42 345.535 1024 224 4.48 12.14 35.69
289 seresnet50t 2957.12 346.271 1024 224 4.32 11.83 28.1
290 resnest14d 2954.69 346.556 1024 224 2.76 7.33 10.61
291 gluon_resnet50_v1s 2953.65 346.677 1024 224 5.47 13.52 25.68
292 coat_lite_mini 2952.61 346.799 1024 224 2.0 12.25 11.01
293 ecaresnet50d 2945.96 347.583 1024 224 4.35 11.93 25.58
294 densenet121 2933.45 349.064 1024 224 2.87 6.9 7.98
295 tv_densenet121 2929.69 349.514 1024 224 2.87 6.9 7.98
296 vit_base_patch32_plus_256 2929.65 349.519 1024 256 7.79 7.76 119.48
297 rexnet_200 2927.94 349.723 1024 224 1.56 14.91 16.37
298 xcit_tiny_24_p16_224_dist 2927.0 349.834 1024 224 2.34 11.82 12.12
299 xcit_tiny_24_p16_224 2921.97 350.436 1024 224 2.34 11.82 12.12
300 coatnet_nano_cc_224 2867.38 357.108 1024 224 2.24 15.02 13.76
301 gcresnext50ts 2857.34 358.363 1024 256 3.75 15.46 15.67
302 lambda_resnet26rpt_256 2853.55 358.839 1024 256 3.16 11.87 10.99
303 resnext50d_32x4d 2845.08 359.908 1024 224 4.5 15.2 25.05
304 mixnet_l 2828.6 361.996 1024 224 0.58 10.84 7.33
305 densenet121d 2824.08 362.584 1024 224 3.11 7.7 8.0
306 efficientnet_lite3 2821.84 362.87 1024 300 1.65 21.85 8.2
307 cspresnet50 2793.65 366.534 1024 256 4.54 11.5 21.62
308 coatnet_nano_rw_224 2781.93 368.077 1024 224 2.41 15.41 15.14
309 vgg11_bn 2760.38 370.949 1024 224 7.62 7.44 132.87
310 vovnet57a 2755.77 371.572 1024 224 8.95 7.52 36.64
311 resmlp_24_224 2750.33 372.306 1024 224 5.96 10.91 30.02
312 resmlp_24_distilled_224 2740.33 373.665 1024 224 5.96 10.91 30.02
313 convnextv2_femto 2735.91 374.269 1024 288 1.3 7.56 5.23
314 flexivit_small 2735.78 374.287 1024 240 5.35 14.18 22.06
315 gcresnet50t 2732.04 374.8 1024 256 5.42 14.67 25.9
316 legacy_seresnext50_32x4d 2722.84 376.065 1024 224 4.26 14.42 27.56
317 seresnext50_32x4d 2721.47 376.256 1024 224 4.26 14.42 27.56
318 gluon_seresnext50_32x4d 2720.58 376.379 1024 224 4.26 14.42 27.56
319 visformer_small 2719.93 376.468 1024 224 4.88 11.43 40.22
320 twins_svt_small 2713.39 377.374 1024 224 2.94 13.75 24.06
321 resnetv2_50x1_bit_distilled 2708.81 378.014 1024 224 4.23 11.11 25.55
322 res2net50_14w_8s 2692.9 380.248 1024 224 4.21 13.28 25.06
323 resnetblur50 2685.97 381.228 1024 224 5.16 12.02 25.56
324 vit_base_resnet26d_224 2684.6 381.421 1024 224 6.97 13.16 101.4
325 tf_mixnet_l 2680.8 381.958 1024 224 0.58 10.84 7.33
326 seresnetaa50d 2658.93 385.106 1024 224 5.4 12.46 28.11
327 dla60_res2net 2656.16 385.506 1024 224 4.15 12.34 20.85
328 cspresnet50d 2655.05 385.668 1024 256 4.86 12.55 21.64
329 coatnext_nano_rw_224 2655.0 385.674 1024 224 2.47 12.8 14.7
330 ese_vovnet57b 2654.33 385.773 1024 224 8.95 7.52 38.61
331 tf_efficientnetv2_b3 2654.14 385.8 1024 300 3.04 15.74 14.36
332 cspresnet50w 2641.68 387.621 1024 256 5.04 12.19 28.12
333 res2net50_26w_4s 2629.64 389.395 1024 224 4.28 12.61 25.7
334 regnetz_b16 2626.71 389.828 1024 288 2.39 16.43 9.72
335 convnext_nano 2611.78 392.059 1024 288 4.06 13.84 15.59
336 efficientnetv2_rw_t 2601.49 393.609 1024 288 3.19 16.42 13.65
337 fbnetv3_g 2595.29 394.549 1024 288 1.77 21.09 16.62
338 gmixer_24_224 2595.15 394.571 1024 224 5.28 14.45 24.72
339 mobilevit_s 2586.09 395.952 1024 256 2.03 19.94 5.58
340 coatnet_rmlp_nano_rw_224 2569.7 398.478 1024 224 2.62 20.34 15.15
341 gcvit_xxtiny 2561.41 399.768 1024 224 2.14 15.36 12.0
342 tf_efficientnet_lite3 2530.94 404.582 1024 300 1.65 21.85 8.2
343 efficientnet_cc_b1_8e 2530.65 404.628 1024 240 0.75 15.44 39.72
344 densenetblur121d 2522.66 405.908 1024 224 3.11 7.9 8.0
345 resnetblur50d 2509.45 408.045 1024 224 5.4 12.82 25.58
346 nf_ecaresnet50 2490.39 411.168 1024 224 4.21 11.13 25.56
347 inception_v3 2485.21 412.025 1024 299 5.73 8.97 23.83
348 nf_seresnet50 2482.66 412.449 1024 224 4.21 11.13 28.09
349 tf_inception_v3 2481.38 412.658 1024 299 5.73 8.97 23.83
350 gc_efficientnetv2_rw_t 2480.59 412.793 1024 288 3.2 16.45 13.68
351 adv_inception_v3 2479.41 412.983 1024 299 5.73 8.97 23.83
352 repvgg_b1g4 2473.34 414.003 1024 224 8.15 10.64 39.97
353 mobilevitv2_125 2472.28 414.18 1024 256 2.86 20.1 7.48
354 gluon_inception_v3 2468.42 414.827 1024 299 5.73 8.97 23.83
355 nf_regnet_b3 2461.52 415.991 1024 320 2.05 14.61 18.59
356 xcit_small_12_p16_224_dist 2446.89 418.478 1024 224 4.82 12.58 26.25
357 xcit_small_12_p16_224 2446.42 418.558 1024 224 4.82 12.58 26.25
358 cspresnext50 2438.96 419.836 1024 256 4.05 15.86 20.57
359 convnext_nano_ols 2435.0 420.521 1024 288 4.38 15.5 15.65
360 regnetx_032 2429.42 421.489 1024 224 3.2 11.37 15.3
361 densenet169 2426.29 422.031 1024 224 3.4 7.3 14.15
362 sehalonet33ts 2419.4 423.234 1024 256 3.55 14.7 13.69
363 tf_efficientnet_cc_b1_8e 2406.19 425.557 1024 240 0.75 15.44 39.72
364 semobilevit_s 2402.02 426.294 1024 256 2.03 19.95 5.74
365 resnetv2_101 2330.6 439.36 1024 224 7.83 16.23 44.54
366 twins_pcpvt_small 2312.72 442.754 1024 224 3.83 18.08 24.11
367 xcit_nano_12_p8_224_dist 2295.5 446.077 1024 224 2.16 15.71 3.05
368 xcit_nano_12_p8_224 2292.87 446.587 1024 224 2.16 15.71 3.05
369 gmlp_s16_224 2290.73 447.007 1024 224 4.42 15.1 19.42
370 cs3darknet_focus_x 2287.2 447.697 1024 256 8.03 10.69 35.02
371 vit_base_r26_s32_224 2275.25 450.047 1024 224 6.81 12.36 101.38
372 gluon_resnet101_v1b 2260.37 453.01 1024 224 7.83 16.23 44.55
373 tv_resnet101 2258.59 453.368 1024 224 7.83 16.23 44.55
374 resnet101 2258.28 453.43 1024 224 7.83 16.23 44.55
375 skresnet50 2234.62 458.23 1024 224 4.11 12.5 25.8
376 ecaresnet26t 2232.29 458.709 1024 320 5.24 16.44 16.01
377 edgenext_small 2226.69 459.86 1024 320 1.97 14.16 5.59
378 dla102 2219.96 461.255 1024 224 7.19 14.18 33.27
379 res2next50 2214.71 462.347 1024 224 4.2 13.71 24.67
380 dla60_res2next 2210.67 463.194 1024 224 3.49 13.17 17.03
381 resnetv2_101d 2203.82 464.633 1024 224 8.07 17.04 44.56
382 gluon_resnet101_v1c 2194.65 466.578 1024 224 8.08 17.04 44.57
383 resnest26d 2170.04 471.869 1024 224 3.64 9.97 17.07
384 vgg13 2149.71 476.331 1024 224 11.31 12.25 133.05
385 gluon_resnet101_v1d 2137.49 479.053 1024 224 8.08 17.04 44.57
386 skresnet50d 2115.22 484.098 1024 224 4.36 13.31 25.82
387 convnextv2_pico 2108.5 485.64 1024 288 2.27 10.08 9.07
388 vit_base_resnet50d_224 2101.17 487.333 1024 224 8.73 16.92 110.97
389 coatnet_0_rw_224 2082.49 491.706 1024 224 4.43 18.73 27.44
390 crossvit_small_240 2081.5 491.94 1024 240 5.63 18.17 26.86
391 deit3_medium_patch16_224_in21ft1k 2076.53 493.118 1024 224 8.0 15.93 38.85
392 deit3_medium_patch16_224 2072.34 494.116 1024 224 8.0 15.93 38.85
393 mobilevitv2_150 2071.36 494.349 1024 256 4.09 24.11 10.59
394 mobilevitv2_150_in22ft1k 2070.3 494.603 1024 256 4.09 24.11 10.59
395 sebotnet33ts_256 2067.91 247.581 512 256 3.89 17.46 13.7
396 wide_resnet50_2 2057.08 497.78 1024 224 11.43 14.4 68.88
397 vit_relpos_medium_patch16_rpn_224 2044.85 500.757 1024 224 7.97 17.02 38.73
398 efficientformer_l3 2041.79 501.507 1024 224 3.93 12.01 31.41
399 poolformer_s24 2040.35 501.863 1024 224 3.41 10.68 21.39
400 vit_relpos_medium_patch16_224 2037.47 502.572 1024 224 7.97 17.02 38.75
401 cspdarknet53 2035.94 502.949 1024 256 6.57 16.81 27.64
402 resnet51q 2034.41 503.329 1024 288 8.07 20.94 35.7
403 vit_srelpos_medium_patch16_224 2033.15 503.638 1024 224 7.96 16.21 38.74
404 maxvit_rmlp_pico_rw_256 2008.78 509.748 1024 256 1.85 24.86 7.52
405 vit_relpos_medium_patch16_cls_224 2007.24 510.141 1024 224 8.03 18.24 38.76
406 dla102x 2006.55 510.315 1024 224 5.89 19.42 26.31
407 legacy_seresnet101 2003.12 511.188 1024 224 7.61 15.74 49.33
408 swin_tiny_patch4_window7_224 1995.14 513.235 1024 224 4.51 17.06 28.29
409 repvgg_b1 1985.42 515.747 1024 224 13.16 10.64 57.42
410 resnetaa101d 1982.98 516.381 1024 224 9.12 17.56 44.57
411 coatnet_rmlp_0_rw_224 1981.75 516.703 1024 224 4.72 24.89 27.45
412 tf_efficientnet_b3 1975.92 518.226 1024 300 1.87 23.83 12.23
413 gcvit_xtiny 1969.68 519.869 1024 224 2.93 20.26 19.98
414 hrnet_w18 1967.17 520.531 1024 224 4.32 16.31 21.3
415 gluon_resnet101_v1s 1965.68 520.926 1024 224 9.19 18.64 44.67
416 maxvit_pico_rw_256 1965.38 521.006 1024 256 1.83 22.3 7.46
417 resnetaa50 1958.15 522.93 1024 288 8.52 19.24 25.56
418 seresnet101 1954.63 523.871 1024 224 7.84 16.27 49.33
419 efficientnet_b3 1949.54 525.239 1024 320 2.01 26.52 12.23
420 efficientnet_b3a 1949.11 525.356 1024 320 2.01 26.52 12.23
421 edgenext_small_rw 1932.68 529.816 1024 320 2.46 14.85 7.83
422 regnetx_040 1932.62 529.839 1024 224 3.99 12.2 22.12
423 cs3sedarknet_xdw 1925.4 531.825 1024 256 5.97 17.18 21.6
424 coatnet_bn_0_rw_224 1920.71 533.123 1024 224 4.67 22.04 27.44
425 xcit_tiny_12_p16_384_dist 1911.65 535.652 1024 384 3.64 18.26 6.72
426 ssl_resnext101_32x4d 1910.73 535.909 1024 224 8.01 21.23 44.18
427 swsl_resnext101_32x4d 1910.43 535.993 1024 224 8.01 21.23 44.18
428 resnext101_32x4d 1909.99 536.115 1024 224 8.01 21.23 44.18
429 gluon_resnext101_32x4d 1909.34 536.298 1024 224 8.01 21.23 44.18
430 darknet53 1903.77 537.866 1024 288 11.78 15.68 41.61
431 darknetaa53 1898.12 539.468 1024 288 10.08 15.68 36.02
432 crossvit_15_240 1892.46 541.083 1024 240 5.81 19.77 27.53
433 halonet50ts 1881.53 544.226 1024 256 5.3 19.2 22.73
434 vgg13_bn 1879.72 544.749 1024 224 11.33 12.25 133.05
435 mixnet_xl 1872.46 546.86 1024 224 0.93 14.57 11.9
436 res2net50_26w_6s 1870.88 547.321 1024 224 6.33 15.28 37.05
437 ecaresnet101d 1869.88 547.616 1024 224 8.08 17.07 44.57
438 densenet201 1869.57 547.706 1024 224 4.34 7.85 20.01
439 nf_resnet101 1858.48 550.976 1024 224 8.01 16.23 44.55
440 coatnet_0_224 1857.28 275.661 512 224 4.58 24.01 25.04
441 pvt_v2_b2 1854.85 552.053 1024 224 4.05 27.53 25.36
442 crossvit_15_dagger_240 1850.69 553.295 1024 240 6.13 20.43 28.21
443 resmlp_36_224 1846.41 554.574 1024 224 8.91 16.33 44.69
444 resmlp_36_distilled_224 1845.04 554.99 1024 224 8.91 16.33 44.69
445 resnet61q 1841.84 555.954 1024 288 9.87 21.52 36.85
446 swin_s3_tiny_224 1817.5 563.398 1024 224 4.64 19.13 28.33
447 cait_xxs24_224 1796.55 569.968 1024 224 2.53 20.29 11.96
448 cs3darknet_x 1789.33 572.268 1024 288 10.6 14.36 35.05
449 vit_medium_patch16_gap_240 1785.54 573.481 1024 240 9.22 18.81 44.4
450 nf_resnet50 1784.84 573.708 1024 288 6.88 18.37 25.56
451 resnet50_gn 1764.31 580.385 1024 224 4.14 11.11 25.56
452 mixer_b16_224_miil 1761.45 581.327 1024 224 12.62 14.53 59.88
453 mixer_b16_224 1759.76 581.885 1024 224 12.62 14.53 59.88
454 resnetblur101d 1757.96 582.482 1024 224 9.12 17.94 44.57
455 eca_nfnet_l0 1726.58 593.068 1024 288 7.12 17.29 24.14
456 nfnet_l0 1721.83 594.705 1024 288 7.13 17.29 35.07
457 vit_large_patch32_224 1717.59 596.169 1024 224 15.41 13.32 327.9
458 vgg16 1717.44 596.224 1024 224 15.47 13.56 138.36
459 regnetz_c16 1710.89 598.505 1024 320 3.92 25.88 13.46
460 pvt_v2_b2_li 1709.89 598.855 1024 224 3.91 27.6 22.55
461 resnest50d_1s4x24d 1705.52 600.391 1024 224 4.43 13.57 25.68
462 coat_lite_small 1704.55 600.733 1024 224 3.96 22.09 19.84
463 resnetv2_50d_frn 1697.1 603.368 1024 224 4.33 11.92 25.59
464 cs3sedarknet_x 1689.8 605.975 1024 288 10.6 14.37 35.4
465 seresnext101_32x4d 1687.65 606.747 1024 224 8.02 21.26 48.96
466 gluon_seresnext101_32x4d 1687.1 606.945 1024 224 8.02 21.26 48.96
467 legacy_seresnext101_32x4d 1684.69 607.813 1024 224 8.02 21.26 48.96
468 regnetv_040 1682.92 608.454 1024 288 6.6 20.3 20.64
469 mobilevitv2_175 1677.66 457.769 768 256 5.54 28.13 14.25
470 regnety_040 1677.03 610.59 1024 288 6.61 20.3 20.65
471 mobilevitv2_175_in22ft1k 1677.0 457.949 768 256 5.54 28.13 14.25
472 convnext_tiny_hnf 1676.16 610.908 1024 288 7.39 22.21 28.59
473 res2net101_26w_4s 1675.37 611.195 1024 224 8.1 18.45 45.21
474 vit_tiny_patch16_384 1665.76 614.72 1024 384 4.7 25.39 5.79
475 sequencer2d_s 1661.32 616.362 1024 224 4.96 11.31 27.65
476 ese_vovnet39b_evos 1661.21 616.404 1024 224 7.07 6.74 24.58
477 vit_base_patch32_384 1649.27 620.868 1024 384 13.06 16.5 88.3
478 vit_base_patch32_clip_384 1648.64 621.105 1024 384 13.06 16.5 88.3
479 mixer_l32_224 1645.23 622.393 1024 224 11.27 19.86 206.94
480 convnext_tiny 1642.14 623.562 1024 288 7.39 22.21 28.59
481 botnet50ts_256 1639.64 312.25 512 256 5.54 22.23 22.74
482 swinv2_cr_tiny_224 1630.02 628.199 1024 224 4.66 28.45 28.33
483 resnetv2_50d_evob 1627.44 629.196 1024 224 4.33 11.92 25.59
484 twins_pcpvt_base 1615.12 633.996 1024 224 6.68 25.25 43.83
485 resnetv2_152 1614.43 634.268 1024 224 11.55 22.56 60.19
486 hrnet_w32 1605.06 637.96 1024 224 8.97 22.02 41.23
487 swinv2_cr_tiny_ns_224 1600.43 639.811 1024 224 4.66 28.45 28.33
488 xception41p 1598.79 480.351 768 299 9.25 39.86 26.91
489 tv_resnet152 1582.54 647.049 1024 224 11.56 22.56 60.19
490 gluon_resnet152_v1b 1581.57 647.444 1024 224 11.56 22.56 60.19
491 resnet152 1581.02 647.671 1024 224 11.56 22.56 60.19
492 xception 1579.88 648.138 1024 299 8.4 35.83 22.86
493 halo2botnet50ts_256 1572.75 651.076 1024 256 5.02 21.78 22.64
494 res2net50_26w_8s 1568.85 652.695 1024 224 8.37 17.95 48.4
495 vit_medium_patch16_gap_256 1564.22 654.626 1024 256 10.59 22.15 38.86
496 resnetv2_152d 1557.03 657.648 1024 224 11.8 23.36 60.2
497 efficientnet_el_pruned 1555.14 658.449 1024 300 8.0 30.7 10.59
498 maxvit_rmlp_nano_rw_256 1551.85 659.845 1024 256 4.47 31.92 15.5
499 regnetx_064 1550.52 660.413 1024 224 6.49 16.37 26.21
500 efficientnet_el 1549.97 660.646 1024 300 8.0 30.7 10.59
501 gluon_resnet152_v1c 1548.96 661.078 1024 224 11.8 23.36 60.21
502 nf_ecaresnet101 1546.58 662.091 1024 224 8.01 16.27 44.55
503 nf_seresnet101 1539.38 665.191 1024 224 8.02 16.27 49.33
504 mvitv2_tiny 1537.54 665.985 1024 224 4.7 21.16 24.17
505 nfnet_f0 1525.01 671.456 1024 256 12.62 18.05 71.49
506 vgg16_bn 1523.86 671.963 1024 224 15.5 13.56 138.37
507 cs3edgenet_x 1521.21 673.136 1024 288 14.59 16.36 47.82
508 gluon_resnet152_v1d 1520.11 673.621 1024 224 11.8 23.36 60.21
509 maxvit_nano_rw_256 1517.43 674.812 1024 256 4.46 30.28 15.45
510 tf_efficientnet_el 1506.16 679.862 1024 300 8.0 30.7 10.59
511 convnextv2_nano 1500.71 511.746 768 288 4.06 13.84 15.62
512 resnest50d 1492.63 686.022 1024 224 5.4 14.36 27.48
513 ese_vovnet99b 1489.17 687.617 1024 224 16.51 11.27 63.2
514 dla169 1471.11 696.059 1024 224 11.6 20.2 53.39
515 regnety_032 1467.85 697.604 1024 288 5.29 18.61 19.44
516 skresnext50_32x4d 1463.28 699.785 1024 224 4.5 17.18 27.48
517 xcit_tiny_12_p8_224_dist 1458.7 701.981 1024 224 4.81 23.6 6.71
518 xcit_tiny_12_p8_224 1458.23 702.211 1024 224 4.81 23.6 6.71
519 convit_small 1457.54 702.541 1024 224 5.76 17.87 27.78
520 mobilevitv2_200_in22ft1k 1456.59 527.247 768 256 7.22 32.15 18.45
521 mobilevitv2_200 1456.02 527.451 768 256 7.22 32.15 18.45
522 ecaresnet50t 1438.32 711.929 1024 320 8.82 24.13 25.57
523 gluon_resnet152_v1s 1432.22 714.961 1024 224 12.92 24.96 60.32
524 nest_tiny 1415.33 542.618 768 224 5.83 25.48 17.06
525 regnety_040s_gn 1412.65 724.867 1024 224 4.03 12.29 20.65
526 vgg19 1393.71 183.67 256 224 19.63 14.86 143.67
527 jx_nest_tiny 1389.62 552.657 768 224 5.83 25.48 17.06
528 legacy_seresnet152 1383.83 739.96 1024 224 11.33 22.08 66.82
529 densenet161 1376.52 743.891 1024 224 7.79 11.06 28.68
530 poolformer_s36 1370.67 747.069 1024 224 5.0 15.82 30.86
531 vit_small_resnet50d_s16_224 1367.59 748.748 1024 224 13.48 24.82 57.53
532 twins_svt_base 1362.65 751.463 1024 224 8.59 26.33 56.07
533 seresnet152 1361.7 751.99 1024 224 11.57 22.61 66.82
534 xception41 1356.44 566.173 768 299 9.28 39.86 26.97
535 maxvit_tiny_rw_224 1350.45 758.254 1024 224 5.11 33.11 29.06
536 crossvit_18_240 1348.85 759.154 1024 240 9.05 26.26 43.27
537 maxxvit_rmlp_nano_rw_256 1347.73 759.767 1024 256 4.37 26.05 16.78
538 efficientnet_lite4 1343.74 571.528 768 380 4.04 45.66 13.01
539 gcvit_tiny 1339.65 764.364 1024 224 4.79 29.82 28.22
540 pvt_v2_b3 1325.92 772.282 1024 224 6.92 37.7 45.24
541 crossvit_18_dagger_240 1313.78 779.419 1024 240 9.5 27.03 44.27
542 volo_d1_224 1312.37 780.255 1024 224 6.94 24.43 26.63
543 xcit_small_24_p16_224_dist 1307.3 783.278 1024 224 9.1 23.64 47.67
544 tresnet_m 1305.71 784.234 1024 224 5.74 7.31 31.39
545 inception_v4 1305.41 784.412 1024 299 12.28 15.09 42.68
546 repvgg_b2 1305.22 784.529 1024 224 20.45 12.9 89.02
547 xcit_small_24_p16_224 1303.71 785.433 1024 224 9.1 23.64 47.67
548 sequencer2d_m 1295.72 790.281 1024 224 6.55 14.26 38.31
549 edgenext_base 1283.77 797.633 1024 320 6.01 24.32 18.51
550 hrnet_w30 1280.53 799.653 1024 224 8.15 21.21 37.71
551 dm_nfnet_f0 1275.46 802.834 1024 256 12.62 18.05 71.49
552 coatnet_rmlp_1_rw_224 1268.37 807.322 1024 224 7.85 35.47 41.69
553 maxxvitv2_nano_rw_256 1259.7 812.877 1024 256 6.26 23.05 23.7
554 efficientnetv2_s 1254.49 816.255 1024 384 8.44 35.77 21.46
555 vgg19_bn 1246.52 205.36 256 224 19.66 14.86 143.68
556 nf_regnet_b4 1235.79 828.604 1024 384 4.7 28.61 30.21
557 swin_small_patch4_window7_224 1235.74 828.641 1024 224 8.77 27.47 49.61
558 tf_efficientnet_lite4 1232.22 623.25 768 380 4.04 45.66 13.01
559 regnetz_d32 1223.51 836.919 1024 320 9.33 37.08 27.58
560 mixnet_xxl 1219.27 629.871 768 224 2.04 23.43 23.96
561 tf_efficientnetv2_s 1219.16 839.906 1024 384 8.44 35.77 21.46
562 deit_base_patch16_224 1213.08 844.121 1024 224 17.58 23.9 86.57
563 deit_base_distilled_patch16_224 1212.98 844.19 1024 224 17.68 24.05 87.34
564 vit_base_patch16_clip_224 1211.82 844.996 1024 224 17.58 23.9 86.57
565 vit_base_patch16_224_miil 1211.26 845.389 1024 224 17.59 23.91 94.4
566 dpn92 1210.45 845.948 1024 224 6.54 18.21 37.67
567 vit_base_patch16_224 1210.28 846.074 1024 224 17.58 23.9 86.57
568 coatnet_rmlp_1_rw2_224 1208.65 847.215 1024 224 8.11 40.13 41.72
569 cait_xxs36_224 1205.51 849.419 1024 224 3.77 30.34 17.3
570 maxvit_tiny_tf_224 1200.3 639.828 768 224 5.6 35.78 30.92
571 swinv2_tiny_window8_256 1200.06 853.274 1024 256 5.96 24.57 28.35
572 efficientnetv2_rw_s 1199.87 853.413 1024 384 8.72 38.03 23.94
573 dla102x2 1198.52 854.374 1024 224 9.34 29.91 41.28
574 regnetx_160 1195.08 856.833 1024 224 15.99 25.52 54.28
575 dpn98 1183.92 864.908 1024 224 11.73 25.2 61.57
576 vit_base_patch16_rpn_224 1180.39 867.498 1024 224 17.49 23.75 86.54
577 twins_pcpvt_large 1168.64 876.22 1024 224 9.84 35.82 60.99
578 deit3_base_patch16_224 1164.77 879.134 1024 224 17.58 23.9 86.59
579 deit3_base_patch16_224_in21ft1k 1164.5 879.334 1024 224 17.58 23.9 86.59
580 regnetz_d8 1163.64 879.982 1024 320 6.19 37.08 23.37
581 swsl_resnext101_32x8d 1158.15 884.156 1024 224 16.48 31.21 88.79
582 resnext101_32x8d 1158.05 884.232 1024 224 16.48 31.21 88.79
583 ssl_resnext101_32x8d 1158.02 884.255 1024 224 16.48 31.21 88.79
584 wide_resnet101_2 1157.66 884.531 1024 224 22.8 21.23 126.89
585 ig_resnext101_32x8d 1157.3 884.8 1024 224 16.48 31.21 88.79
586 coatnet_1_rw_224 1155.72 886.014 1024 224 8.04 34.6 41.72
587 vit_base_patch16_gap_224 1154.73 886.777 1024 224 17.49 25.59 86.57
588 vit_base_patch32_clip_448 1154.21 887.173 1024 448 17.93 23.9 88.34
589 resnet200 1149.71 890.646 1024 224 15.07 32.19 64.67
590 mvitv2_small 1146.92 892.812 1024 224 7.0 28.08 34.87
591 xception65p 1145.07 670.686 768 299 13.91 52.48 39.82
592 cs3se_edgenet_x 1143.17 895.738 1024 320 18.01 20.21 50.72
593 vit_relpos_base_patch16_rpn_224 1143.15 895.76 1024 224 17.51 24.97 86.41
594 vit_relpos_base_patch16_224 1141.31 897.204 1024 224 17.51 24.97 86.43
595 tnt_s_patch16_224 1135.32 901.935 1024 224 5.24 24.37 23.76
596 resnetrs101 1134.67 902.454 1024 288 13.56 28.53 63.62
597 vit_relpos_base_patch16_clsgap_224 1128.94 907.03 1024 224 17.6 25.12 86.43
598 vit_relpos_base_patch16_cls_224 1126.78 908.771 1024 224 17.6 25.12 86.43
599 inception_resnet_v2 1126.73 908.809 1024 299 13.18 25.06 55.84
600 ens_adv_inception_resnet_v2 1125.41 909.877 1024 299 13.18 25.06 55.84
601 beit_base_patch16_224 1112.26 920.631 1024 224 17.58 23.9 86.53
602 coat_tiny 1108.72 923.572 1024 224 4.35 27.2 5.5
603 beitv2_base_patch16_224 1108.55 923.711 1024 224 17.58 23.9 86.53
604 mvitv2_small_cls 1101.66 929.491 1024 224 7.04 28.17 34.87
605 resnetv2_50d_gn 1092.35 937.413 1024 288 7.24 19.7 25.57
606 pit_b_distilled_224 1078.48 474.731 512 224 12.5 33.07 74.79
607 pit_b_224 1075.34 476.117 512 224 12.42 32.94 73.76
608 hrnet_w40 1059.78 966.217 1024 224 12.75 25.29 57.56
609 coatnet_1_224 1045.17 489.859 512 224 8.7 39.0 42.23
610 resnet101d 1039.88 984.712 1024 320 16.48 34.77 44.57
611 flexivit_base 1037.21 987.248 1024 240 20.29 28.36 86.59
612 gluon_resnext101_64x4d 1034.86 989.491 1024 224 15.52 31.21 83.46
613 vit_small_patch16_36x1_224 1033.13 991.146 1024 224 13.71 35.69 64.67
614 vit_large_r50_s32_224 1030.67 993.517 1024 224 19.58 24.41 328.99
615 maxvit_rmlp_tiny_rw_256 1029.25 746.162 768 256 6.77 46.92 29.15
616 xcit_tiny_24_p16_384_dist 1027.64 996.444 1024 384 6.87 34.29 12.12
617 efficientnet_b4 1014.08 504.879 512 384 4.51 50.04 19.34
618 maxvit_tiny_rw_256 1008.0 1015.861 1024 256 6.74 44.35 29.07
619 vit_small_patch16_18x2_224 1006.7 1017.169 1024 224 13.71 35.69 64.67
620 swinv2_cr_small_224 1005.28 1018.603 1024 224 9.07 50.27 49.7
621 regnetx_080 1004.51 1019.384 1024 224 8.02 14.06 39.57
622 repvgg_b3 994.23 1029.925 1024 224 29.16 15.1 123.09
623 swinv2_cr_small_ns_224 993.75 1030.424 1024 224 9.08 50.27 49.7
624 repvgg_b2g4 988.97 1035.405 1024 224 12.63 12.9 61.76
625 convnext_small 988.3 1036.113 1024 288 14.39 35.65 50.22
626 gluon_xception65 987.82 777.458 768 299 13.96 52.48 39.92
627 vit_small_r26_s32_384 982.68 1042.031 1024 384 10.43 29.85 36.47
628 xception65 978.83 784.597 768 299 13.96 52.48 39.92
629 regnetz_040 975.77 787.056 768 320 6.35 37.78 27.12
630 regnetz_040h 971.51 790.512 768 320 6.43 37.94 28.94
631 gluon_seresnext101_64x4d 965.3 1060.794 1024 224 15.53 31.25 88.23
632 maxvit_tiny_pm_256 964.03 1062.189 1024 256 6.61 47.9 30.09
633 efficientformer_l7 962.55 1063.825 1024 224 10.17 24.45 82.23
634 twins_svt_large 962.19 1064.229 1024 224 15.15 35.1 99.27
635 tf_efficientnet_b4 957.62 534.646 512 380 4.49 49.49 19.34
636 pvt_v2_b4 957.38 1069.569 1024 224 10.14 53.74 62.56
637 poolformer_m36 954.91 1072.334 1024 224 8.8 22.02 56.17
638 cait_s24_224 954.44 1072.866 1024 224 9.35 40.58 46.92
639 regnetz_b16_evos 950.47 808.013 768 288 2.36 16.43 9.74
640 resnest50d_4s2x40d 938.07 1091.586 1024 224 4.4 17.94 30.42
641 hrnet_w48 936.07 1093.917 1024 224 17.34 28.56 77.47
642 gmlp_b16_224 930.95 1099.935 1024 224 15.78 30.21 73.08
643 convnextv2_tiny 930.82 550.041 512 288 7.39 22.21 28.64
644 convnextv2_small 928.68 1102.629 1024 224 8.71 21.56 50.32
645 maxxvit_rmlp_tiny_rw_256 918.72 1114.583 1024 256 6.66 39.76 29.64
646 mobilevitv2_150_384_in22ft1k 915.49 419.435 384 384 9.2 54.25 10.59
647 pvt_v2_b5 909.79 1125.516 1024 224 11.76 50.92 81.96
648 nest_small 903.21 850.284 768 224 10.35 40.04 38.35
649 swin_s3_small_224 899.98 853.339 768 224 9.43 37.84 49.74
650 xcit_medium_24_p16_224_dist 898.61 1139.525 1024 224 16.13 31.71 84.4
651 xcit_medium_24_p16_224 898.6 1139.542 1024 224 16.13 31.71 84.4
652 jx_nest_small 892.03 860.939 768 224 10.35 40.04 38.35
653 coat_mini 880.8 1162.569 1024 224 6.82 33.68 10.34
654 swin_base_patch4_window7_224 875.38 1169.764 1024 224 15.47 36.63 87.77
655 dpn131 865.2 1183.527 1024 224 16.09 32.97 79.25
656 resnetv2_50d_evos 854.82 1197.895 1024 288 7.15 19.7 25.59
657 xcit_small_12_p16_384_dist 853.54 1199.694 1024 384 14.14 36.51 26.25
658 sequencer2d_l 839.78 1219.347 1024 224 9.74 22.12 54.3
659 crossvit_base_240 839.43 914.892 768 240 21.22 36.33 105.03
660 hrnet_w44 821.37 1246.671 1024 224 14.94 26.92 67.06
661 eca_nfnet_l1 818.87 1250.489 1024 320 14.92 34.42 41.41
662 vit_base_r50_s16_224 817.55 1252.502 1024 224 21.67 35.31 114.69
663 maxvit_rmlp_small_rw_224 816.34 1254.368 1024 224 10.75 49.3 64.9
664 gcvit_small 815.24 1256.055 1024 224 8.57 41.61 51.09
665 regnety_080 811.28 1262.191 1024 288 13.22 29.69 39.18
666 densenet264 804.85 1272.268 1024 224 12.95 12.8 72.69
667 mvitv2_base 804.14 1273.395 1024 224 10.16 40.5 51.47
668 repvgg_b3g4 802.85 1275.443 1024 224 17.89 15.1 83.83
669 vit_base_patch16_plus_240 782.25 1309.022 1024 240 27.41 33.08 117.56
670 swinv2_tiny_window16_256 781.61 655.045 512 256 6.68 39.02 28.35
671 maxvit_small_tf_224 777.04 658.899 512 224 11.66 53.17 68.93
672 xcit_tiny_24_p8_224 771.1 1327.958 1024 224 9.21 45.39 12.11
673 xcit_tiny_24_p8_224_dist 770.21 1329.496 1024 224 9.21 45.39 12.11
674 coatnet_2_rw_224 763.52 670.562 512 224 15.09 49.22 73.87
675 vit_relpos_base_patch16_plus_240 763.4 1341.361 1024 240 27.3 34.33 117.38
676 efficientnet_b3_gn 763.0 671.023 512 320 2.14 28.83 11.73
677 coatnet_rmlp_2_rw_224 759.73 673.906 512 224 15.18 54.78 73.88
678 vit_small_patch16_384 753.82 1018.79 768 384 15.52 50.78 22.2
679 hrnet_w64 750.36 1364.663 1024 224 28.97 35.09 128.06
680 xception71 749.7 1024.396 768 299 18.09 69.92 42.34
681 resnet152d 742.37 1379.356 1024 320 24.08 47.67 60.21
682 swinv2_small_window8_256 741.95 1380.134 1024 256 11.58 40.14 49.73
683 mobilevitv2_175_384_in22ft1k 739.09 519.544 384 384 12.47 63.29 14.25
684 ecaresnet200d 736.17 1390.959 1024 256 20.0 43.15 64.69
685 seresnet200d 733.28 1396.444 1024 256 20.01 43.15 71.86
686 swin_s3_base_224 733.27 1396.459 1024 224 13.69 48.26 71.13
687 convit_base 731.09 1400.636 1024 224 17.52 31.77 86.54
688 resnest101e 726.65 1409.184 1024 256 13.38 28.66 48.28
689 deit3_small_patch16_384 726.49 1057.125 768 384 15.52 50.78 22.21
690 deit3_small_patch16_384_in21ft1k 726.32 1057.368 768 384 15.52 50.78 22.21
691 volo_d2_224 722.61 1417.079 1024 224 14.34 41.34 58.68
692 tnt_b_patch16_224 721.24 1419.762 1024 224 14.09 39.01 65.41
693 xcit_nano_12_p8_384_dist 720.41 1421.4 1024 384 6.34 46.08 3.05
694 swinv2_cr_base_224 719.23 1423.721 1024 224 15.86 59.66 87.88
695 poolformer_m48 719.07 1424.046 1024 224 11.59 29.17 73.47
696 coatnet_2_224 715.36 715.711 512 224 16.5 52.67 74.68
697 swinv2_cr_base_ns_224 712.96 1436.239 1024 224 15.86 59.66 87.88
698 dpn107 691.0 1481.897 1024 224 18.38 33.46 86.92
699 convnext_base 687.14 1490.219 1024 288 25.43 47.53 88.59
700 resnetv2_50x1_bitm 684.31 374.087 256 448 16.62 44.46 25.55
701 efficientnet_b3_g8_gn 664.63 770.341 512 320 3.2 28.83 14.25
702 regnety_064 657.71 1556.911 1024 288 10.56 27.11 30.58
703 regnetv_064 652.6 1569.096 1024 288 10.55 27.11 30.58
704 xcit_small_12_p8_224 651.3 1572.214 1024 224 18.69 47.21 26.21
705 xcit_small_12_p8_224_dist 651.08 1572.755 1024 224 18.69 47.21 26.21
706 resnetrs152 649.95 1575.501 1024 320 24.34 48.14 86.62
707 mobilevitv2_200_384_in22ft1k 647.42 395.4 256 384 16.24 72.34 18.45
708 seresnet152d 645.69 1585.88 1024 320 24.09 47.72 66.84
709 tresnet_l 644.38 1589.105 1024 224 10.88 11.9 55.99
710 tresnet_v2_l 642.3 1594.246 1024 224 8.81 16.34 46.17
711 nest_base 640.98 798.76 512 224 17.96 53.39 67.72
712 regnetx_120 640.37 1599.07 1024 224 12.13 21.37 46.11
713 seresnext101_32x8d 639.53 1601.159 1024 288 27.24 51.63 93.57
714 regnetz_e8 639.43 1601.423 1024 320 15.46 63.94 57.7
715 ese_vovnet99b_iabn 636.1 1609.798 1024 224 16.49 11.27 63.2
716 jx_nest_base 634.61 806.787 512 224 17.96 53.39 67.72
717 regnety_120 625.75 1636.422 1024 224 12.14 21.38 51.82
718 efficientnetv2_m 624.53 1639.618 1024 416 18.6 67.5 54.14
719 seresnext101d_32x8d 621.55 1647.466 1024 288 27.64 52.95 93.59
720 resnext101_64x4d 619.77 1652.21 1024 288 25.66 51.59 83.46
721 swsl_resnext101_32x16d 612.21 1672.624 1024 224 36.27 51.18 194.03
722 ig_resnext101_32x16d 611.98 1673.243 1024 224 36.27 51.18 194.03
723 maxvit_rmlp_small_rw_256 611.67 1255.571 768 256 14.15 66.09 64.9
724 ssl_resnext101_32x16d 611.31 1675.063 1024 224 36.27 51.18 194.03
725 regnety_320 605.31 1691.684 1024 224 32.34 30.26 145.05
726 gcvit_base 602.42 1699.782 1024 224 14.87 55.48 90.32
727 regnetz_c16_evos 596.93 857.706 512 320 3.86 25.88 13.49
728 maxxvit_rmlp_small_rw_256 590.18 1735.046 1024 256 14.67 58.38 66.01
729 legacy_senet154 585.86 1747.854 1024 224 20.77 38.69 115.09
730 senet154 585.53 1748.836 1024 224 20.77 38.69 115.09
731 seresnextaa101d_32x8d 585.08 1750.175 1024 288 28.51 56.44 93.59
732 gluon_senet154 584.86 1750.843 1024 224 20.77 38.69 115.09
733 convmixer_768_32 581.95 1759.577 1024 224 19.55 25.95 21.11
734 seresnet269d 574.5 1782.4 1024 256 26.59 53.6 113.67
735 nf_regnet_b5 565.36 905.602 512 456 11.7 61.95 49.74
736 mixer_l16_224 553.66 1849.49 1024 224 44.6 41.69 208.2
737 resnet200d 545.14 1878.401 1024 320 31.25 67.33 64.69
738 nfnet_f1 544.28 1881.353 1024 320 35.97 46.77 132.63
739 vit_large_patch32_384 543.45 1884.237 1024 384 45.31 43.86 306.63
740 efficientnetv2_rw_m 543.37 1884.512 1024 416 21.49 79.62 53.24
741 vit_medium_patch16_gap_384 539.24 949.475 512 384 26.08 67.54 39.03
742 efficientnet_b5 533.21 960.212 512 448 9.59 93.56 30.39
743 swinv2_base_window8_256 531.81 1925.495 1024 256 20.37 52.59 87.92
744 maxxvitv2_rmlp_base_rw_224 525.72 1947.791 1024 224 24.2 62.77 116.09
745 xcit_large_24_p16_224_dist 509.19 2011.039 1024 224 35.86 47.27 189.1
746 xcit_large_24_p16_224 509.15 2011.169 1024 224 35.86 47.27 189.1
747 swin_large_patch4_window7_224 504.4 1522.593 768 224 34.53 54.94 196.53
748 halonet_h1 503.39 508.543 256 256 3.0 51.17 8.1
749 volo_d3_224 502.58 2037.467 1024 224 20.78 60.09 86.33
750 swinv2_small_window16_256 488.97 1047.084 512 256 12.82 66.29 49.73
751 tresnet_xl 481.58 2126.301 1024 224 15.17 15.34 78.44
752 vit_small_patch8_224 479.11 1068.641 512 224 22.44 80.84 21.67
753 tf_efficientnet_b5 476.47 805.919 384 456 10.46 98.86 30.39
754 maxvit_rmlp_base_rw_224 472.06 2169.196 1024 224 23.15 92.64 116.14
755 resnetrs200 471.68 2170.964 1024 320 31.51 67.81 93.21
756 xcit_tiny_12_p8_384_dist 471.45 2172.002 1024 384 14.13 69.14 6.71
757 dm_nfnet_f1 461.24 2220.087 1024 320 35.97 46.77 132.63
758 tf_efficientnetv2_m 458.93 1673.426 768 480 24.76 89.84 54.14
759 xcit_small_24_p16_384_dist 457.16 2239.891 1024 384 26.72 68.58 47.67
760 coatnet_rmlp_3_rw_224 439.5 582.463 256 224 33.56 79.47 165.15
761 maxvit_base_tf_224 430.05 1190.542 512 224 24.04 95.01 119.47
762 swinv2_cr_large_224 423.86 1811.887 768 224 35.1 78.42 196.68
763 resnetv2_152x2_bit_teacher 423.36 2418.743 1024 224 46.95 45.11 236.34
764 swinv2_cr_tiny_384 423.1 907.565 384 384 15.34 161.01 28.33
765 coatnet_3_rw_224 421.95 606.701 256 224 33.44 73.83 181.81
766 resnetv2_101x1_bitm 419.35 610.453 256 448 31.65 64.93 44.54
767 coatnet_3_224 405.07 631.982 256 224 36.56 79.01 166.97
768 convnextv2_base 403.59 1268.593 512 288 25.43 47.53 88.72
769 eca_nfnet_l2 401.73 2548.946 1024 384 30.05 68.28 56.72
770 regnetz_d8_evos 394.39 1947.294 768 320 7.03 38.92 23.46
771 convmixer_1024_20_ks9_p14 393.5 2602.254 1024 224 5.55 5.51 24.38
772 eva_large_patch14_196 392.3 2610.234 1024 196 61.57 63.52 304.14
773 crossvit_15_dagger_408 390.72 655.182 256 408 21.45 95.05 28.5
774 vit_large_patch16_224 390.66 2621.182 1024 224 61.6 63.52 304.33
775 vit_base_patch16_18x2_224 384.38 2663.987 1024 224 52.51 71.38 256.73
776 deit3_large_patch16_224_in21ft1k 377.58 2711.976 1024 224 61.6 63.52 304.37
777 deit3_large_patch16_224 377.53 2712.348 1024 224 61.6 63.52 304.37
778 convnext_large 373.02 2058.836 768 288 56.87 71.29 197.77
779 beit_large_patch16_224 360.62 2839.572 1024 224 61.6 63.52 304.43
780 beitv2_large_patch16_224 360.58 2839.86 1024 224 61.6 63.52 304.43
781 swinv2_base_window12to16_192to256_22kft1k 360.56 1065.006 384 256 22.02 84.71 87.92
782 swinv2_base_window16_256 360.23 1065.959 384 256 22.02 84.71 87.92
783 regnety_160 353.5 2172.566 768 288 26.37 38.07 83.59
784 nasnetalarge 345.63 1111.004 384 331 23.89 90.56 88.75
785 maxvit_tiny_tf_384 344.01 744.157 256 384 17.53 123.42 30.98
786 xcit_small_24_p8_224 342.37 2990.915 1024 224 35.81 90.78 47.63
787 xcit_small_24_p8_224_dist 342.26 2991.817 1024 224 35.81 90.78 47.63
788 flexivit_large 335.35 3053.52 1024 240 70.99 75.39 304.36
789 maxxvitv2_rmlp_large_rw_224 332.33 3081.271 1024 224 44.14 87.15 215.42
790 vit_large_r50_s32_384 329.8 3104.921 1024 384 57.43 76.52 329.09
791 pnasnet5large 328.89 1167.534 384 331 25.04 92.89 86.06
792 tresnet_m_448 325.8 3143.01 1024 448 22.94 29.21 31.39
793 volo_d1_384 323.04 1584.906 512 384 22.75 108.55 26.78
794 volo_d4_224 318.96 3210.439 1024 224 44.34 80.22 192.96
795 xcit_medium_24_p16_384_dist 312.74 3274.268 1024 384 47.39 91.64 84.4
796 nfnet_f2 310.6 3296.869 1024 352 63.22 79.06 193.78
797 vit_base_patch16_384 307.09 1250.42 384 384 55.54 101.56 86.86
798 deit_base_patch16_384 306.8 1251.599 384 384 55.54 101.56 86.86
799 vit_base_patch16_clip_384 306.29 1253.685 384 384 55.54 101.56 86.86
800 deit_base_distilled_patch16_384 305.48 1257.017 384 384 55.65 101.82 87.63
801 ecaresnet269d 305.06 3356.684 1024 352 50.25 101.25 102.09
802 maxvit_large_tf_224 301.43 1273.908 384 224 43.68 127.35 211.79
803 deit3_base_patch16_384_in21ft1k 298.01 1288.526 384 384 55.54 101.56 86.88
804 deit3_base_patch16_384 297.88 1289.093 384 384 55.54 101.56 86.88
805 resnetrs270 296.97 3448.186 1024 352 51.13 105.48 129.86
806 regnetx_320 289.44 2653.413 768 224 31.81 36.3 107.81
807 efficientnet_b6 287.31 890.997 256 528 19.4 167.39 43.04
808 vit_large_patch14_224 286.23 3577.501 1024 224 81.08 88.79 304.2
809 vit_large_patch14_clip_224 285.99 3580.5 1024 224 81.08 88.79 304.2
810 crossvit_18_dagger_408 285.18 673.248 192 408 32.47 124.87 44.61
811 cait_xxs24_384 281.48 3637.936 1024 384 9.63 122.66 12.03
812 ig_resnext101_32x32d 275.12 1860.956 512 224 87.29 91.12 468.53
813 tf_efficientnet_b6 274.07 700.545 192 528 19.4 167.39 43.04
814 dm_nfnet_f2 264.79 2900.408 768 352 63.22 79.06 193.78
815 beit_base_patch16_384 261.27 1469.733 384 384 55.54 101.56 86.74
816 efficientnetv2_l 260.33 1966.694 512 480 56.4 157.99 118.52
817 swinv2_cr_small_384 259.75 985.56 256 384 29.7 298.03 49.7
818 tf_efficientnetv2_l 257.29 1989.923 512 480 56.4 157.99 118.52
819 resnest200e 254.36 1006.453 256 320 35.69 82.78 70.2
820 mvitv2_large 249.99 2048.061 512 224 43.87 112.02 217.99
821 xcit_tiny_24_p8_384_dist 248.25 4124.916 1024 384 27.05 132.95 12.11
822 convnext_xlarge 242.63 2110.182 512 288 100.8 95.05 350.2
823 resmlp_big_24_224_in22ft1k 241.9 4233.056 1024 224 100.23 87.31 129.14
824 resmlp_big_24_224 241.74 4235.988 1024 224 100.23 87.31 129.14
825 resmlp_big_24_distilled_224 241.44 4241.249 1024 224 100.23 87.31 129.14
826 convnextv2_large 239.52 1068.782 256 288 56.87 71.29 197.96
827 coatnet_4_224 238.62 1072.827 256 224 62.48 129.26 275.43
828 swin_base_patch4_window12_384 236.12 813.144 192 384 47.19 134.78 87.9
829 xcit_medium_24_p8_224_dist 233.5 3289.007 768 224 63.53 121.23 84.32
830 xcit_medium_24_p8_224 233.5 3289.104 768 224 63.53 121.23 84.32
831 eca_nfnet_l3 229.87 2227.284 512 448 52.55 118.4 72.04
832 vit_base_r50_s16_384 226.32 1696.687 384 384 67.43 135.03 98.95
833 maxvit_small_tf_384 224.01 857.105 192 384 35.87 183.65 69.02
834 xcit_small_12_p8_384_dist 221.54 1733.28 384 384 54.92 138.29 26.21
835 swinv2_large_window12to16_192to256_22kft1k 220.1 1163.101 256 256 47.81 121.53 196.74
836 volo_d5_224 210.88 4855.76 1024 224 72.4 118.11 295.46
837 vit_base_patch8_224 199.67 1282.079 256 224 78.22 161.69 86.58
838 cait_xs24_384 197.64 3885.811 768 384 19.28 183.98 26.67
839 resnetrs350 196.19 5219.377 1024 384 77.59 154.74 163.96
840 cait_xxs36_384 188.27 5439.03 1024 384 14.35 183.7 17.37
841 swinv2_cr_base_384 185.68 1378.725 256 384 50.57 333.68 87.88
842 coatnet_rmlp_2_rw_384 184.84 1038.746 192 384 47.69 209.43 73.88
843 swinv2_cr_huge_224 184.09 2085.934 384 224 115.97 121.08 657.83
844 convnext_xxlarge 183.68 2787.486 512 224 151.66 95.29 846.47
845 volo_d2_384 180.56 2126.753 384 384 46.17 184.51 58.87
846 xcit_large_24_p16_384_dist 176.39 5805.281 1024 384 105.35 137.17 189.1
847 regnety_640 174.81 4393.396 768 224 64.16 42.5 281.38
848 maxvit_xlarge_tf_224 171.63 1491.6 256 224 97.49 191.02 474.95
849 nfnet_f3 170.11 4514.791 768 416 115.58 141.78 254.92
850 densenet264d_iabn 167.13 6126.84 1024 224 13.47 14.0 72.74
851 efficientnet_b7 166.38 1153.975 192 600 38.33 289.94 66.35
852 maxvit_tiny_tf_512 163.72 781.809 128 512 33.49 257.59 31.05
853 efficientnetv2_xl 162.7 3146.865 512 512 93.85 247.32 208.12
854 tf_efficientnetv2_xl 161.32 3173.821 512 512 93.85 247.32 208.12
855 tf_efficientnet_b7 160.43 1196.798 192 600 38.33 289.94 66.35
856 resnetv2_152x2_bit_teacher_384 159.54 1604.579 256 384 136.16 132.56 236.34
857 tresnet_l_448 154.66 6620.743 1024 448 43.5 47.56 55.99
858 vit_huge_patch14_224 154.27 6637.58 1024 224 167.43 139.43 658.75
859 vit_huge_patch14_clip_224 154.17 6642.017 1024 224 167.4 139.41 632.05
860 maxxvitv2_rmlp_base_rw_384 153.9 1663.429 256 384 72.98 213.74 116.09
861 cait_s24_384 152.41 3359.254 512 384 32.17 245.31 47.06
862 deit3_huge_patch14_224_in21ft1k 150.05 6824.53 1024 224 167.4 139.41 632.13
863 deit3_huge_patch14_224 149.59 6845.356 1024 224 167.4 139.41 632.13
864 dm_nfnet_f3 145.48 3519.403 512 416 115.58 141.78 254.92
865 resnetrs420 142.37 5394.528 768 416 108.45 213.79 191.89
866 swin_large_patch4_window12_384 138.37 925.016 128 384 104.08 202.16 196.74
867 resnetv2_50x3_bitm 133.5 1438.189 192 448 145.7 133.37 217.32
868 maxvit_rmlp_base_rw_384 131.6 1945.285 256 384 70.97 318.95 116.14
869 xcit_large_24_p8_224_dist 131.32 3898.808 512 224 141.23 181.56 188.93
870 xcit_large_24_p8_224 131.27 3900.391 512 224 141.23 181.56 188.93
871 coatnet_5_224 130.48 1471.508 192 224 145.49 194.24 687.47
872 maxvit_base_tf_384 122.48 1567.652 192 384 73.8 332.9 119.65
873 resnest269e 119.17 2148.198 256 416 77.69 171.98 110.93
874 resnetv2_152x2_bitm 117.29 2182.534 256 448 184.99 180.43 236.34
875 xcit_small_24_p8_384_dist 116.59 3293.649 384 384 105.24 265.91 47.63
876 tresnet_xl_448 115.63 8855.938 1024 448 60.65 61.31 78.44
877 swinv2_cr_large_384 113.43 1128.479 128 384 108.95 404.96 196.68
878 maxvit_small_tf_512 106.82 1198.298 128 512 67.26 383.77 69.13
879 efficientnet_b8 106.21 1205.18 128 672 63.48 442.89 87.41
880 tf_efficientnet_b8 102.86 1244.358 128 672 63.48 442.89 87.41
881 eva_large_patch14_336 102.71 2492.371 256 336 191.1 270.24 304.53
882 vit_large_patch14_clip_336 102.52 2496.99 256 336 191.11 270.24 304.53
883 vit_large_patch16_384 102.5 2497.593 256 384 191.21 270.24 304.72
884 cait_s36_384 101.88 5025.316 512 384 47.99 367.4 68.37
885 eva_giant_patch14_224 101.84 10055.112 1024 224 267.18 192.64 1012.56
886 vit_giant_patch14_224 100.71 7625.752 768 224 267.18 192.64 1012.61
887 vit_giant_patch14_clip_224 100.43 7646.856 768 224 267.18 192.64 1012.65
888 deit3_large_patch16_384_in21ft1k 99.81 2564.809 256 384 191.21 270.24 304.76
889 deit3_large_patch16_384 99.8 2564.994 256 384 191.21 270.24 304.76
890 swinv2_base_window12to24_192to384_22kft1k 96.12 665.832 64 384 55.25 280.36 87.92
891 nfnet_f4 89.33 5731.574 512 512 216.26 262.26 316.07
892 beit_large_patch16_384 88.56 2890.58 256 384 191.21 270.24 305.0
893 maxvit_large_tf_384 86.44 1480.84 128 384 132.55 445.84 212.03
894 regnety_1280 82.49 4654.845 384 224 127.66 71.58 644.81
895 xcit_medium_24_p8_384_dist 79.96 3201.705 256 384 186.67 354.73 84.32
896 resnetv2_101x3_bitm 79.41 2417.67 192 448 280.33 194.78 387.93
897 volo_d3_448 77.64 2473.021 192 448 96.33 446.83 86.63
898 dm_nfnet_f4 77.54 4952.036 384 512 216.26 262.26 316.07
899 nfnet_f5 67.46 5691.915 384 544 290.97 349.71 377.21
900 tf_efficientnet_l2 63.66 1507.989 96 475 172.11 609.89 480.31
901 swinv2_large_window12to24_192to384_22kft1k 60.94 787.651 48 384 116.15 407.83 196.74
902 vit_gigantic_patch14_224 60.18 8507.121 512 224 483.95 275.37 1844.44
903 vit_gigantic_patch14_clip_224 60.11 8517.85 512 224 483.96 275.37 1844.91
904 volo_d4_448 57.87 3317.675 192 448 197.13 527.35 193.41
905 maxvit_base_tf_512 57.86 2212.256 128 512 138.02 703.99 119.88
906 dm_nfnet_f5 57.78 6645.368 384 544 290.97 349.71 377.21
907 vit_huge_patch14_clip_336 57.4 4460.085 256 336 390.97 407.54 632.46
908 ig_resnext101_32x48d 56.43 6804.709 384 224 153.57 131.06 828.41
909 convnextv2_huge 56.31 1704.92 96 384 337.96 232.35 660.29
910 convmixer_1536_20 55.47 18461.426 1024 224 48.68 33.03 51.63
911 swinv2_cr_giant_224 52.39 3665.046 192 224 483.85 309.15 2598.76
912 nfnet_f6 51.81 7411.574 384 576 378.69 452.2 438.36
913 maxvit_xlarge_tf_384 50.76 1891.335 96 384 292.78 668.76 475.32
914 swinv2_cr_huge_384 49.01 1305.73 64 384 352.04 583.18 657.94
915 regnety_2560 47.69 8051.463 384 224 257.07 87.48 826.14
916 xcit_large_24_p8_384_dist 44.91 4275.004 192 384 415.0 531.82 188.93
917 dm_nfnet_f6 44.62 5737.462 256 576 378.69 452.2 438.36
918 nfnet_f7 41.13 6224.782 256 608 480.39 570.85 499.5
919 maxvit_large_tf_512 41.04 1559.597 64 512 244.75 942.15 212.33
920 eva_giant_patch14_336 39.89 6418.269 256 336 620.64 550.67 1013.01
921 volo_d5_448 39.88 3209.812 128 448 315.06 737.92 295.91
922 beit_large_patch16_512 35.33 2716.953 96 512 362.24 656.39 305.67
923 cait_m36_384 32.89 7783.487 256 384 173.11 734.81 271.22
924 resnetv2_152x4_bitm 30.46 3151.929 96 480 844.84 414.26 936.53
925 volo_d5_512 27.89 4590.0 128 512 425.09 1105.37 296.09
926 maxvit_xlarge_tf_512 24.38 1968.424 48 512 534.14 1413.22 475.77
927 efficientnet_l2 23.13 1383.428 32 800 479.12 1707.39 480.31
928 swinv2_cr_giant_384 15.06 2124.735 32 384 1450.71 1394.86 2598.76
929 cait_m48_448 13.86 9235.876 128 448 329.41 1708.23 356.46
930 eva_giant_patch14_560 10.52 3043.009 32 560 1906.76 2577.17 1014.45


@ -14,12 +14,12 @@ exec(open('timm/version.py').read())
setup(
name='timm',
version=__version__,
description='(Unofficial) PyTorch Image Models',
description='PyTorch Image Models',
long_description=long_description,
long_description_content_type='text/markdown',
url='https://github.com/rwightman/pytorch-image-models',
url='https://github.com/huggingface/pytorch-image-models',
author='Ross Wightman',
author_email='hello@rwightman.com',
author_email='ross@huggingface.co',
classifiers=[
# How mature is this project? Common values are
# 3 - Alpha
@ -29,11 +29,11 @@ setup(
'Intended Audience :: Education',
'Intended Audience :: Science/Research',
'License :: OSI Approved :: Apache Software License',
'Programming Language :: Python :: 3.6',
'Programming Language :: Python :: 3.7',
'Programming Language :: Python :: 3.8',
'Programming Language :: Python :: 3.9',
'Programming Language :: Python :: 3.10',
'Programming Language :: Python :: 3.11',
'Topic :: Scientific/Engineering',
'Topic :: Scientific/Engineering :: Artificial Intelligence',
'Topic :: Software Development',
@ -45,7 +45,7 @@ setup(
keywords='pytorch pretrained models efficientnet mobilenetv3 mnasnet resnet vision transformer vit',
packages=find_packages(exclude=['convert', 'tests', 'results']),
include_package_data=True,
install_requires=['torch >= 1.7', 'torchvision', 'pyyaml', 'huggingface_hub'],
python_requires='>=3.6',
install_requires=['torch >= 1.7', 'torchvision', 'pyyaml', 'huggingface_hub', 'safetensors'],
python_requires='>=3.7',
)
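
Since safetensors is now a hard install requirement (per the install_requires change above), checkpoints in that format can be handled without an optional extra. A minimal sketch, not part of the diff:

    import timm
    from safetensors.torch import load_file, save_file

    # round-trip a state dict through the safetensors format
    model = timm.create_model('resnet18', pretrained=False)
    save_file(model.state_dict(), 'resnet18.safetensors')
    model.load_state_dict(load_file('resnet18.safetensors'))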

@ -1,10 +1,7 @@
import pytest
import torch
import torch.nn as nn
import platform
import os
from timm.models.layers import create_act_layer, get_act_layer, set_layer_config
from timm.layers import create_act_layer, set_layer_config
class MLP(nn.Module):

@ -13,9 +13,8 @@ except ImportError:
has_fx_feature_extraction = False
import timm
from timm import list_models, create_model, set_scriptable, has_pretrained_cfg_key, is_pretrained_cfg_key, \
get_pretrained_cfg_value
from timm.models.fx_features import _leaf_modules, _autowrap_functions
from timm import list_models, create_model, set_scriptable, get_pretrained_cfg_value
from timm.models._features_fx import _leaf_modules, _autowrap_functions
if hasattr(torch._C, '_jit_set_profiling_executor'):
# legacy executor is too slow to compile large models for unit tests
@ -28,7 +27,7 @@ NON_STD_FILTERS = [
'vit_*', 'tnt_*', 'pit_*', 'swin_*', 'coat_*', 'cait_*', '*mixer_*', 'gmlp_*', 'resmlp_*', 'twins_*',
'convit_*', 'levit*', 'visformer*', 'deit*', 'jx_nest_*', 'nest_*', 'xcit_*', 'crossvit_*', 'beit*',
'poolformer_*', 'volo_*', 'sequencer2d_*', 'swinv2_*', 'pvt_v2*', 'mvitv2*', 'gcvit*', 'efficientformer*',
'coatnet*', 'coatnext*', 'maxvit*', 'maxxvit*',
'eva_*', 'flexivit*'
]
NUM_NON_STD = len(NON_STD_FILTERS)
@ -39,8 +38,8 @@ if 'GITHUB_ACTIONS' in os.environ:
'*efficientnet_l2*', '*resnext101_32x48d', '*in21k', '*152x4_bitm', '*101x3_bitm', '*50x3_bitm',
'*nfnet_f3*', '*nfnet_f4*', '*nfnet_f5*', '*nfnet_f6*', '*nfnet_f7*', '*efficientnetv2_xl*',
'*resnetrs350*', '*resnetrs420*', 'xcit_large_24_p8*', 'vit_huge*', 'vit_gi*', 'swin*huge*',
'swin*giant*']
NON_STD_EXCLUDE_FILTERS = ['vit_huge*', 'vit_gi*', 'swin*giant*']
'swin*giant*', 'convnextv2_huge*', 'maxvit_xlarge*', 'davit_giant', 'davit_huge']
NON_STD_EXCLUDE_FILTERS = ['vit_huge*', 'vit_gi*', 'swin*giant*', 'eva_giant*']
else:
EXCLUDE_FILTERS = []
NON_STD_EXCLUDE_FILTERS = ['vit_gi*']
@ -54,7 +53,7 @@ MAX_JIT_SIZE = 320
TARGET_FFEAT_SIZE = 96
MAX_FFEAT_SIZE = 256
TARGET_FWD_FX_SIZE = 128
MAX_FWD_FX_SIZE = 224
MAX_FWD_FX_SIZE = 256
TARGET_BWD_FX_SIZE = 128
MAX_BWD_FX_SIZE = 224
@ -130,7 +129,7 @@ def test_model_backward(model_name, batch_size):
@pytest.mark.timeout(300)
@pytest.mark.parametrize('model_name', list_models(exclude_filters=NON_STD_FILTERS))
@pytest.mark.parametrize('model_name', list_models(exclude_filters=NON_STD_FILTERS, include_tags=True))
@pytest.mark.parametrize('batch_size', [1])
def test_model_default_cfgs(model_name, batch_size):
"""Run a single forward pass with each model"""
@ -192,7 +191,7 @@ def test_model_default_cfgs(model_name, batch_size):
@pytest.mark.timeout(300)
@pytest.mark.parametrize('model_name', list_models(filter=NON_STD_FILTERS, exclude_filters=NON_STD_EXCLUDE_FILTERS))
@pytest.mark.parametrize('model_name', list_models(filter=NON_STD_FILTERS, exclude_filters=NON_STD_EXCLUDE_FILTERS, include_tags=True))
@pytest.mark.parametrize('batch_size', [1])
def test_model_default_cfgs_non_std(model_name, batch_size):
"""Run a single forward pass with each model"""
@ -270,7 +269,7 @@ if 'GITHUB_ACTIONS' not in os.environ:
EXCLUDE_JIT_FILTERS = [
'*iabn*', 'tresnet*', # models using inplace abn unlikely to ever be scriptable
'dla*', 'hrnet*', 'ghostnet*', # hopefully fix at some point
'dla*', 'hrnet*', 'ghostnet*' # hopefully fix at some point
'vit_large_*', 'vit_huge_*', 'vit_gi*',
]
@ -305,7 +304,7 @@ if 'GITHUB_ACTIONS' in os.environ: # and 'Linux' in platform.system():
@pytest.mark.timeout(120)
@pytest.mark.parametrize('model_name', list_models(exclude_filters=EXCLUDE_FILTERS + EXCLUDE_FEAT_FILTERS))
@pytest.mark.parametrize('model_name', list_models(exclude_filters=EXCLUDE_FILTERS + EXCLUDE_FEAT_FILTERS, include_tags=True))
@pytest.mark.parametrize('batch_size', [1])
def test_model_forward_features(model_name, batch_size):
"""Run a single forward pass with each model in feature extraction mode"""

@ -1,4 +1,4 @@
from .version import __version__
from .models import create_model, list_models, is_model, list_modules, model_entrypoint, \
is_scriptable, is_exportable, set_scriptable, set_exportable, has_pretrained_cfg_key, is_pretrained_cfg_key, \
get_pretrained_cfg_value, is_model_pretrained
from .layers import is_scriptable, is_exportable, set_scriptable, set_exportable
from .models import create_model, list_models, list_pretrained, is_model, list_modules, model_entrypoint, \
is_model_pretrained, get_pretrained_cfg, get_pretrained_cfg_value
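
A quick sketch of the reorganized top-level API (names per the new import list above; 'resnet50' is just an example):

    import timm

    tags = timm.list_pretrained('resnet50*')                        # pretrained model names w/ tags
    cfg = timm.get_pretrained_cfg('resnet50')                       # full pretrained cfg
    size = timm.get_pretrained_cfg_value('resnet50', 'input_size')  # single cfg value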

@ -1,9 +1,11 @@
from .auto_augment import RandAugment, AutoAugment, rand_augment_ops, auto_augment_policy,\
rand_augment_transform, auto_augment_transform
from .config import resolve_data_config
from .config import resolve_data_config, resolve_model_data_config
from .constants import *
from .dataset import ImageDataset, IterableImageDataset, AugMixDataset
from .dataset_factory import create_dataset
from .dataset_info import DatasetInfo, CustomDatasetInfo
from .imagenet_info import ImageNetInfo, infer_imagenet_subset
from .loader import create_loader
from .mixup import Mixup, FastCollateMixup
from .readers import create_reader


@ -1,4 +1,4 @@
""" AutoAugment, RandAugment, and AugMix for PyTorch
""" AutoAugment, RandAugment, AugMix, and 3-Augment for PyTorch
This code implements the searched ImageNet policies with various tweaks and improvements and
does not include any of the search code.
@ -9,18 +9,24 @@ AA and RA Implementation adapted from:
AugMix adapted from:
https://github.com/google-research/augmix
3-Augment based on: https://github.com/facebookresearch/deit/blob/main/README_revenge.md
Papers:
AutoAugment: Learning Augmentation Policies from Data - https://arxiv.org/abs/1805.09501
Learning Data Augmentation Strategies for Object Detection - https://arxiv.org/abs/1906.11172
RandAugment: Practical automated data augmentation... - https://arxiv.org/abs/1909.13719
AugMix: A Simple Data Processing Method to Improve Robustness and Uncertainty - https://arxiv.org/abs/1912.02781
3-Augment: DeiT III: Revenge of the ViT - https://arxiv.org/abs/2204.07118
Hacked together by / Copyright 2019, Ross Wightman
"""
import random
import math
import re
from PIL import Image, ImageOps, ImageEnhance, ImageChops
from functools import partial
from typing import Dict, List, Optional, Union
from PIL import Image, ImageOps, ImageEnhance, ImageChops, ImageFilter
import PIL
import numpy as np
@ -175,6 +181,24 @@ def sharpness(img, factor, **__):
return ImageEnhance.Sharpness(img).enhance(factor)
def gaussian_blur(img, factor, **__):
img = img.filter(ImageFilter.GaussianBlur(radius=factor))
return img
def gaussian_blur_rand(img, factor, **__):
radius_min = 0.1
radius_max = 2.0
img = img.filter(ImageFilter.GaussianBlur(radius=random.uniform(radius_min, radius_max * factor)))
return img
def desaturate(img, factor, **_):
factor = min(1., max(0., 1. - factor))
# enhance factor 0 = grayscale, 1.0 = no-change
return ImageEnhance.Color(img).enhance(factor)
def _randomly_negate(v):
"""With 50% prob, negate the value"""
return -v if random.random() > 0.5 else v
@ -200,6 +224,14 @@ def _enhance_increasing_level_to_arg(level, _hparams):
return level,
def _minmax_level_to_arg(level, _hparams, min_val=0., max_val=1.0, clamp=True):
level = (level / _LEVEL_DENOM)
level = min_val + (max_val - min_val) * level
if clamp:
level = max(min_val, min(max_val, level))
return level,
def _shear_level_to_arg(level, _hparams):
# range [-0.3, 0.3]
level = (level / _LEVEL_DENOM) * 0.3
@ -246,7 +278,7 @@ def _posterize_original_level_to_arg(level, _hparams):
def _solarize_level_to_arg(level, _hparams):
# range [0, 256]
# intensity/severity of augmentation decreases with level
return int((level / _LEVEL_DENOM) * 256),
return min(256, int((level / _LEVEL_DENOM) * 256)),
def _solarize_increasing_level_to_arg(level, _hparams):
@ -257,7 +289,7 @@ def _solarize_increasing_level_to_arg(level, _hparams):
def _solarize_add_level_to_arg(level, _hparams):
# range [0, 110]
return int((level / _LEVEL_DENOM) * 110),
return min(128, int((level / _LEVEL_DENOM) * 110)),
LEVEL_TO_ARG = {
@ -286,6 +318,9 @@ LEVEL_TO_ARG = {
'TranslateY': _translate_abs_level_to_arg,
'TranslateXRel': _translate_rel_level_to_arg,
'TranslateYRel': _translate_rel_level_to_arg,
'Desaturate': partial(_minmax_level_to_arg, min_val=0.5, max_val=1.0),
'GaussianBlur': partial(_minmax_level_to_arg, min_val=0.1, max_val=2.0),
'GaussianBlurRand': _minmax_level_to_arg,
}
@ -314,6 +349,9 @@ NAME_TO_OP = {
'TranslateY': translate_y_abs,
'TranslateXRel': translate_x_rel,
'TranslateYRel': translate_y_rel,
'Desaturate': desaturate,
'GaussianBlur': gaussian_blur,
'GaussianBlurRand': gaussian_blur_rand,
}
@ -347,6 +385,7 @@ class AugmentOp:
if self.magnitude_std > 0:
# magnitude randomization enabled
if self.magnitude_std == float('inf'):
# inf == uniform sampling
magnitude = random.uniform(0, magnitude)
elif self.magnitude_std > 0:
magnitude = random.gauss(magnitude, self.magnitude_std)
@ -499,6 +538,16 @@ def auto_augment_policy_originalr(hparams):
return pc
def auto_augment_policy_3a(hparams):
policy = [
[('Solarize', 1.0, 5)], # 128 solarize threshold @ 5 magnitude
[('Desaturate', 1.0, 10)], # grayscale at 10 magnitude
[('GaussianBlurRand', 1.0, 10)],
]
pc = [[AugmentOp(*a, hparams=hparams) for a in sp] for sp in policy]
return pc
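For illustration, the new '3a' policy is reachable through the existing factory; a minimal sketch, not part of the diff:

    from PIL import Image
    from timm.data.auto_augment import auto_augment_transform

    # 3-Augment: one of solarize (128 threshold), grayscale, gaussian blur per image
    aa = auto_augment_transform('3a')
    out = aa(Image.new('RGB', (224, 224)))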
def auto_augment_policy(name='v0', hparams=None):
hparams = hparams or _HPARAMS_DEFAULT
if name == 'original':
@ -509,6 +558,8 @@ def auto_augment_policy(name='v0', hparams=None):
return auto_augment_policy_v0(hparams)
elif name == 'v0r':
return auto_augment_policy_v0r(hparams)
elif name == '3a':
return auto_augment_policy_3a(hparams)
else:
assert False, 'Unknown AA policy (%s)' % name
@ -534,19 +585,23 @@ class AutoAugment:
return fs
def auto_augment_transform(config_str, hparams):
def auto_augment_transform(config_str: str, hparams: Optional[Dict] = None):
"""
Create a AutoAugment transform
:param config_str: String defining configuration of auto augmentation. Consists of multiple sections separated by
dashes ('-'). The first section defines the AutoAugment policy (one of 'v0', 'v0r', 'original', 'originalr').
The remaining sections, not order specific, determine
'mstd' - float std deviation of magnitude noise applied
Ex 'original-mstd0.5' results in AutoAugment with original policy, magnitude_std 0.5
Args:
config_str: String defining configuration of auto augmentation. Consists of multiple sections separated by
dashes ('-').
The first section defines the AutoAugment policy (one of 'v0', 'v0r', 'original', 'originalr').
:param hparams: Other hparams (kwargs) for the AutoAugmentation scheme
The remaining sections:
'mstd' - float std deviation of magnitude noise applied
Ex 'original-mstd0.5' results in AutoAugment with original policy, magnitude_std 0.5
:return: A PyTorch compatible Transform
hparams: Other hparams (kwargs) for the AutoAugmentation scheme
Returns:
A PyTorch compatible Transform
"""
config = config_str.split('-')
policy_name = config[0]
@ -605,42 +660,80 @@ _RAND_INCREASING_TRANSFORMS = [
]
_RAND_3A = [
'SolarizeIncreasing',
'Desaturate',
'GaussianBlur',
]
_RAND_CHOICE_3A = {
'SolarizeIncreasing': 6,
'Desaturate': 6,
'GaussianBlur': 6,
'Rotate': 3,
'ShearX': 2,
'ShearY': 2,
'PosterizeIncreasing': 1,
'AutoContrast': 1,
'ColorIncreasing': 1,
'SharpnessIncreasing': 1,
'ContrastIncreasing': 1,
'BrightnessIncreasing': 1,
'Equalize': 1,
'Invert': 1,
}
# These experimental weights are based loosely on the relative improvements mentioned in the paper.
# They may not result in increased performance, but could likely be tuned to do so.
_RAND_CHOICE_WEIGHTS_0 = {
'Rotate': 0.3,
'ShearX': 0.2,
'ShearY': 0.2,
'TranslateXRel': 0.1,
'TranslateYRel': 0.1,
'Color': .025,
'Sharpness': 0.025,
'AutoContrast': 0.025,
'Solarize': .005,
'SolarizeAdd': .005,
'Contrast': .005,
'Brightness': .005,
'Equalize': .005,
'Posterize': 0,
'Invert': 0,
'Rotate': 3,
'ShearX': 2,
'ShearY': 2,
'TranslateXRel': 1,
'TranslateYRel': 1,
'ColorIncreasing': .25,
'SharpnessIncreasing': 0.25,
'AutoContrast': 0.25,
'SolarizeIncreasing': .05,
'SolarizeAdd': .05,
'ContrastIncreasing': .05,
'BrightnessIncreasing': .05,
'Equalize': .05,
'PosterizeIncreasing': 0.05,
'Invert': 0.05,
}
def _select_rand_weights(weight_idx=0, transforms=None):
transforms = transforms or _RAND_TRANSFORMS
assert weight_idx == 0 # only one set of weights currently
rand_weights = _RAND_CHOICE_WEIGHTS_0
probs = [rand_weights[k] for k in transforms]
probs /= np.sum(probs)
return probs
def _get_weighted_transforms(transforms: Dict):
transforms, probs = list(zip(*transforms.items()))
probs = np.array(probs)
probs = probs / np.sum(probs)
return transforms, probs
def rand_augment_choices(name: str, increasing=True):
if name == 'weights':
return _RAND_CHOICE_WEIGHTS_0
elif name == '3aw':
return _RAND_CHOICE_3A
elif name == '3a':
return _RAND_3A
else:
return _RAND_INCREASING_TRANSFORMS if increasing else _RAND_TRANSFORMS
def rand_augment_ops(magnitude=10, hparams=None, transforms=None):
def rand_augment_ops(
magnitude: Union[int, float] = 10,
prob: float = 0.5,
hparams: Optional[Dict] = None,
transforms: Optional[Union[Dict, List]] = None,
):
hparams = hparams or _HPARAMS_DEFAULT
transforms = transforms or _RAND_TRANSFORMS
return [AugmentOp(
name, prob=0.5, magnitude=magnitude, hparams=hparams) for name in transforms]
name, prob=prob, magnitude=magnitude, hparams=hparams) for name in transforms]
class RandAugment:
@ -652,7 +745,11 @@ class RandAugment:
def __call__(self, img):
# no replacement when using weighted choice
ops = np.random.choice(
self.ops, self.num_layers, replace=self.choice_weights is None, p=self.choice_weights)
self.ops,
self.num_layers,
replace=self.choice_weights is None,
p=self.choice_weights,
)
for op in ops:
img = op(img)
return img
@ -665,61 +762,84 @@ class RandAugment:
return fs
def rand_augment_transform(config_str, hparams):
def rand_augment_transform(
config_str: str,
hparams: Optional[Dict] = None,
transforms: Optional[Union[str, Dict, List]] = None,
):
"""
Create a RandAugment transform
:param config_str: String defining configuration of random augmentation. Consists of multiple sections separated by
dashes ('-'). The first section defines the specific variant of rand augment (currently only 'rand'). The remaining
sections, not order specific, determine
'm' - integer magnitude of rand augment
'n' - integer num layers (number of transform ops selected per image)
'w' - integer probability weight index (index of a set of weights to influence choice of op)
'mstd' - float std deviation of magnitude noise applied, or uniform sampling if infinity (or > 100)
'mmax' - set upper bound for magnitude to something other than default of _LEVEL_DENOM (10)
'inc' - integer (bool), use augmentations that increase in severity with magnitude (default: 0)
Ex 'rand-m9-n3-mstd0.5' results in RandAugment with magnitude 9, num_layers 3, magnitude_std 0.5
'rand-mstd1-w0' results in magnitude_std 1.0, weights 0, default magnitude of 10 and num_layers 2
:param hparams: Other hparams (kwargs) for the RandAugmentation scheme
:return: A PyTorch compatible Transform
Args:
config_str (str): String defining configuration of random augmentation. Consists of multiple sections separated
by dashes ('-'). The first section defines the specific variant of rand augment (currently only 'rand').
The remaining sections, not order specific, determine
'm' - integer magnitude of rand augment
'n' - integer num layers (number of transform ops selected per image)
'p' - float probability of applying each layer (default 0.5)
'mstd' - float std deviation of magnitude noise applied, or uniform sampling if infinity (or > 100)
'mmax' - set upper bound for magnitude to something other than default of _LEVEL_DENOM (10)
'inc' - integer (bool), use augmentations that increase in severity with magnitude (default: 0)
't' - str name of transform set to use
Ex 'rand-m9-n3-mstd0.5' results in RandAugment with magnitude 9, num_layers 3, magnitude_std 0.5
'rand-mstd1-tweights' results in mag std 1.0, weighted transforms, default mag of 10 and num_layers 2
hparams (dict): Other hparams (kwargs) for the RandAugmentation scheme
Returns:
A PyTorch compatible Transform
"""
magnitude = _LEVEL_DENOM # default to _LEVEL_DENOM for magnitude (currently 10)
num_layers = 2 # default to 2 ops per image
weight_idx = None # default to no probability weights for op choice
transforms = _RAND_TRANSFORMS
increasing = False
prob = 0.5
config = config_str.split('-')
assert config[0] == 'rand'
config = config[1:]
for c in config:
cs = re.split(r'(\d.*)', c)
if len(cs) < 2:
continue
key, val = cs[:2]
if key == 'mstd':
# noise param / randomization of magnitude values
mstd = float(val)
if mstd > 100:
# use uniform sampling in 0 to magnitude if mstd is > 100
mstd = float('inf')
hparams.setdefault('magnitude_std', mstd)
elif key == 'mmax':
# clip magnitude between [0, mmax] instead of default [0, _LEVEL_DENOM]
hparams.setdefault('magnitude_max', int(val))
elif key == 'inc':
if bool(val):
transforms = _RAND_INCREASING_TRANSFORMS
elif key == 'm':
magnitude = int(val)
elif key == 'n':
num_layers = int(val)
elif key == 'w':
weight_idx = int(val)
if c.startswith('t'):
# NOTE old 'w' key was removed, 'w0' is not equivalent to 'tweights'
val = str(c[1:])
if transforms is None:
transforms = val
else:
assert False, 'Unknown RandAugment config section'
ra_ops = rand_augment_ops(magnitude=magnitude, hparams=hparams, transforms=transforms)
choice_weights = None if weight_idx is None else _select_rand_weights(weight_idx)
# numeric options
cs = re.split(r'(\d.*)', c)
if len(cs) < 2:
continue
key, val = cs[:2]
if key == 'mstd':
# noise param / randomization of magnitude values
mstd = float(val)
if mstd > 100:
# use uniform sampling in 0 to magnitude if mstd is > 100
mstd = float('inf')
hparams.setdefault('magnitude_std', mstd)
elif key == 'mmax':
# clip magnitude between [0, mmax] instead of default [0, _LEVEL_DENOM]
hparams.setdefault('magnitude_max', int(val))
elif key == 'inc':
if bool(val):
increasing = True
elif key == 'm':
magnitude = int(val)
elif key == 'n':
num_layers = int(val)
elif key == 'p':
prob = float(val)
else:
assert False, 'Unknown RandAugment config section'
if isinstance(transforms, str):
transforms = rand_augment_choices(transforms, increasing=increasing)
elif transforms is None:
transforms = _RAND_INCREASING_TRANSFORMS if increasing else _RAND_TRANSFORMS
choice_weights = None
if isinstance(transforms, Dict):
transforms, choice_weights = _get_weighted_transforms(transforms)
ra_ops = rand_augment_ops(magnitude=magnitude, prob=prob, hparams=hparams, transforms=transforms)
return RandAugment(ra_ops, num_layers, choice_weights=choice_weights)
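Sketch of the reworked config string, using the new 'p' and 't' sections parsed above (values illustrative):

    from timm.data.auto_augment import rand_augment_transform

    # magnitude 9, 3 ops per image, 0.7 apply prob per op, magnitude noise 0.5,
    # weighted 3-Augment choice set selected via the 't3aw' section
    ra = rand_augment_transform('rand-m9-n3-p0.7-mstd0.5-t3aw', hparams={})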
@ -740,11 +860,19 @@ _AUGMIX_TRANSFORMS = [
]
def augmix_ops(magnitude=10, hparams=None, transforms=None):
def augmix_ops(
magnitude: Union[int, float] = 10,
hparams: Optional[Dict] = None,
transforms: Optional[Union[str, Dict, List]] = None,
):
hparams = hparams or _HPARAMS_DEFAULT
transforms = transforms or _AUGMIX_TRANSFORMS
return [AugmentOp(
name, prob=1.0, magnitude=magnitude, hparams=hparams) for name in transforms]
name,
prob=1.0,
magnitude=magnitude,
hparams=hparams
) for name in transforms]
class AugMixAugment:
@ -820,22 +948,24 @@ class AugMixAugment:
return fs
def augment_and_mix_transform(config_str, hparams):
def augment_and_mix_transform(config_str: str, hparams: Optional[Dict] = None):
""" Create AugMix PyTorch transform
:param config_str: String defining configuration of random augmentation. Consists of multiple sections separated by
dashes ('-'). The first section defines the specific variant of rand augment (currently only 'rand'). The remaining
sections, not order specific, determine
'm' - integer magnitude (severity) of augmentation mix (default: 3)
'w' - integer width of augmentation chain (default: 3)
'd' - integer depth of augmentation chain (-1 is random [1, 3], default: -1)
'b' - integer (bool), blend each branch of chain into end result without a final blend, less CPU (default: 0)
'mstd' - float std deviation of magnitude noise applied (default: 0)
Ex 'augmix-m5-w4-d2' results in AugMix with severity 5, chain width 4, chain depth 2
:param hparams: Other hparams (kwargs) for the Augmentation transforms
:return: A PyTorch compatible Transform
Args:
config_str (str): String defining configuration of random augmentation. Consists of multiple sections separated
by dashes ('-'). The first section defines the specific variant of rand augment (currently only 'rand').
The remaining sections, not order specific, determine
'm' - integer magnitude (severity) of augmentation mix (default: 3)
'w' - integer width of augmentation chain (default: 3)
'd' - integer depth of augmentation chain (-1 is random [1, 3], default: -1)
'b' - integer (bool), blend each branch of chain into end result without a final blend, less CPU (default: 0)
'mstd' - float std deviation of magnitude noise applied (default: 0)
Ex 'augmix-m5-w4-d2' results in AugMix with severity 5, chain width 4, chain depth 2
hparams: Other hparams (kwargs) for the Augmentation transforms
Returns:
A PyTorch compatible Transform
"""
magnitude = 3
width = 3

@ -5,78 +5,123 @@ from .constants import *
_logger = logging.getLogger(__name__)
def resolve_data_config(args, default_cfg={}, model=None, use_test_size=False, verbose=False):
new_config = {}
default_cfg = default_cfg
if not default_cfg and model is not None and hasattr(model, 'default_cfg'):
default_cfg = model.default_cfg
def resolve_data_config(
args=None,
pretrained_cfg=None,
model=None,
use_test_size=False,
verbose=False
):
assert model or args or pretrained_cfg, "At least one of model, args, or pretrained_cfg required for data config."
args = args or {}
pretrained_cfg = pretrained_cfg or {}
if not pretrained_cfg and model is not None and hasattr(model, 'pretrained_cfg'):
pretrained_cfg = model.pretrained_cfg
data_config = {}
# Resolve input/image size
in_chans = 3
if 'chans' in args and args['chans'] is not None:
if args.get('chans', None) is not None:
in_chans = args['chans']
input_size = (in_chans, 224, 224)
if 'input_size' in args and args['input_size'] is not None:
if args.get('input_size', None) is not None:
assert isinstance(args['input_size'], (tuple, list))
assert len(args['input_size']) == 3
input_size = tuple(args['input_size'])
in_chans = input_size[0] # input_size overrides in_chans
elif 'img_size' in args and args['img_size'] is not None:
elif args.get('img_size', None) is not None:
assert isinstance(args['img_size'], int)
input_size = (in_chans, args['img_size'], args['img_size'])
else:
if use_test_size and 'test_input_size' in default_cfg:
input_size = default_cfg['test_input_size']
elif 'input_size' in default_cfg:
input_size = default_cfg['input_size']
new_config['input_size'] = input_size
if use_test_size and pretrained_cfg.get('test_input_size', None) is not None:
input_size = pretrained_cfg['test_input_size']
elif pretrained_cfg.get('input_size', None) is not None:
input_size = pretrained_cfg['input_size']
data_config['input_size'] = input_size
# resolve interpolation method
new_config['interpolation'] = 'bicubic'
if 'interpolation' in args and args['interpolation']:
new_config['interpolation'] = args['interpolation']
elif 'interpolation' in default_cfg:
new_config['interpolation'] = default_cfg['interpolation']
data_config['interpolation'] = 'bicubic'
if args.get('interpolation', None):
data_config['interpolation'] = args['interpolation']
elif pretrained_cfg.get('interpolation', None):
data_config['interpolation'] = pretrained_cfg['interpolation']
# resolve dataset + model mean for normalization
new_config['mean'] = IMAGENET_DEFAULT_MEAN
if 'mean' in args and args['mean'] is not None:
data_config['mean'] = IMAGENET_DEFAULT_MEAN
if args.get('mean', None) is not None:
mean = tuple(args['mean'])
if len(mean) == 1:
mean = tuple(list(mean) * in_chans)
else:
assert len(mean) == in_chans
new_config['mean'] = mean
elif 'mean' in default_cfg:
new_config['mean'] = default_cfg['mean']
data_config['mean'] = mean
elif pretrained_cfg.get('mean', None):
data_config['mean'] = pretrained_cfg['mean']
# resolve dataset + model std deviation for normalization
new_config['std'] = IMAGENET_DEFAULT_STD
if 'std' in args and args['std'] is not None:
data_config['std'] = IMAGENET_DEFAULT_STD
if args.get('std', None) is not None:
std = tuple(args['std'])
if len(std) == 1:
std = tuple(list(std) * in_chans)
else:
assert len(std) == in_chans
new_config['std'] = std
elif 'std' in default_cfg:
new_config['std'] = default_cfg['std']
data_config['std'] = std
elif pretrained_cfg.get('std', None):
data_config['std'] = pretrained_cfg['std']
# resolve default crop percentage
# resolve default inference crop
crop_pct = DEFAULT_CROP_PCT
if 'crop_pct' in args and args['crop_pct'] is not None:
if args.get('crop_pct', None):
crop_pct = args['crop_pct']
else:
if use_test_size and 'test_crop_pct' in default_cfg:
crop_pct = default_cfg['test_crop_pct']
elif 'crop_pct' in default_cfg:
crop_pct = default_cfg['crop_pct']
new_config['crop_pct'] = crop_pct
if use_test_size and pretrained_cfg.get('test_crop_pct', None):
crop_pct = pretrained_cfg['test_crop_pct']
elif pretrained_cfg.get('crop_pct', None):
crop_pct = pretrained_cfg['crop_pct']
data_config['crop_pct'] = crop_pct
# resolve default crop mode
crop_mode = DEFAULT_CROP_MODE
if args.get('crop_mode', None):
crop_mode = args['crop_mode']
elif pretrained_cfg.get('crop_mode', None):
crop_mode = pretrained_cfg['crop_mode']
data_config['crop_mode'] = crop_mode
if verbose:
_logger.info('Data processing configuration for current model + dataset:')
for n, v in new_config.items():
for n, v in data_config.items():
_logger.info('\t%s: %s' % (n, str(v)))
return new_config
return data_config
def resolve_model_data_config(
model,
args=None,
pretrained_cfg=None,
use_test_size=False,
verbose=False,
):
""" Resolve Model Data Config
This is equivalent to resolve_data_config() but with arguments re-ordered to put model first.
Args:
model (nn.Module): the model instance
args (dict): command line arguments / configuration in dict form (overrides pretrained_cfg)
pretrained_cfg (dict): pretrained model config (overrides pretrained_cfg attached to model)
use_test_size (bool): use the test time input resolution (if one exists) instead of default train resolution
verbose (bool): enable extra logging of resolved values
Returns:
dictionary of config
"""
return resolve_data_config(
args=args,
pretrained_cfg=pretrained_cfg,
model=model,
use_test_size=use_test_size,
verbose=verbose,
)
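Sketch of the model-first helper in use (resolved values depend on the model's pretrained cfg):

    import timm
    from timm.data import resolve_model_data_config

    model = timm.create_model('resnet50', pretrained=False)
    cfg = resolve_model_data_config(model, use_test_size=True)
    # e.g. {'input_size': (3, 224, 224), 'interpolation': 'bicubic', 'mean': ...,
    #       'std': ..., 'crop_pct': ..., 'crop_mode': 'center'}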

@ -1,4 +1,5 @@
DEFAULT_CROP_PCT = 0.875
DEFAULT_CROP_MODE = 'center'
IMAGENET_DEFAULT_MEAN = (0.485, 0.456, 0.406)
IMAGENET_DEFAULT_STD = (0.229, 0.224, 0.225)
IMAGENET_INCEPTION_MEAN = (0.5, 0.5, 0.5)

@ -151,7 +151,7 @@ def create_dataset(
elif name.startswith('hfds/'):
# NOTE right now, HF datasets default arrow format is a random-access Dataset,
# There will be an IterableDataset variant too, TBD
ds = ImageDataset(root, reader=name, split=split, **kwargs)
ds = ImageDataset(root, reader=name, split=split, class_map=class_map, **kwargs)
elif name.startswith('tfds/'):
ds = IterableImageDataset(
root,

@ -0,0 +1,73 @@
from abc import ABC, abstractmethod
from typing import Dict, List, Optional, Union
class DatasetInfo(ABC):
def __init__(self):
pass
@abstractmethod
def num_classes(self):
pass
@abstractmethod
def label_names(self):
pass
@abstractmethod
def label_descriptions(self, detailed: bool = False, as_dict: bool = False) -> Union[List[str], Dict[str, str]]:
pass
@abstractmethod
def index_to_label_name(self, index) -> str:
pass
@abstractmethod
def index_to_description(self, index: int, detailed: bool = False) -> str:
pass
@abstractmethod
def label_name_to_description(self, label: str, detailed: bool = False) -> str:
pass
class CustomDatasetInfo(DatasetInfo):
""" DatasetInfo that wraps passed values for custom datasets."""
def __init__(
self,
label_names: Union[List[str], Dict[int, str]],
label_descriptions: Optional[Dict[str, str]] = None
):
super().__init__()
assert len(label_names) > 0
self._label_names = label_names # label index => label name mapping
self._label_descriptions = label_descriptions # label name => label description mapping
if self._label_descriptions is not None:
# validate descriptions (label names required)
assert isinstance(self._label_descriptions, dict)
for n in self._label_names:
assert n in self._label_descriptions
def num_classes(self):
return len(self._label_names)
def label_names(self):
return self._label_names
def label_descriptions(self, detailed: bool = False, as_dict: bool = False) -> Union[List[str], Dict[str, str]]:
return self._label_descriptions
def label_name_to_description(self, label: str, detailed: bool = False) -> str:
if self._label_descriptions:
return self._label_descriptions[label]
return label # return label name itself if a description is not present
def index_to_label_name(self, index) -> str:
assert 0 <= index < len(self._label_names)
return self._label_names[index]
def index_to_description(self, index: int, detailed: bool = False) -> str:
label = self.index_to_label_name(index)
return self.label_name_to_description(label, detailed=detailed)
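For example, wrapping a two-class custom dataset (a sketch using the class above):

    from timm.data import CustomDatasetInfo

    info = CustomDatasetInfo(
        label_names=['cat', 'dog'],
        label_descriptions={'cat': 'a small domestic cat', 'dog': 'a domestic dog'},
    )
    assert info.num_classes() == 2
    assert info.index_to_description(1) == 'a domestic dog'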

@ -0,0 +1,92 @@
import csv
import os
import pkgutil
import re
from typing import Dict, List, Optional, Union
from .dataset_info import DatasetInfo
_NUM_CLASSES_TO_SUBSET = {
1000: 'imagenet-1k',
11821: 'imagenet-12k',
21841: 'imagenet-22k',
21843: 'imagenet-21k-goog',
11221: 'imagenet-21k-miil',
}
_SUBSETS = {
'imagenet1k': 'imagenet_synsets.txt',
'imagenet12k': 'imagenet12k_synsets.txt',
'imagenet22k': 'imagenet22k_synsets.txt',
'imagenet21k': 'imagenet21k_goog_synsets.txt',
'imagenet21kgoog': 'imagenet21k_goog_synsets.txt',
'imagenet21kmiil': 'imagenet21k_miil_synsets.txt',
}
_LEMMA_FILE = 'imagenet_synset_to_lemma.txt'
_DEFINITION_FILE = 'imagenet_synset_to_definition.txt'
def infer_imagenet_subset(model_or_cfg) -> Optional[str]:
if isinstance(model_or_cfg, dict):
num_classes = model_or_cfg.get('num_classes', None)
else:
num_classes = getattr(model_or_cfg, 'num_classes', None)
if not num_classes:
pretrained_cfg = getattr(model_or_cfg, 'pretrained_cfg', {})
# FIXME at some point pretrained_cfg should include dataset-tag,
# which will be more robust than a guess based on num_classes
num_classes = pretrained_cfg.get('num_classes', None)
if not num_classes or num_classes not in _NUM_CLASSES_TO_SUBSET:
return None
return _NUM_CLASSES_TO_SUBSET[num_classes]
class ImageNetInfo(DatasetInfo):
def __init__(self, subset: str = 'imagenet-1k'):
super().__init__()
subset = re.sub(r'[-_\s]', '', subset.lower())
assert subset in _SUBSETS, f'Unknown imagenet subset {subset}.'
# WordNet synsets (part-of-speech + offset) are the unique class label names for ImageNet classifiers
synset_file = _SUBSETS[subset]
synset_data = pkgutil.get_data(__name__, os.path.join('_info', synset_file))
self._synsets = synset_data.decode('utf-8').splitlines()
# WordNet lemmas (canonical dictionary form of word) and definitions are used to build
# the class descriptions. If detailed=True both are used, otherwise just the lemmas.
lemma_data = pkgutil.get_data(__name__, os.path.join('_info', _LEMMA_FILE))
reader = csv.reader(lemma_data.decode('utf-8').splitlines(), delimiter='\t')
self._lemmas = dict(reader)
definition_data = pkgutil.get_data(__name__, os.path.join('_info', _DEFINITION_FILE))
reader = csv.reader(definition_data.decode('utf-8').splitlines(), delimiter='\t')
self._definitions = dict(reader)
def num_classes(self):
return len(self._synsets)
def label_names(self):
return self._synsets
def label_descriptions(self, detailed: bool = False, as_dict: bool = False) -> Union[List[str], Dict[str, str]]:
if as_dict:
return {label: self.label_name_to_description(label, detailed=detailed) for label in self._synsets}
else:
return [self.label_name_to_description(label, detailed=detailed) for label in self._synsets]
def index_to_label_name(self, index) -> str:
assert 0 <= index < len(self._synsets), \
f'Index ({index}) out of range for dataset with {len(self._synsets)} classes.'
return self._synsets[index]
def index_to_description(self, index: int, detailed: bool = False) -> str:
label = self.index_to_label_name(index)
return self.label_name_to_description(label, detailed=detailed)
def label_name_to_description(self, label: str, detailed: bool = False) -> str:
if detailed:
description = f'{self._lemmas[label]}: {self._definitions[label]}'
else:
description = f'{self._lemmas[label]}'
return description
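Putting the two helpers together for inference labelling, a sketch (the description text depends on the packaged synset files):

    import timm
    from timm.data import ImageNetInfo, infer_imagenet_subset

    model = timm.create_model('resnet50', pretrained=False)
    subset = infer_imagenet_subset(model)   # 'imagenet-1k' for 1000 classes
    info = ImageNetInfo(subset)
    print(info.index_to_description(0))     # lemma of the first synset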

@ -211,6 +211,7 @@ def create_loader(
num_workers=1,
distributed=False,
crop_pct=None,
crop_mode=None,
collate_fn=None,
pin_memory=False,
fp16=False, # deprecated, use img_dtype
@ -240,6 +241,7 @@ def create_loader(
mean=mean,
std=std,
crop_pct=crop_pct,
crop_mode=crop_mode,
tf_preprocessing=tf_preprocessing,
re_prob=re_prob,
re_mode=re_mode,

@ -1,6 +1,7 @@
import os
import pickle
def load_class_map(map_or_filename, root=''):
if isinstance(map_or_filename, dict):
assert map_or_filename, 'class_map dict must be non-empty'
@ -14,7 +15,7 @@ def load_class_map(map_or_filename, root=''):
with open(class_map_path) as f:
class_to_idx = {v.strip(): k for k, v in enumerate(f)}
elif class_map_ext == '.pkl':
with open(class_map_path,'rb') as f:
with open(class_map_path, 'rb') as f:
class_to_idx = pickle.load(f)
else:
assert False, f'Unsupported class map file extension ({class_map_ext}).'

@ -6,7 +6,7 @@ from .reader_image_in_tar import ReaderImageInTar
def create_reader(name, root, split='train', **kwargs):
name = name.lower()
name = name.split('/', 2)
name = name.split('/', 1)
prefix = ''
if len(name) > 1:
prefix = name[0]

@ -13,13 +13,14 @@ try:
except ImportError as e:
print("Please install Hugging Face datasets package `pip install datasets`.")
exit(1)
from .class_map import load_class_map
from .reader import Reader
def get_class_labels(info):
def get_class_labels(info, label_key='label'):
if label_key not in info.features:
return {}
class_label = info.features['label']
class_label = info.features[label_key]
class_to_idx = {n: class_label.str2int(n) for n in class_label.names}
return class_to_idx
@ -32,6 +33,7 @@ class ReaderHfds(Reader):
name,
split='train',
class_map=None,
label_key='label',
download=False,
):
"""
@ -43,12 +45,17 @@ class ReaderHfds(Reader):
name, # 'name' maps to path arg in hf datasets
split=split,
cache_dir=self.root, # timm doesn't expect hidden cache dir for datasets, specify a path
#use_auth_token=True,
)
# leave decode for caller, plus we want easy access to original path names...
self.dataset = self.dataset.cast_column('image', datasets.Image(decode=False))
self.class_to_idx = get_class_labels(self.dataset.info)
self.label_key = label_key
self.remap_class = False
if class_map:
self.class_to_idx = load_class_map(class_map)
self.remap_class = True
else:
self.class_to_idx = get_class_labels(self.dataset.info, self.label_key)
self.split_info = self.dataset.info.splits[split]
self.num_samples = self.split_info.num_examples
@ -60,7 +67,10 @@ class ReaderHfds(Reader):
else:
assert 'path' in image and image['path']
image = open(image['path'], 'rb')
return image, item['label']
label = item[self.label_key]
if self.remap_class:
label = self.class_to_idx[label]
return image, label
def __len__(self):
return len(self.dataset)
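Sketch of the reader through the dataset factory (dataset name illustrative; downloads from the HF hub on first use):

    from timm.data import create_dataset

    # 'hfds/' prefix selects the Hugging Face datasets reader; class_map is now
    # forwarded (per the factory change above) and label_key selects the column
    ds = create_dataset('hfds/food101', root='./hf_cache', split='train')
    img, label = ds[0]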

@ -43,6 +43,15 @@ SHUFFLE_SIZE = int(os.environ.get('TFDS_SHUFFLE_SIZE', 8192)) # samples to shuf
PREFETCH_SIZE = int(os.environ.get('TFDS_PREFETCH_SIZE', 2048)) # samples to prefetch
@tfds.decode.make_decoder()
def decode_example(serialized_image, feature, dct_method='INTEGER_ACCURATE'):
return tf.image.decode_jpeg(
serialized_image,
channels=3,
dct_method=dct_method,
)
def even_split_indices(split, n, num_samples):
partitions = [round(i * num_samples / n) for i in range(n + 1)]
return [f"{split}[{partitions[i]}:{partitions[i + 1]}]" for i in range(n)]
@ -242,6 +251,7 @@ class ReaderTfds(Reader):
ds = self.builder.as_dataset(
split=self.subsplit or self.split,
shuffle_files=self.is_training,
decoders=dict(image=decode_example()),
read_config=read_config,
)
# avoid overloading threading w/ combo of TF ds threads + PyTorch workers

@ -7,14 +7,19 @@ Hacked together by / Copyright 2020 Ross Wightman
import os
import json
import numpy as np
import pkgutil
class RealLabelsImagenet:
def __init__(self, filenames, real_json='real.json', topk=(1, 5)):
with open(real_json) as real_labels:
real_labels = json.load(real_labels)
real_labels = {f'ILSVRC2012_val_{i + 1:08d}.JPEG': labels for i, labels in enumerate(real_labels)}
def __init__(self, filenames, real_json=None, topk=(1, 5)):
if real_json is not None:
with open(real_json) as real_labels:
real_labels = json.load(real_labels)
else:
real_labels = json.loads(
pkgutil.get_data(__name__, os.path.join('_info', 'imagenet_real_labels.json')).decode('utf-8'))
real_labels = {f'ILSVRC2012_val_{i + 1:08d}.JPEG': labels for i, labels in enumerate(real_labels)}
self.real_labels = real_labels
self.filenames = filenames
assert len(self.filenames) == len(self.real_labels)
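With the packaged fallback above, ReaL scoring no longer needs a local real.json. A sketch (filenames follow the standard val naming):

    from timm.data.real_labels import RealLabelsImagenet

    filenames = [f'ILSVRC2012_val_{i + 1:08d}.JPEG' for i in range(50000)]
    real = RealLabelsImagenet(filenames, topk=(1, 5))  # labels load via pkgutil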

@ -22,12 +22,13 @@ Hacked together by / Copyright 2020 Ross Wightman
# limitations under the License.
# ==============================================================================
"""ImageNet preprocessing for MnasNet."""
import tensorflow as tf
import tensorflow.compat.v1 as tf
import numpy as np
IMAGE_SIZE = 224
CROP_PADDING = 32
tf.compat.v1.disable_eager_execution()
def distorted_bounding_box_crop(image_bytes,
bbox,

@ -1,3 +1,9 @@
import math
import numbers
import random
import warnings
from typing import List, Sequence
import torch
import torchvision.transforms.functional as F
try:
@ -6,9 +12,6 @@ try:
except ImportError:
has_interpolation_mode = False
from PIL import Image
import warnings
import math
import random
import numpy as np
@ -96,6 +99,19 @@ def interp_mode_to_str(mode):
_RANDOM_INTERPOLATION = (str_to_interp_mode('bilinear'), str_to_interp_mode('bicubic'))
def _setup_size(size, error_msg):
if isinstance(size, numbers.Number):
return int(size), int(size)
if isinstance(size, Sequence) and len(size) == 1:
return size[0], size[0]
if len(size) != 2:
raise ValueError(error_msg)
return size
class RandomResizedCropAndInterpolation:
"""Crop the given PIL Image to random size and aspect ratio with random interpolation.
@ -195,3 +211,132 @@ class RandomResizedCropAndInterpolation:
format_string += ', ratio={0}'.format(tuple(round(r, 4) for r in self.ratio))
format_string += ', interpolation={0})'.format(interpolate_str)
return format_string
def center_crop_or_pad(img: torch.Tensor, output_size: List[int], fill=0) -> torch.Tensor:
"""Center crops and/or pads the given image.
If the image is torch Tensor, it is expected
to have [..., H, W] shape, where ... means an arbitrary number of leading dimensions.
If image size is smaller than output size along any edge, image is padded with 0 and then center cropped.
Args:
img (PIL Image or Tensor): Image to be cropped.
output_size (sequence or int): (height, width) of the crop box. If int or sequence with single int,
it is used for both directions.
fill (int, Tuple[int]): Padding color
Returns:
PIL Image or Tensor: Cropped image.
"""
if isinstance(output_size, numbers.Number):
output_size = (int(output_size), int(output_size))
elif isinstance(output_size, (tuple, list)) and len(output_size) == 1:
output_size = (output_size[0], output_size[0])
_, image_height, image_width = F.get_dimensions(img)
crop_height, crop_width = output_size
if crop_width > image_width or crop_height > image_height:
padding_ltrb = [
(crop_width - image_width) // 2 if crop_width > image_width else 0,
(crop_height - image_height) // 2 if crop_height > image_height else 0,
(crop_width - image_width + 1) // 2 if crop_width > image_width else 0,
(crop_height - image_height + 1) // 2 if crop_height > image_height else 0,
]
img = F.pad(img, padding_ltrb, fill=fill)
_, image_height, image_width = F.get_dimensions(img)
if crop_width == image_width and crop_height == image_height:
return img
crop_top = int(round((image_height - crop_height) / 2.0))
crop_left = int(round((image_width - crop_width) / 2.0))
return F.crop(img, crop_top, crop_left, crop_height, crop_width)
class CenterCropOrPad(torch.nn.Module):
"""Crops the given image at the center.
If the image is torch Tensor, it is expected
to have [..., H, W] shape, where ... means an arbitrary number of leading dimensions.
If image size is smaller than output size along any edge, image is padded with 0 and then center cropped.
Args:
size (sequence or int): Desired output size of the crop. If size is an
int instead of sequence like (h, w), a square crop (size, size) is
made. If provided a sequence of length 1, it will be interpreted as (size[0], size[0]).
"""
def __init__(self, size, fill=0):
super().__init__()
self.size = _setup_size(size, error_msg="Please provide only two dimensions (h, w) for size.")
self.fill = fill
def forward(self, img):
"""
Args:
img (PIL Image or Tensor): Image to be cropped.
Returns:
PIL Image or Tensor: Cropped image.
"""
return center_crop_or_pad(img, self.size, fill=self.fill)
def __repr__(self) -> str:
return f"{self.__class__.__name__}(size={self.size})"
class ResizeKeepRatio:
""" Resize and Keep Ratio
"""
def __init__(
self,
size,
longest=0.,
interpolation='bilinear',
fill=0,
):
if isinstance(size, (list, tuple)):
self.size = tuple(size)
else:
self.size = (size, size)
self.interpolation = str_to_interp_mode(interpolation)
self.longest = float(longest)
self.fill = fill
@staticmethod
def get_params(img, target_size, longest):
"""Get parameters
Args:
img (PIL Image): Image to be cropped.
target_size (Tuple[int, int]): Size of output
Returns:
tuple: size (h, w) to be passed to ``resize``
"""
source_size = img.size[::-1] # h, w
h, w = source_size
target_h, target_w = target_size
ratio_h = h / target_h
ratio_w = w / target_w
ratio = max(ratio_h, ratio_w) * longest + min(ratio_h, ratio_w) * (1. - longest)
size = [round(x / ratio) for x in source_size]
return size
def __call__(self, img):
"""
Args:
img (PIL Image): Image to be cropped and resized.
Returns:
PIL Image: Resized image; aspect ratio kept per `longest`, padding/cropping to the exact target size is left to subsequent transforms
"""
size = self.get_params(img, self.size, self.longest)
img = F.resize(img, size, self.interpolation)
return img
def __repr__(self):
interpolate_str = interp_mode_to_str(self.interpolation)
format_string = self.__class__.__name__ + '(size={0}'.format(self.size)
format_string += f', interpolation={interpolate_str}'
format_string += f', longest={self.longest:.3f})'
return format_string
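The new transforms compose as in this sketch (sizes illustrative; F.get_dimensions assumes torchvision >= 0.13):

    from PIL import Image
    from timm.data.transforms import CenterCropOrPad, ResizeKeepRatio

    img = Image.new('RGB', (640, 480))
    resize = ResizeKeepRatio(256, longest=1.)  # fit longest edge to 256
    crop = CenterCropOrPad(256, fill=0)        # pad short side, then center crop
    out = crop(resize(img))
    assert out.size == (256, 256)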

@ -10,7 +10,8 @@ from torchvision import transforms
from timm.data.constants import IMAGENET_DEFAULT_MEAN, IMAGENET_DEFAULT_STD, DEFAULT_CROP_PCT
from timm.data.auto_augment import rand_augment_transform, augment_and_mix_transform, auto_augment_transform
from timm.data.transforms import str_to_interp_mode, str_to_pil_interp, RandomResizedCropAndInterpolation, ToNumpy
from timm.data.transforms import str_to_interp_mode, str_to_pil_interp, RandomResizedCropAndInterpolation,\
ResizeKeepRatio, CenterCropOrPad, ToNumpy
from timm.data.random_erasing import RandomErasing
@ -58,6 +59,7 @@ def transforms_imagenet_train(
re_count=1,
re_num_splits=0,
separate=False,
force_color_jitter=False,
):
"""
If separate==True, the transforms are returned as a tuple of 3 separate transforms
@ -76,8 +78,12 @@ def transforms_imagenet_train(
primary_tfl += [transforms.RandomVerticalFlip(p=vflip)]
secondary_tfl = []
disable_color_jitter = False
if auto_augment:
assert isinstance(auto_augment, str)
# color jitter is typically disabled if AA/RA on,
# this allows override without breaking old hparam cfgs
disable_color_jitter = not (force_color_jitter or '3a' in auto_augment)
if isinstance(img_size, (tuple, list)):
img_size_min = min(img_size)
else:
@ -95,8 +101,9 @@ def transforms_imagenet_train(
secondary_tfl += [augment_and_mix_transform(auto_augment, aa_params)]
else:
secondary_tfl += [auto_augment_transform(auto_augment, aa_params)]
elif color_jitter is not None:
# color jitter is enabled when not using AA
if color_jitter is not None and not disable_color_jitter:
# color jitter is enabled when not using AA or when forced
if isinstance(color_jitter, (list, tuple)):
# color jitter should be a 3-tuple/list if spec brightness/contrast/saturation
# or 4 if also augmenting hue
@ -130,26 +137,49 @@ def transforms_imagenet_train(
def transforms_imagenet_eval(
img_size=224,
crop_pct=None,
crop_mode=None,
interpolation='bilinear',
use_prefetcher=False,
mean=IMAGENET_DEFAULT_MEAN,
std=IMAGENET_DEFAULT_STD):
std=IMAGENET_DEFAULT_STD
):
crop_pct = crop_pct or DEFAULT_CROP_PCT
if isinstance(img_size, (tuple, list)):
assert len(img_size) == 2
if img_size[-1] == img_size[-2]:
# fall-back to older behaviour so Resize scales to shortest edge if target is square
scale_size = int(math.floor(img_size[0] / crop_pct))
else:
scale_size = tuple([int(x / crop_pct) for x in img_size])
scale_size = tuple([math.floor(x / crop_pct) for x in img_size])
else:
scale_size = int(math.floor(img_size / crop_pct))
scale_size = math.floor(img_size / crop_pct)
scale_size = (scale_size, scale_size)
if crop_mode == 'squash':
# squash mode scales each edge to 1/pct of target, then crops
# aspect ratio is not preserved, no img lost if crop_pct == 1.0
tfl = [
transforms.Resize(scale_size, interpolation=str_to_interp_mode(interpolation)),
transforms.CenterCrop(img_size),
]
elif crop_mode == 'border':
# scale the longest edge of image to 1/pct of target edge, add borders to pad, then crop
# no image lost if crop_pct == 1.0
fill = [round(255 * v) for v in mean]
tfl = [
ResizeKeepRatio(scale_size, interpolation=interpolation, longest=1.0),
CenterCropOrPad(img_size, fill=fill),
]
else:
# default crop mode is center
# aspect ratio is preserved, crops center within image, no borders are added, some image content is lost if crop_pct < 1.0
if scale_size[0] == scale_size[1]:
# simple case, use torchvision built-in Resize w/ shortest edge mode (scalar size arg)
tfl = [
transforms.Resize(scale_size[0], interpolation=str_to_interp_mode(interpolation))
]
else:
# resize shortest edge to matching target dim for non-square target
tfl = [ResizeKeepRatio(scale_size)]
tfl += [transforms.CenterCrop(img_size)]
tfl = [
transforms.Resize(scale_size, interpolation=str_to_interp_mode(interpolation)),
transforms.CenterCrop(img_size),
]
if use_prefetcher:
# prefetcher and collate will handle tensor conversion and norm
tfl += [ToNumpy()]
@ -157,8 +187,9 @@ def transforms_imagenet_eval(
tfl += [
transforms.ToTensor(),
transforms.Normalize(
mean=torch.tensor(mean),
std=torch.tensor(std))
mean=torch.tensor(mean),
std=torch.tensor(std),
)
]
return transforms.Compose(tfl)
@ -183,6 +214,7 @@ def create_transform(
re_count=1,
re_num_splits=0,
crop_pct=None,
crop_mode=None,
tf_preprocessing=False,
separate=False):
@ -204,7 +236,8 @@ def create_transform(
interpolation=interpolation,
use_prefetcher=use_prefetcher,
mean=mean,
std=std)
std=std,
)
elif is_training:
transform = transforms_imagenet_train(
img_size,
@ -222,7 +255,8 @@ def create_transform(
re_mode=re_mode,
re_count=re_count,
re_num_splits=re_num_splits,
separate=separate)
separate=separate,
)
else:
assert not separate, "Separate transforms not supported for validation preprocessing"
transform = transforms_imagenet_eval(
@ -231,6 +265,8 @@ def create_transform(
use_prefetcher=use_prefetcher,
mean=mean,
std=std,
crop_pct=crop_pct)
crop_pct=crop_pct,
crop_mode=crop_mode,
)
return transform
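End-to-end sketch of the new eval crop modes (mode names per the factory above):

    from timm.data import create_transform

    # 'squash' resizes both edges (aspect not preserved); 'border' pads to keep
    # the full image; the default 'center' crops as before
    tfm = create_transform(input_size=(3, 224, 224), crop_pct=1.0, crop_mode='squash')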

@ -0,0 +1,50 @@
from .activations import *
from .adaptive_avgmax_pool import \
adaptive_avgmax_pool2d, select_adaptive_pool2d, AdaptiveAvgMaxPool2d, SelectAdaptivePool2d
from .attention_pool2d import AttentionPool2d, RotAttentionPool2d, RotaryEmbedding
from .blur_pool import BlurPool2d
from .classifier import ClassifierHead, create_classifier, NormMlpClassifierHead
from .cond_conv2d import CondConv2d, get_condconv_initializer
from .config import is_exportable, is_scriptable, is_no_jit, set_exportable, set_scriptable, set_no_jit,\
set_layer_config
from .conv2d_same import Conv2dSame, conv2d_same
from .conv_bn_act import ConvNormAct, ConvNormActAa, ConvBnAct
from .create_act import create_act_layer, get_act_layer, get_act_fn
from .create_attn import get_attn, create_attn
from .create_conv2d import create_conv2d
from .create_norm import get_norm_layer, create_norm_layer
from .create_norm_act import get_norm_act_layer, create_norm_act_layer
from .drop import DropBlock2d, DropPath, drop_block_2d, drop_path
from .eca import EcaModule, CecaModule, EfficientChannelAttn, CircularEfficientChannelAttn
from .evo_norm import EvoNorm2dB0, EvoNorm2dB1, EvoNorm2dB2,\
EvoNorm2dS0, EvoNorm2dS0a, EvoNorm2dS1, EvoNorm2dS1a, EvoNorm2dS2, EvoNorm2dS2a
from .fast_norm import is_fast_norm, set_fast_norm, fast_group_norm, fast_layer_norm
from .filter_response_norm import FilterResponseNormTlu2d, FilterResponseNormAct2d
from .gather_excite import GatherExcite
from .global_context import GlobalContext
from .helpers import to_ntuple, to_2tuple, to_3tuple, to_4tuple, make_divisible, extend_tuple
from .inplace_abn import InplaceAbn
from .linear import Linear
from .mixed_conv2d import MixedConv2d
from .mlp import Mlp, GluMlp, GatedMlp, ConvMlp, GlobalResponseNormMlp
from .non_local_attn import NonLocalAttn, BatNonLocalAttn
from .norm import GroupNorm, GroupNorm1, LayerNorm, LayerNorm2d, RmsNorm
from .norm_act import BatchNormAct2d, GroupNormAct, GroupNorm1Act, LayerNormAct, LayerNormAct2d,\
SyncBatchNormAct, convert_sync_batchnorm, FrozenBatchNormAct2d, freeze_batch_norm_2d, unfreeze_batch_norm_2d
from .padding import get_padding, get_same_padding, pad_same
from .patch_embed import PatchEmbed, resample_patch_embed
from .pool2d_same import AvgPool2dSame, create_pool2d
from .pos_embed import resample_abs_pos_embed
from .pos_embed_rel import RelPosMlp, RelPosBias, RelPosBiasTf, gen_relative_position_index, gen_relative_log_coords
from .pos_embed_sincos import build_sincos2d_pos_embed, build_fourier_pos_embed, build_rotary_pos_embed, \
FourierEmbed, RotaryEmbedding
from .squeeze_excite import SEModule, SqueezeExcite, EffectiveSEModule, EffectiveSqueezeExcite
from .selective_kernel import SelectiveKernel
from .separable_conv import SeparableConv2d, SeparableConvNormAct
from .space_to_depth import SpaceToDepthModule
from .split_attn import SplitAttn
from .split_batchnorm import SplitBatchNorm2d, convert_splitbn_model
from .std_conv import StdConv2d, StdConv2dSame, ScaledStdConv2d, ScaledStdConv2dSame
from .test_time_pool import TestTimePoolHead, apply_test_time_pool
from .trace_utils import _assert, _float_to_int
from .weight_init import trunc_normal_, trunc_normal_tf_, variance_scaling_, lecun_normal_

@ -143,3 +143,17 @@ class GELU(nn.Module):
def forward(self, input: torch.Tensor) -> torch.Tensor:
return F.gelu(input)
def gelu_tanh(x: torch.Tensor, inplace: bool = False) -> torch.Tensor:
return F.gelu(x, approximate='tanh')
class GELUTanh(nn.Module):
"""Applies the Gaussian Error Linear Units function (w/ dummy inplace arg)
"""
def __init__(self, inplace: bool = False):
super(GELUTanh, self).__init__()
def forward(self, input: torch.Tensor) -> torch.Tensor:
return F.gelu(input, approximate='tanh')
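
A quick sketch of the tanh-approximate GELU (requires PyTorch >= 1.12, where F.gelu gained the approximate arg; import path per the timm.layers refactor):

    import torch
    from timm.layers.activations import GELUTanh

    x = torch.randn(2, 8)
    y = GELUTanh()(x)  # same as F.gelu(x, approximate='tanh'); inplace is a no-op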

@@ -13,7 +13,7 @@ import torch
import torch.nn as nn

from .helpers import to_2tuple
-from .pos_embed import apply_rot_embed, RotaryEmbedding
+from .pos_embed_sincos import apply_rot_embed, RotaryEmbedding
from .weight_init import trunc_normal_

@@ -0,0 +1,161 @@
""" Classifier head and layer factory

Hacked together by / Copyright 2020 Ross Wightman
"""
from collections import OrderedDict
from functools import partial
from typing import Optional, Union, Callable

import torch
import torch.nn as nn
from torch.nn import functional as F

from .adaptive_avgmax_pool import SelectAdaptivePool2d
from .create_act import get_act_layer
from .create_norm import get_norm_layer
def _create_pool(num_features, num_classes, pool_type='avg', use_conv=False):
    flatten_in_pool = not use_conv  # flatten when we use a Linear layer after pooling
    if not pool_type:
        assert num_classes == 0 or use_conv,\
            'Pooling can only be disabled if classifier is also removed or conv classifier is used'
        flatten_in_pool = False  # disable flattening if pooling is pass-through (no pooling)
    global_pool = SelectAdaptivePool2d(pool_type=pool_type, flatten=flatten_in_pool)
    num_pooled_features = num_features * global_pool.feat_mult()
    return global_pool, num_pooled_features
def _create_fc(num_features, num_classes, use_conv=False):
    if num_classes <= 0:
        fc = nn.Identity()  # pass-through (no classifier)
    elif use_conv:
        fc = nn.Conv2d(num_features, num_classes, 1, bias=True)
    else:
        fc = nn.Linear(num_features, num_classes, bias=True)
    return fc


def create_classifier(num_features, num_classes, pool_type='avg', use_conv=False):
    global_pool, num_pooled_features = _create_pool(num_features, num_classes, pool_type, use_conv=use_conv)
    fc = _create_fc(num_pooled_features, num_classes, use_conv=use_conv)
    return global_pool, fc
class ClassifierHead(nn.Module):
    """Classifier head w/ configurable global pooling and dropout."""

    def __init__(
            self,
            in_features: int,
            num_classes: int,
            pool_type: str = 'avg',
            drop_rate: float = 0.,
            use_conv: bool = False,
    ):
        """
        Args:
            in_features: The number of input features.
            num_classes: The number of classes for the final classifier layer (output).
            pool_type: Global pooling type, pooling disabled if empty string ('').
            drop_rate: Pre-classifier dropout rate.
            use_conv: Use a 1x1 conv instead of a linear layer for the classifier.
        """
        super(ClassifierHead, self).__init__()
        self.drop_rate = drop_rate
        self.in_features = in_features
        self.use_conv = use_conv

        self.global_pool, num_pooled_features = _create_pool(in_features, num_classes, pool_type, use_conv=use_conv)
        self.fc = _create_fc(num_pooled_features, num_classes, use_conv=use_conv)
        self.flatten = nn.Flatten(1) if use_conv and pool_type else nn.Identity()
    def reset(self, num_classes, global_pool=None):
        if global_pool is not None:
            if global_pool != self.global_pool.pool_type:
                self.global_pool, _ = _create_pool(self.in_features, num_classes, global_pool, use_conv=self.use_conv)
                self.flatten = nn.Flatten(1) if self.use_conv and global_pool else nn.Identity()
        num_pooled_features = self.in_features * self.global_pool.feat_mult()
        self.fc = _create_fc(num_pooled_features, num_classes, use_conv=self.use_conv)

    def forward(self, x, pre_logits: bool = False):
        x = self.global_pool(x)
        if self.drop_rate:
            x = F.dropout(x, p=float(self.drop_rate), training=self.training)
        if pre_logits:
            return x.flatten(1)
        else:
            x = self.fc(x)
            return self.flatten(x)
class NormMlpClassifierHead(nn.Module):

    def __init__(
            self,
            in_features: int,
            num_classes: int,
            hidden_size: Optional[int] = None,
            pool_type: str = 'avg',
            drop_rate: float = 0.,
            norm_layer: Union[str, Callable] = 'layernorm2d',
            act_layer: Union[str, Callable] = 'tanh',
    ):
        """
        Args:
            in_features: The number of input features.
            num_classes: The number of classes for the final classifier layer (output).
            hidden_size: The hidden size of the MLP (pre-logits FC layer) if not None.
            pool_type: Global pooling type, pooling disabled if empty string ('').
            drop_rate: Pre-classifier dropout rate.
            norm_layer: Normalization layer type.
            act_layer: MLP activation layer type (only used if hidden_size is not None).
        """
        super().__init__()
        self.drop_rate = drop_rate
        self.in_features = in_features
        self.hidden_size = hidden_size
        self.num_features = in_features
        self.use_conv = not pool_type
        norm_layer = get_norm_layer(norm_layer)
        act_layer = get_act_layer(act_layer)
        linear_layer = partial(nn.Conv2d, kernel_size=1) if self.use_conv else nn.Linear

        self.global_pool = SelectAdaptivePool2d(pool_type=pool_type)
        self.norm = norm_layer(in_features)
        self.flatten = nn.Flatten(1) if pool_type else nn.Identity()
        if hidden_size:
            self.pre_logits = nn.Sequential(OrderedDict([
                ('fc', linear_layer(in_features, hidden_size)),
                ('act', act_layer()),
            ]))
            self.num_features = hidden_size
        else:
            self.pre_logits = nn.Identity()
        self.drop = nn.Dropout(self.drop_rate)
        self.fc = linear_layer(self.num_features, num_classes) if num_classes > 0 else nn.Identity()
    def reset(self, num_classes, global_pool=None):
        if global_pool is not None:
            self.global_pool = SelectAdaptivePool2d(pool_type=global_pool)
            self.flatten = nn.Flatten(1) if global_pool else nn.Identity()
            self.use_conv = self.global_pool.is_identity()
        linear_layer = partial(nn.Conv2d, kernel_size=1) if self.use_conv else nn.Linear
        if self.hidden_size:
            if ((isinstance(self.pre_logits.fc, nn.Conv2d) and not self.use_conv) or
                    (isinstance(self.pre_logits.fc, nn.Linear) and self.use_conv)):
                # layer type changed (conv <-> linear), copy existing weights into the new layer type
                with torch.no_grad():
                    new_fc = linear_layer(self.in_features, self.hidden_size)
                    new_fc.weight.copy_(self.pre_logits.fc.weight.reshape(new_fc.weight.shape))
                    new_fc.bias.copy_(self.pre_logits.fc.bias)
                    self.pre_logits.fc = new_fc
        self.fc = linear_layer(self.num_features, num_classes) if num_classes > 0 else nn.Identity()
    def forward(self, x, pre_logits: bool = False):
        x = self.global_pool(x)
        x = self.norm(x)
        x = self.flatten(x)
        x = self.pre_logits(x)
        if pre_logits:
            return x
        x = self.drop(x)
        x = self.fc(x)
        return x
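A brief usage sketch for the two heads above (illustrative only; assumes both classes are exported from timm.layers alongside the factories in the __init__.py shown earlier):

import torch
from timm.layers import ClassifierHead, NormMlpClassifierHead  # assumed export path

feat = torch.randn(2, 768, 7, 7)  # NCHW feature map from a backbone

head = ClassifierHead(in_features=768, num_classes=1000, pool_type='avg', drop_rate=0.1)
logits = head(feat)                   # -> (2, 1000)
pooled = head(feat, pre_logits=True)  # -> (2, 768), classifier skipped

mlp_head = NormMlpClassifierHead(in_features=768, num_classes=1000, hidden_size=1024)
logits = mlp_head(feat)               # pool -> norm -> flatten -> pre-logits MLP -> fc

# Heads can be re-targeted after construction, e.g. for fine-tuning:
head.reset(num_classes=10, global_pool='max')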

@@ -28,6 +28,7 @@ _ACT_FN_DEFAULT = dict(
    celu=F.celu,
    selu=F.selu,
    gelu=gelu,
+    gelu_tanh=gelu_tanh,
    sigmoid=sigmoid,
    tanh=tanh,
    hard_sigmoid=F.hardsigmoid if _has_hardsigmoid else hard_sigmoid,
@@ -71,6 +72,7 @@ _ACT_LAYER_DEFAULT = dict(
    celu=nn.CELU,
    selu=nn.SELU,
    gelu=GELU,
+    gelu_tanh=GELUTanh,
    sigmoid=Sigmoid,
    tanh=Tanh,
    hard_sigmoid=nn.Hardsigmoid if _has_hardsigmoid else HardSigmoid,
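With both registry entries in place, the new activation resolves by name through the create_act factories (get_act_layer / get_act_fn, both exported from timm.layers per the __init__.py above); a quick check:

import torch
from timm.layers import get_act_layer, get_act_fn

act_layer = get_act_layer('gelu_tanh')  # -> GELUTanh class
act_fn = get_act_fn('gelu_tanh')        # -> gelu_tanh function

x = torch.randn(4)
assert torch.allclose(act_layer()(x), act_fn(x))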

@@ -23,9 +23,9 @@ _NORM_MAP = dict(
_NORM_TYPES = {m for n, m in _NORM_MAP.items()}


-def create_norm_layer(layer_name, num_features, act_layer=None, apply_act=True, **kwargs):
-    layer = get_norm_layer(layer_name, act_layer=act_layer)
-    layer_instance = layer(num_features, apply_act=apply_act, **kwargs)
+def create_norm_layer(layer_name, num_features, **kwargs):
+    layer = get_norm_layer(layer_name)
+    layer_instance = layer(num_features, **kwargs)
    return layer_instance
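With the simplified signature, act_layer / apply_act handling is left to the create_norm_act factory, and any remaining kwargs pass straight through to the norm class. A minimal sketch (create_norm_layer is exported from timm.layers per the __init__.py above; assumes 'layernorm2d' maps to LayerNorm2d in _NORM_MAP):

import torch
from timm.layers import create_norm_layer

norm = create_norm_layer('layernorm2d', 64, eps=1e-6)  # extra kwargs forwarded to the norm class
y = norm(torch.randn(2, 64, 7, 7))  # LayerNorm2d normalizes over the channel dim of NCHW input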

Some files were not shown because too many files have changed in this diff.
