From 4f6e4af7959cd3cc80a42fc4ebe23d15a825f65b Mon Sep 17 00:00:00 2001 From: Ross Wightman Date: Wed, 5 Aug 2020 16:09:09 -0800 Subject: [PATCH] Deployed 80c3051 with MkDocs version: 1.1.2 --- 404.html | 72 +- archived_changes/index.html | 726 ++++++++++ changes/index.html | 209 +-- feature_extraction/index.html | 785 +++++++++++ index.html | 157 ++- javascripts/tables.js | 6 + models/index.html | 2019 ++++++--------------------- results/index.html | 1797 ++++++++++++++++++++++++ scripts/index.html | 482 +++++++ search/search_index.json | 2 +- sitemap.xml | 26 +- sitemap.xml.gz | Bin 194 -> 197 bytes training_hparam_examples/index.html | 591 ++++++++ 13 files changed, 5197 insertions(+), 1675 deletions(-) create mode 100644 archived_changes/index.html create mode 100644 feature_extraction/index.html create mode 100644 javascripts/tables.js create mode 100644 results/index.html create mode 100644 scripts/index.html create mode 100644 training_hparam_examples/index.html diff --git a/404.html b/404.html index b08f0f7f..d07c61f4 100644 --- a/404.html +++ b/404.html @@ -186,8 +186,8 @@
  • - - Models + + Model Architectures
  • @@ -198,8 +198,68 @@
  • - - Changes + + Results + +
  • + + + + + + + +
  • + + Scripts + +
  • + + + + + + + +
  • + + Training Examples + +
  • + + + + + + + +
  • + + Feature Extraction + +
  • + + + + + + + +
  • + + Recent Changes + +
  • + + + + + + + +
  • + + Archived Changes
  • @@ -261,6 +321,10 @@ + + + + \ No newline at end of file diff --git a/archived_changes/index.html b/archived_changes/index.html new file mode 100644 index 00000000..53cb788e --- /dev/null +++ b/archived_changes/index.html @@ -0,0 +1,726 @@ + + + + + + + + + + + + + + + + + + + + Archived Changes - Pytorch Image Models + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    + + + + Skip to content + + +
    +
    + +
    + +
    + +
    + +
    + + + + + + +
    +
    + + +
    +
    +
    + +
    +
    +
    + + +
    +
    +
    + + +
    +
    +
    + + +
    +
    + + + + + + + + + + +

    Archived Changes

    +

    Feb 29, 2020

    +
      +
• New MobileNet-V3 Large weights trained from scratch with this code to 75.77% top-1
    • +
• IMPORTANT CHANGE - default weight init changed for all MobileNetV3 / EfficientNet / related models
    • +
• overall results similar to, or a bit better than, previous training from scratch on the few smaller models tried
    • +
• performance early in training seems consistently improved, but there is less difference by the end
    • +
• set fix_group_fanout=False in the _init_weight_goog fn if you need to reproduce past behaviour (see the sketch after this list)
    • +
• Experimental LR noise feature added; applies a random perturbation to the LR each epoch within a specified range of training
    • +
    +
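A hedged sketch of reproducing the old behaviour by re-running weight init with fix_group_fanout=False. This assumes the helper names and signatures (_init_weight_goog and efficientnet_init_weights in timm.models.efficientnet_builder) match the release in use:

from functools import partial

import timm
from timm.models.efficientnet_builder import _init_weight_goog, efficientnet_init_weights

m = timm.create_model('efficientnet_b0')
# re-initialize all weights with the pre-change grouped-conv fan-out handling
efficientnet_init_weights(m, init_fn=partial(_init_weight_goog, fix_group_fanout=False))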

    Feb 18, 2020

    +
      +
    • Big refactor of model layers and addition of several attention mechanisms. Several additions motivated by 'Compounding the Performance Improvements...' (https://arxiv.org/abs/2001.06268):
    • +
    • Move layer/module impl into layers subfolder/module of models and organize in a more granular fashion
    • +
    • ResNet downsample paths now properly support dilation (output stride != 32) for avg_pool ('D' variant) and 3x3 (SENets) networks
    • +
    • Add Selective Kernel Nets on top of ResNet base, pretrained weights
        +
      • skresnet18 - 73% top-1
      • +
      • skresnet34 - 76.9% top-1
      • +
      • skresnext50_32x4d (equiv to SKNet50) - 80.2% top-1
      • +
      +
    • +
    • ECA and CECA (circular padding) attention layer contributed by Chris Ha
    • +
    • CBAM attention experiment (not the best results so far, may remove)
    • +
    • Attention factory to allow dynamically selecting one of SE, ECA, CBAM in the .se position for all ResNets
    • +
    • Add DropBlock and DropPath (formerly DropConnect for EfficientNet/MobileNetv3) support to all ResNet variants
    • +
• Full dataset results updated to include NoisyStudent weights and 2 of the 3 SK weights
    • +
    +

    Feb 12, 2020

    +
      +
    • Add EfficientNet-L2 and B0-B7 NoisyStudent weights ported from Tensorflow TPU
    • +
    +

    Feb 6, 2020

    +
      +
    • Add RandAugment trained EfficientNet-ES (EdgeTPU-Small) weights with 78.1 top-1. Trained by Andrew Lavin (see Training section for hparams)
    • +
    +

Feb 1/2, 2020

    +
      +
• Port new EfficientNet-B8 (RandAugment) weights; these are different from the B8 AdvProp weights and use a different input normalization.
    • +
    • Update results csv files on all models for ImageNet validation and three other test sets
    • +
• Push PyPI package update
    • +
    +

    Jan 31, 2020

    +
      +
    • Update ResNet50 weights with a new 79.038 result from further JSD / AugMix experiments. Full command line for reproduction in training section below.
    • +
    +

    Jan 11/12, 2020

    +
      +
• Master may be a bit unstable wrt training; these changes have been tested, but not all combos
    • +
• Implementation of AugMix added alongside existing RA and AA, including numerous supporting pieces like the JSD loss (Jensen-Shannon divergence + CE) and AugMixDataset
    • +
    • SplitBatchNorm adaptation layer added for implementing Auxiliary BN as per AdvProp paper
    • +
    • ResNet-50 AugMix trained model w/ 79% top-1 added
    • +
    • seresnext26tn_32x4d - 77.99 top-1, 93.75 top-5 added to tiered experiment, higher img/s than 't' and 'd'
    • +
    +

    Jan 3, 2020

    +
      +
    • Add RandAugment trained EfficientNet-B0 weight with 77.7 top-1. Trained by Michael Klachko with this code and recent hparams (see Training section)
    • +
    • Add avg_checkpoints.py script for post training weight averaging and update all scripts with header docstrings and shebangs.
    • +
    +

    Dec 30, 2019

    + +

    Dec 28, 2019

    +
      +
    • Add new model weights and training hparams (see Training Hparams section)
    • +
    • efficientnet_b3 - 81.5 top-1, 95.7 top-5 at default res/crop, 81.9, 95.8 at 320x320 1.0 crop-pct
        +
      • trained with RandAugment, ended up with an interesting but less than perfect result (see training section)
      • +
      +
    • +
• seresnext26d_32x4d - 77.6 top-1, 93.6 top-5
        +
      • deep stem (32, 32, 64), avgpool downsample
      • +
• stem/downsample from the bag-of-tricks paper
      • +
      +
    • +
• seresnext26t_32x4d - 78.0 top-1, 93.7 top-5
        +
      • deep tiered stem (24, 48, 64), avgpool downsample (a modified 'D' variant)
      • +
      • stem sizing mods from Jeremy Howard and fastai devs discussing ResNet architecture experiments
      • +
      +
    • +
    +

    Dec 23, 2019

    +
      +
    • Add RandAugment trained MixNet-XL weights with 80.48 top-1.
    • +
• --dist-bn argument added to train.py; it will distribute BN stats between nodes after each train epoch, before eval
    • +
    +

    Dec 4, 2019

    +
      +
    • Added weights from the first training from scratch of an EfficientNet (B2) with my new RandAugment implementation. Much better than my previous B2 and very close to the official AdvProp ones (80.4 top-1, 95.08 top-5).
    • +
    +

    Nov 29, 2019

    +
      +
    • Brought EfficientNet and MobileNetV3 up to date with my https://github.com/rwightman/gen-efficientnet-pytorch code. Torchscript and ONNX export compat excluded.
    • +
    • AdvProp weights added
    • +
    • Official TF MobileNetv3 weights added
    • +
    • EfficientNet and MobileNetV3 hook based 'feature extraction' classes added. Will serve as basis for using models as backbones in obj detection/segmentation tasks. Lots more to be done here...
    • +
    • HRNet classification models and weights added from https://github.com/HRNet/HRNet-Image-Classification
    • +
• Consistency in global pooling, reset_classifier, and forward_features across models
    • +
    • forward_features always returns unpooled feature maps now
    • +
    • Reasonable chance I broke something... let me know
    • +
    +

    Nov 22, 2019

    +
      +
    • Add ImageNet training RandAugment implementation alongside AutoAugment. PyTorch Transform compatible format, using PIL. Currently training two EfficientNet models from scratch with promising results... will update.
    • +
    • drop-connect cmd line arg finally added to train.py, no need to hack model fns. Works for efficientnet/mobilenetv3 based models, ignored otherwise.
    • +
    + + + + + + + +
    +
    +
    +
    + + + + +
    + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/changes/index.html b/changes/index.html index cb1117da..78c6ddce 100644 --- a/changes/index.html +++ b/changes/index.html @@ -17,7 +17,7 @@ - Changes - Pytorch Image Models + Recent Changes - Pytorch Image Models @@ -51,7 +51,7 @@
    - + Skip to content @@ -79,7 +79,7 @@ - Changes + Recent Changes
    @@ -191,8 +191,56 @@
  • - - Models + + Model Architectures + +
  • + + + + + + + +
  • + + Results + +
  • + + + + + + + +
  • + + Scripts + +
  • + + + + + + + +
  • + + Training Examples + +
  • + + + + + + + +
  • + + Feature Extraction
  • @@ -208,22 +256,26 @@ + + - - Changes + + Recent Changes @@ -317,6 +367,8 @@ @@ -551,6 +586,10 @@ + + + + \ No newline at end of file diff --git a/feature_extraction/index.html b/feature_extraction/index.html new file mode 100644 index 00000000..68c89673 --- /dev/null +++ b/feature_extraction/index.html @@ -0,0 +1,785 @@ + + + + + + + + + + + + + + + + + + + + Feature Extraction - Pytorch Image Models + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    + + + + Skip to content + + +
    +
    + +
    + +
    + +
    + +
    + + + + + + +
    +
    + + +
    +
    +
    + +
    +
    +
    + + + + + +
    +
    + + + + + + + + + + +

    Feature Extraction

    +

    All of the models in timm have consistent mechanisms for obtaining various types of features from the model for tasks besides classification.

    +

    Penultimate Layer Features (Pre-Classifier Features)

    +

The features from the penultimate model layer can be obtained in several ways without requiring model surgery (although feel free to do surgery). One must first decide whether pooled or un-pooled features are wanted.

    +

    Unpooled

    +

    There are three ways to obtain unpooled features.

    +

Without modifying the network, one can call model.forward_features(input) on any model instead of the usual model(input). This bypasses the network's head classifier and global pooling.

    +

To explicitly modify the network so that it returns unpooled features, one can either create the model without a classifier and pooling, or remove them later. Both paths remove the parameters associated with the classifier from the network.

    +

    forward_features()

    +

    import torch
    +import timm
    +m = timm.create_model('xception41', pretrained=True)
    +o = m(torch.randn(2, 3, 299, 299))
    +print(f'Original shape: {o.shape}')
    +o = m.forward_features(torch.randn(2, 3, 299, 299))
    +print(f'Unpooled shape: {o.shape}')
    +
    +Output: +
    Original shape: torch.Size([2, 1000])
    +Unpooled shape: torch.Size([2, 2048, 10, 10])
    +

    +

    Create with no classifier and pooling

    +

    import torch
    +import timm
    +m = timm.create_model('resnet50', pretrained=True, num_classes=0, global_pool='')
    +o = m(torch.randn(2, 3, 224, 224))
    +print(f'Unpooled shape: {o.shape}')
    +
    +Output: +
    Unpooled shape: torch.Size([2, 2048, 7, 7])
    +

    +

    Remove it later

    +

    import torch
    +import timm
    +m = timm.create_model('densenet121', pretrained=True)
    +o = m(torch.randn(2, 3, 224, 224))
    +print(f'Original shape: {o.shape}')
    +m.reset_classifier(0, '')
    +o = m(torch.randn(2, 3, 224, 224))
    +print(f'Unpooled shape: {o.shape}')
    +
    +Output: +
    Original shape: torch.Size([2, 1000])
    +Unpooled shape: torch.Size([2, 1024, 7, 7])
    +

    +

    Pooled

    +

To obtain pooled features, one can either use forward_features() and pool/flatten the result manually (as in the sketch below), or modify the network as above but keep the pooling intact.

    +
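A minimal sketch of the first approach, pooling the output of forward_features() manually. This assumes a convolutional model with NCHW feature maps and that global average pooling is the reduction you want; the mean call is plain PyTorch, not a timm API:

import torch
import timm
m = timm.create_model('resnet50', pretrained=True)
o = m.forward_features(torch.randn(2, 3, 224, 224))
# global average pool over the spatial dims, leaving a (batch, channels) tensor
pooled = o.mean(dim=(2, 3))
print(f'Pooled shape: {pooled.shape}')

Output:

Pooled shape: torch.Size([2, 2048])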

    Create with no classifier

    +

    import torch
    +import timm
    +m = timm.create_model('resnet50', pretrained=True, num_classes=0)
    +o = m(torch.randn(2, 3, 224, 224))
    +print(f'Pooled shape: {o.shape}')
    +
    +Output: +
    Pooled shape: torch.Size([2, 2048])
    +

    +

    Remove it later

    +

    import torch
    +import timm
    +m = timm.create_model('ese_vovnet19b_dw', pretrained=True)
    +o = m(torch.randn(2, 3, 224, 224))
    +print(f'Original shape: {o.shape}')
    +m.reset_classifier(0)
    +o = m(torch.randn(2, 3, 224, 224))
    +print(f'Pooled shape: {o.shape}')
    +
    +Output: +
    Pooled shape: torch.Size([2, 1024])
    +

    +

    Multi-scale Feature Maps (Feature Pyramid)

    +

    Object detection, segmentation, keypoint, and a variety of dense pixel tasks require access to feature maps from the backbone network at multiple scales. This is often done by modifying the original classification network. Since each network varies quite a bit in structure, it's not uncommon to see only a few backbones supported in any given obj detection or segmentation library.

    +

timm provides a consistent interface for creating any of the included models as feature backbones that output feature maps for selected levels.

    +

A feature backbone can be created by adding the argument features_only=True to any create_model call. By default, most models will output feature maps at 5 stride levels (not all have that many), with the first at stride 2 (some start at stride 1 or 4).

    +

    Create a feature map extraction model

    +

    import torch
    +import timm
    +m = timm.create_model('resnest26d', features_only=True, pretrained=True)
    +o = m(torch.randn(2, 3, 224, 224))
    +for x in o:
    +  print(x.shape)
    +
    +Output: +
    torch.Size([2, 64, 112, 112])
    +torch.Size([2, 256, 56, 56])
    +torch.Size([2, 512, 28, 28])
    +torch.Size([2, 1024, 14, 14])
    +torch.Size([2, 2048, 7, 7])
    +

    +

    Query the feature information

    +

    After a feature backbone has been created, it can be queried to provide channel or resolution reduction information to the downstream heads without requiring static config or hardcoded constants. The .feature_info attribute is a class encapsulating the information about the feature extraction points.

    +

    import torch
    +import timm
    +m = timm.create_model('regnety_032', features_only=True, pretrained=True)
    +print(f'Feature channels: {m.feature_info.channels()}')
    +o = m(torch.randn(2, 3, 224, 224))
    +for x in o:
    +  print(x.shape)
    +
    +Output: +
    Feature channels: [32, 72, 216, 576, 1512]
    +torch.Size([2, 32, 112, 112])
    +torch.Size([2, 72, 56, 56])
    +torch.Size([2, 216, 28, 28])
    +torch.Size([2, 576, 14, 14])
    +torch.Size([2, 1512, 7, 7])
    +

    +

    Select specific feature levels or limit the stride

    +

There are two additional creation arguments impacting the output features.

    +
      +
    • out_indices selects which indices to output
    • +
    • output_stride limits the feature output stride of the network (also works in classification mode BTW)
    • +
    +

    out_indices is supported by all models, but not all models have the same index to feature stride mapping. Look at the code or check feature_info to compare. The out indices generally correspond to the C(i+1)th feature level (a 2^(i+1) reduction). For most models, index 0 is the stride 2 features, and index 4 is stride 32.

    +
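As a quick sketch of that mapping, the example below assumes resnet50 follows the common 5-level layout described above, so out_indices=(1, 2, 3) should select the stride 4, 8, and 16 feature maps:

import torch
import timm
m = timm.create_model('resnet50', features_only=True, out_indices=(1, 2, 3), pretrained=True)
# feature_info reports the channel count and reduction (stride) of each selected level
print(f'Feature channels: {m.feature_info.channels()}')
print(f'Feature reduction: {m.feature_info.reduction()}')

Output:

Feature channels: [256, 512, 1024]
Feature reduction: [4, 8, 16]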

output_stride is achieved by converting layers to use dilated convolutions. Doing so is not always straightforward; some networks only support output_stride=32.

    +

    import torch
    +import timm
    +m = timm.create_model('ecaresnet101d', features_only=True, output_stride=8, out_indices=(2, 4), pretrained=True)
    +print(f'Feature channels: {m.feature_info.channels()}')
    +print(f'Feature reduction: {m.feature_info.reduction()}')
    +o = m(torch.randn(2, 3, 320, 320))
    +for x in o:
    +  print(x.shape)
    +
    +Output: +
    Feature channels: [512, 2048]
    +Feature reduction: [8, 8]
    +torch.Size([2, 512, 40, 40])
    +torch.Size([2, 2048, 40, 40])
    +

    + + + + + + + +
    +
    +
    +
    + + + + +
    + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/index.html b/index.html index 64a871a8..8fe534ea 100644 --- a/index.html +++ b/index.html @@ -221,8 +221,22 @@
  • - - Load Pretrained Model + + Load a Pretrained Model + + +
  • + +
  • + + List Models with Pretrained Weights + + +
  • + +
  • + + List Model Architectures by Wildcard
  • @@ -240,8 +254,20 @@
  • - - Models + + Model Architectures + +
  • + + + + + + + +
  • + + Results
  • @@ -252,8 +278,56 @@
  • - - Changes + + Scripts + +
  • + + + + + + + +
  • + + Training Examples + +
  • + + + + + + + +
  • + + Feature Extraction + +
  • + + + + + + + +
  • + + Recent Changes + +
  • + + + + + + + +
  • + + Archived Changes
  • @@ -291,8 +365,22 @@
  • - - Load Pretrained Model + + Load a Pretrained Model + + +
  • + +
  • + + List Models with Pretrained Weights + + +
  • + +
  • + + List Model Architectures by Wildcard
  • @@ -325,17 +413,17 @@

    Conda Environment

    -

    All development and testing has been done in Conda Python 3 environments - on Linux x86-64 systems, specifically Python 3.6.x and 3.7.x.

    -

    To install timm in a conda environment: +

All development and testing has been done in Conda Python 3 environments on Linux x86-64 systems, specifically Python 3.6.x, 3.7.x, and 3.8.x.

    +

Little to no care has been taken to be Python 2.x friendly, and it will not be supported. If you run into any challenges running on Windows or another OS, I'm definitely open to looking into those issues, so long as they occur in a reproducible (read: Conda) environment.

    +

    PyTorch versions 1.4, 1.5.x, and 1.6 have been tested with this code.

    +

I've tried to keep the dependencies minimal; the setup follows the PyTorch default install instructions for Conda:

    conda create -n torch-env
     conda activate torch-env
    -conda install -c pytorch pytorch torchvision cudatoolkit=10.1
    +conda install -c pytorch pytorch torchvision cudatoolkit=10.2
     conda install pyyaml
    -pip install timm
     

    -

    Load Pretrained Model

    +

    Load a Pretrained Model

    Pretrained models can be loaded using timm.create_model

    import timm
     
    @@ -343,8 +431,37 @@ pip install timm
     m.eval()
     
    -

To load a different model, see the list of pretrained weights.

    +

    List Models with Pretrained Weights

    +
    import timm
    +from pprint import pprint
    +model_names = timm.list_models(pretrained=True)
    +pprint(model_names)
    +>>> ['adv_inception_v3',
    + 'cspdarknet53',
    + 'cspresnext50',
    + 'densenet121',
    + 'densenet161',
    + 'densenet169',
    + 'densenet201',
    + 'densenetblur121d',
    + 'dla34',
    + 'dla46_c',
    +...
    +]
    +
    + +

    List Model Architectures by Wildcard

    +
    import timm
    +from pprint import pprint
    +model_names = timm.list_models('*resne*t*')
    +pprint(model_names)
    +>>> ['cspresnet50',
    + 'cspresnet50d',
    + 'cspresnet50w',
    + 'cspresnext50',
    +...
    +]
    +
    @@ -364,13 +481,13 @@ pip install timm