From 7c2660576d565b7441922265456ec8b050608da3 Mon Sep 17 00:00:00 2001
From: Ross Wightman <rwightman@gmail.com>
Date: Thu, 25 Aug 2022 15:30:59 -0700
Subject: [PATCH] Tweak weight init for maxxvit/coatnext configs that use ConvNeXt blocks.

---
 timm/models/maxxvit.py | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/timm/models/maxxvit.py b/timm/models/maxxvit.py
index 7f4ebf59..82840523 100644
--- a/timm/models/maxxvit.py
+++ b/timm/models/maxxvit.py
@@ -259,8 +259,6 @@ def _rw_max_cfg(
     # - mbconv expansion calculated from input instead of output chs
     # - mbconv shortcut and final 1x1 conv did not have a bias
     # - mbconv uses silu in timm, not gelu
-    # - avg pool with kernel_size=2 favoured downsampling (instead of maxpool for coat)
-    # - default to avg pool for mbconv downsample instead of 1x1 or dw conv
     # - expansion in attention block done via output proj, not input proj
     return dict(
         conv_cfg=MaxxVitConvCfg(
@@ -411,18 +409,19 @@ model_cfgs = dict(
             rel_pos_dim=384,  # was supposed to be 512, woops
         ),
     ),
-    coatnext_nano_rw_224=MaxxVitCfg(
+    coatnet_nano_cc_224=MaxxVitCfg(
         embed_dim=(64, 128, 256, 512),
         depths=(3, 4, 6, 3),
         stem_width=(32, 64),
-        **_next_cfg(),
+        block_type=('C', 'C', ('C', 'T'), ('C', 'T')),
+        **_rw_coat_cfg(),
     ),
-    coatnet_nano_cc_224=MaxxVitCfg(
+    coatnext_nano_rw_224=MaxxVitCfg(
         embed_dim=(64, 128, 256, 512),
         depths=(3, 4, 6, 3),
         stem_width=(32, 64),
-        block_type=('C', 'C', ('C', 'T'), ('C', 'T')),
-        **_rw_coat_cfg(),
+        weight_init='normal',
+        **_next_cfg(),
     ),
 
     # Trying to be like the CoAtNet paper configs
@@ -498,6 +497,7 @@ model_cfgs = dict(
         depths=(1, 2, 3, 1),
         block_type=('M',) * 4,
         stem_width=(32, 64),
+        weight_init='normal',
         **_next_cfg(window_size=8),
     ),