diff --git a/timm/utils/model.py b/timm/utils/model.py
index b9f3e9d3..3fbc9f25 100644
--- a/timm/utils/model.py
+++ b/timm/utils/model.py
@@ -7,33 +7,38 @@ import fnmatch
 import torch
 from torchvision.ops.misc import FrozenBatchNorm2d
 
-from .model_ema import ModelEma
 
+_SUB_MODULE_ATTR = ('module', 'model')
 
-def unwrap_model(model):
-    if isinstance(model, ModelEma):
-        return unwrap_model(model.ema)
-    else:
-        return model.module if hasattr(model, 'module') else model
+
+def unwrap_model(model, recursive=True):
+    for attr in _SUB_MODULE_ATTR:
+        sub_module = getattr(model, attr, None)
+        if sub_module is not None:
+            return unwrap_model(sub_module) if recursive else sub_module
+    return model
 
 
 def get_state_dict(model, unwrap_fn=unwrap_model):
     return unwrap_fn(model).state_dict()
 
 
-def avg_sq_ch_mean(model, input, output): 
-    "calculate average channel square mean of output activations"
-    return torch.mean(output.mean(axis=[0,2,3])**2).item()
+def avg_sq_ch_mean(model, input, output):
+    """ calculate average channel square mean of output activations
+    """
+    return torch.mean(output.mean(axis=[0, 2, 3]) ** 2).item()
 
 
-def avg_ch_var(model, input, output): 
-    "calculate average channel variance of output activations"
-    return torch.mean(output.var(axis=[0,2,3])).item()\
+def avg_ch_var(model, input, output):
+    """calculate average channel variance of output activations
+    """
+    return torch.mean(output.var(axis=[0, 2, 3])).item()
 
 
-def avg_ch_var_residual(model, input, output): 
-    "calculate average channel variance of output activations"
-    return torch.mean(output.var(axis=[0,2,3])).item()
+def avg_ch_var_residual(model, input, output):
+    """calculate average channel variance of output activations
+    """
+    return torch.mean(output.var(axis=[0, 2, 3])).item()
 
 
 class ActivationStatsHook:
@@ -62,15 +67,16 @@ class ActivationStatsHook:
             raise ValueError("Please provide `hook_fns` for each `hook_fn_locs`, \
                 their lengths are different.")
         self.stats = dict((hook_fn.__name__, []) for hook_fn in hook_fns)
-        for hook_fn_loc, hook_fn in zip(hook_fn_locs, hook_fns): 
+        for hook_fn_loc, hook_fn in zip(hook_fn_locs, hook_fns):
             self.register_hook(hook_fn_loc, hook_fn)
 
     def _create_hook(self, hook_fn):
         def append_activation_stats(module, input, output):
             out = hook_fn(module, input, output)
             self.stats[hook_fn.__name__].append(out)
+
         return append_activation_stats
-        
+
     def register_hook(self, hook_fn_loc, hook_fn):
         for name, module in self.model.named_modules():
             if not fnmatch.fnmatch(name, hook_fn_loc):
@@ -78,17 +84,18 @@ class ActivationStatsHook:
             module.register_forward_hook(self._create_hook(hook_fn))
 
 
-def extract_spp_stats(model, 
-                      hook_fn_locs,
-                      hook_fns, 
-                      input_shape=[8, 3, 224, 224]):
+def extract_spp_stats(
+        model,
+        hook_fn_locs,
+        hook_fns,
+        input_shape=[8, 3, 224, 224]):
     """Extract average square channel mean and variance of activations during 
     forward pass to plot Signal Propogation Plots (SPP).
     
     Paper: https://arxiv.org/abs/2101.08692
 
     Example Usage: https://gist.github.com/amaarora/6e56942fcb46e67ba203f3009b30d950
-    """ 
+    """
     x = torch.normal(0., 1., input_shape)
     hook = ActivationStatsHook(model, hook_fn_locs=hook_fn_locs, hook_fns=hook_fns)
     _ = model(x)
@@ -186,7 +193,7 @@ def _freeze_unfreeze(root_module, submodules=[], include_bn_running_stats=True,
     named_modules = submodules
     submodules = [root_module.get_submodule(m) for m in submodules]
 
-    if not(len(submodules)):
+    if not (len(submodules)):
         named_modules, submodules = list(zip(*root_module.named_children()))
 
     for n, m in zip(named_modules, submodules):
@@ -201,13 +208,14 @@ def _freeze_unfreeze(root_module, submodules=[], include_bn_running_stats=True,
                     module.get_submodule(split[0]).add_module(split[1], submodule)
                 else:
                     module.add_module(name, submodule)
+
             # Freeze batch norm
             if mode == 'freeze':
                 res = freeze_batch_norm_2d(m)
                 # It's possible that `m` is a type of BatchNorm in itself, in which case `unfreeze_batch_norm_2d` won't
                 # convert it in place, but will return the converted result. In this case `res` holds the converted
                 # result and we may try to re-assign the named module
-                if isinstance(m, (torch.nn.modules.batchnorm.BatchNorm2d,  torch.nn.modules.batchnorm.SyncBatchNorm)):
+                if isinstance(m, (torch.nn.modules.batchnorm.BatchNorm2d, torch.nn.modules.batchnorm.SyncBatchNorm)):
                     _add_submodule(root_module, n, res)
             # Unfreeze batch norm
             else: