From 834a9ec721525fa92f4dbf6ddef9393c2143d358 Mon Sep 17 00:00:00 2001
From: Ross Wightman <rwightman@gmail.com>
Date: Wed, 1 Dec 2021 14:58:09 -0800
Subject: [PATCH 1/2] Disable use of timm nn.Linear wrapper since AMP autocast
 + torchscript use appears fixed

---
 timm/models/layers/classifier.py | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/timm/models/layers/classifier.py b/timm/models/layers/classifier.py
index 2b745413..798748da 100644
--- a/timm/models/layers/classifier.py
+++ b/timm/models/layers/classifier.py
@@ -6,7 +6,6 @@ from torch import nn as nn
 from torch.nn import functional as F
 
 from .adaptive_avgmax_pool import SelectAdaptivePool2d
-from .linear import Linear
 
 
 def _create_pool(num_features, num_classes, pool_type='avg', use_conv=False):
@@ -26,8 +25,7 @@ def _create_fc(num_features, num_classes, use_conv=False):
     elif use_conv:
         fc = nn.Conv2d(num_features, num_classes, 1, bias=True)
     else:
-        # NOTE: using my Linear wrapper that fixes AMP + torchscript casting issue
-        fc = Linear(num_features, num_classes, bias=True)
+        fc = nn.Linear(num_features, num_classes, bias=True)
     return fc
 
 

From cd059cbe9c3bf22e21db97963c8aeb15bb9c6fd0 Mon Sep 17 00:00:00 2001
From: Ross Wightman <rwightman@gmail.com>
Date: Wed, 1 Dec 2021 14:58:56 -0800
Subject: [PATCH 2/2] Add FX backward tests back

---
 tests/test_models.py | 58 ++++++++++++++++++++++----------------------
 1 file changed, 29 insertions(+), 29 deletions(-)

diff --git a/tests/test_models.py b/tests/test_models.py
index 4f80612f..01ad7489 100644
--- a/tests/test_models.py
+++ b/tests/test_models.py
@@ -386,41 +386,41 @@ def test_model_forward_fx(model_name, batch_size):
     assert not torch.isnan(outputs).any(), 'Output included NaNs'
 
 
-if 'GITHUB_ACTIONS' not in os.environ:
-    # FIXME this test is causing GitHub actions to run out of RAM and abruptly kill the test process
+@pytest.mark.timeout(120)
+@pytest.mark.parametrize('model_name', list_models(
+    exclude_filters=EXCLUDE_FILTERS + EXCLUDE_FX_FILTERS, name_matches_cfg=True))
+@pytest.mark.parametrize('batch_size', [2])
+def test_model_backward_fx(model_name, batch_size):
+    """Symbolically trace each model and run a single backward pass through the resulting GraphModule"""
+    if not has_fx_feature_extraction:
+        pytest.skip("Can't test FX. Torch >= 1.10 and Torchvision >= 0.11 are required.")
 
-    @pytest.mark.timeout(120)
-    @pytest.mark.parametrize('model_name', list_models(
-        exclude_filters=EXCLUDE_FILTERS + EXCLUDE_FX_FILTERS, name_matches_cfg=True))
-    @pytest.mark.parametrize('batch_size', [2])
-    def test_model_backward_fx(model_name, batch_size):
-        """Symbolically trace each model and run single backward pass through the resulting GraphModule"""
-        if not has_fx_feature_extraction:
-            pytest.skip("Can't test FX. Torch >= 1.10 and Torchvision >= 0.11 are required.")
+    input_size = _get_input_size(model_name=model_name, target=TARGET_BWD_FX_SIZE)
+    if max(input_size) > MAX_BWD_FX_SIZE:
+        pytest.skip("Fixed input size model > limit.")
 
-        input_size = _get_input_size(model_name=model_name, target=TARGET_BWD_FX_SIZE)
-        if max(input_size) > MAX_BWD_FX_SIZE:
-            pytest.skip("Fixed input size model > limit.")
+    model = create_model(model_name, pretrained=False, num_classes=42)
+    model.train()
+    num_params = sum([x.numel() for x in model.parameters()])
+    if 'GITHUB_ACTIONS' in os.environ and num_params > 100e6:
+        pytest.skip("Skipping FX backward test on model with more than 100M params.")
 
-        model = create_model(model_name, pretrained=False, num_classes=42)
-        model.train()
-        num_params = sum([x.numel() for x in model.parameters()])
-        if 'GITHUB_ACTIONS' in os.environ and num_params > 100e6:
-            pytest.skip("Skipping FX backward test on model with more than 100M params.")
+    model = _create_fx_model(model, train=True)
+    outputs = tuple(model(torch.randn((batch_size, *input_size))).values())
+    if isinstance(outputs, tuple):
+        outputs = torch.cat(outputs)
+    outputs.mean().backward()
+    for n, x in model.named_parameters():
+        assert x.grad is not None, f'No gradient for {n}'
+    num_grad = sum([x.grad.numel() for x in model.parameters() if x.grad is not None])
 
-        model = _create_fx_model(model, train=True)
-        outputs = tuple(model(torch.randn((batch_size, *input_size))).values())
-        if isinstance(outputs, tuple):
-            outputs = torch.cat(outputs)
-        outputs.mean().backward()
-        for n, x in model.named_parameters():
-            assert x.grad is not None, f'No gradient for {n}'
-        num_grad = sum([x.grad.numel() for x in model.parameters() if x.grad is not None])
+    assert outputs.shape[-1] == 42
+    assert num_params == num_grad, 'Some parameters are missing gradients'
+    assert not torch.isnan(outputs).any(), 'Output included NaNs'
 
-        assert outputs.shape[-1] == 42
-        assert num_params == num_grad, 'Some parameters are missing gradients'
-        assert not torch.isnan(outputs).any(), 'Output included NaNs'
 
+if 'GITHUB_ACTIONS' not in os.environ:
+    # FIXME this test is causing GitHub actions to run out of RAM and abruptly kill the test process
 
     # reason: model is scripted after fx tracing, but beit has torch.jit.is_scripting() control flow
     EXCLUDE_FX_JIT_FILTERS = [