From 834a9ec721525fa92f4dbf6ddef9393c2143d358 Mon Sep 17 00:00:00 2001 From: Ross Wightman Date: Wed, 1 Dec 2021 14:58:09 -0800 Subject: [PATCH 1/2] Disable use of timm nn.Linear wrapper since AMP autocast + torchscript use appears fixed --- timm/models/layers/classifier.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/timm/models/layers/classifier.py b/timm/models/layers/classifier.py index 2b745413..798748da 100644 --- a/timm/models/layers/classifier.py +++ b/timm/models/layers/classifier.py @@ -6,7 +6,6 @@ from torch import nn as nn from torch.nn import functional as F from .adaptive_avgmax_pool import SelectAdaptivePool2d -from .linear import Linear def _create_pool(num_features, num_classes, pool_type='avg', use_conv=False): @@ -26,8 +25,7 @@ def _create_fc(num_features, num_classes, use_conv=False): elif use_conv: fc = nn.Conv2d(num_features, num_classes, 1, bias=True) else: - # NOTE: using my Linear wrapper that fixes AMP + torchscript casting issue - fc = Linear(num_features, num_classes, bias=True) + fc = nn.Linear(num_features, num_classes, bias=True) return fc From cd059cbe9c3bf22e21db97963c8aeb15bb9c6fd0 Mon Sep 17 00:00:00 2001 From: Ross Wightman Date: Wed, 1 Dec 2021 14:58:56 -0800 Subject: [PATCH 2/2] Add FX backward tests back --- tests/test_models.py | 58 ++++++++++++++++++++++---------------------- 1 file changed, 29 insertions(+), 29 deletions(-) diff --git a/tests/test_models.py b/tests/test_models.py index 4f80612f..01ad7489 100644 --- a/tests/test_models.py +++ b/tests/test_models.py @@ -386,41 +386,41 @@ def test_model_forward_fx(model_name, batch_size): assert not torch.isnan(outputs).any(), 'Output included NaNs' -if 'GITHUB_ACTIONS' not in os.environ: - # FIXME this test is causing GitHub actions to run out of RAM and abruptly kill the test process +@pytest.mark.timeout(120) +@pytest.mark.parametrize('model_name', list_models( + exclude_filters=EXCLUDE_FILTERS + EXCLUDE_FX_FILTERS, name_matches_cfg=True)) +@pytest.mark.parametrize('batch_size', [2]) +def test_model_backward_fx(model_name, batch_size): + """Symbolically trace each model and run single backward pass through the resulting GraphModule""" + if not has_fx_feature_extraction: + pytest.skip("Can't test FX. Torch >= 1.10 and Torchvision >= 0.11 are required.") - @pytest.mark.timeout(120) - @pytest.mark.parametrize('model_name', list_models( - exclude_filters=EXCLUDE_FILTERS + EXCLUDE_FX_FILTERS, name_matches_cfg=True)) - @pytest.mark.parametrize('batch_size', [2]) - def test_model_backward_fx(model_name, batch_size): - """Symbolically trace each model and run single backward pass through the resulting GraphModule""" - if not has_fx_feature_extraction: - pytest.skip("Can't test FX. Torch >= 1.10 and Torchvision >= 0.11 are required.") + input_size = _get_input_size(model_name=model_name, target=TARGET_BWD_FX_SIZE) + if max(input_size) > MAX_BWD_FX_SIZE: + pytest.skip("Fixed input size model > limit.") - input_size = _get_input_size(model_name=model_name, target=TARGET_BWD_FX_SIZE) - if max(input_size) > MAX_BWD_FX_SIZE: - pytest.skip("Fixed input size model > limit.") + model = create_model(model_name, pretrained=False, num_classes=42) + model.train() + num_params = sum([x.numel() for x in model.parameters()]) + if 'GITHUB_ACTIONS' in os.environ and num_params > 100e6: + pytest.skip("Skipping FX backward test on model with more than 100M params.") - model = create_model(model_name, pretrained=False, num_classes=42) - model.train() - num_params = sum([x.numel() for x in model.parameters()]) - if 'GITHUB_ACTIONS' in os.environ and num_params > 100e6: - pytest.skip("Skipping FX backward test on model with more than 100M params.") + model = _create_fx_model(model, train=True) + outputs = tuple(model(torch.randn((batch_size, *input_size))).values()) + if isinstance(outputs, tuple): + outputs = torch.cat(outputs) + outputs.mean().backward() + for n, x in model.named_parameters(): + assert x.grad is not None, f'No gradient for {n}' + num_grad = sum([x.grad.numel() for x in model.parameters() if x.grad is not None]) - model = _create_fx_model(model, train=True) - outputs = tuple(model(torch.randn((batch_size, *input_size))).values()) - if isinstance(outputs, tuple): - outputs = torch.cat(outputs) - outputs.mean().backward() - for n, x in model.named_parameters(): - assert x.grad is not None, f'No gradient for {n}' - num_grad = sum([x.grad.numel() for x in model.parameters() if x.grad is not None]) + assert outputs.shape[-1] == 42 + assert num_params == num_grad, 'Some parameters are missing gradients' + assert not torch.isnan(outputs).any(), 'Output included NaNs' - assert outputs.shape[-1] == 42 - assert num_params == num_grad, 'Some parameters are missing gradients' - assert not torch.isnan(outputs).any(), 'Output included NaNs' +if 'GITHUB_ACTIONS' not in os.environ: + # FIXME this test is causing GitHub actions to run out of RAM and abruptly kill the test process # reason: model is scripted after fx tracing, but beit has torch.jit.is_scripting() control flow EXCLUDE_FX_JIT_FILTERS = [