From ca991c1fa57373286b9876aa63370fd19f5d6032 Mon Sep 17 00:00:00 2001
From: Xiao Wang <24860335+xwang233@users.noreply.github.com>
Date: Tue, 7 Jun 2022 18:01:52 -0700
Subject: [PATCH 1/2] add --aot-autograd

---
 benchmark.py | 18 +++++++++++++++---
 train.py     | 15 ++++++++++++++-
 2 files changed, 29 insertions(+), 4 deletions(-)

diff --git a/benchmark.py b/benchmark.py
index 422da45d..e2370dcc 100755
--- a/benchmark.py
+++ b/benchmark.py
@@ -51,6 +51,12 @@ except ImportError as e:
     FlopCountAnalysis = None
     has_fvcore_profiling = False
 
+try:
+    from functorch.compile import memory_efficient_fusion
+    has_functorch = True
+except ImportError as e:
+    has_functorch = False
+
 torch.backends.cudnn.benchmark = True
 _logger = logging.getLogger('validate')
 
@@ -95,10 +101,13 @@ parser.add_argument('--amp', action='store_true', default=False,
                     help='use PyTorch Native AMP for mixed precision training. Overrides --precision arg.')
 parser.add_argument('--precision', default='float32', type=str,
                     help='Numeric precision. One of (amp, float32, float16, bfloat16, tf32)')
-parser.add_argument('--torchscript', dest='torchscript', action='store_true',
-                    help='convert model torchscript for inference')
 parser.add_argument('--fuser', default='', type=str,
                     help="Select jit fuser. One of ('', 'te', 'old', 'nvfuser')")
+scripting_group = parser.add_mutually_exclusive_group()
+scripting_group.add_argument('--torchscript', dest='torchscript', action='store_true',
+                    help='convert model torchscript for inference')
+scripting_group.add_argument('--aot-autograd', default=False, action='store_true',
+                    help="Enable AOT Autograd support. (It's recommended to use this option with `--fuser nvfuser` together)")
 
 
 # train optimizer parameters
@@ -188,7 +197,7 @@ def profile_fvcore(model, input_size=(3, 224, 224), batch_size=1, detailed=False
 
 class BenchmarkRunner:
     def __init__(
-            self, model_name, detail=False, device='cuda', torchscript=False, precision='float32',
+            self, model_name, detail=False, device='cuda', torchscript=False, aot_autograd=False, precision='float32',
             fuser='', num_warm_iter=10, num_bench_iter=50, use_train_size=False, **kwargs):
         self.model_name = model_name
         self.detail = detail
@@ -220,6 +229,9 @@ class BenchmarkRunner:
         if torchscript:
             self.model = torch.jit.script(self.model)
             self.scripted = True
+        if aot_autograd:
+            assert has_functorch, "functorch is needed for --aot-autograd"
+            self.model = memory_efficient_fusion(self.model)
 
         data_config = resolve_data_config(kwargs, model=self.model, use_test_size=not use_train_size)
         self.input_size = data_config['input_size']
diff --git a/train.py b/train.py
index acdf93c3..c95ec150 100755
--- a/train.py
+++ b/train.py
@@ -61,6 +61,13 @@ try:
 except ImportError:
     has_wandb = False
 
+try:
+    from functorch.compile import memory_efficient_fusion
+    has_functorch = True
+except ImportError as e:
+    has_functorch = False
+
+
 torch.backends.cudnn.benchmark = True
 _logger = logging.getLogger('train')
 
@@ -123,8 +130,11 @@ group.add_argument('-vb', '--validation-batch-size', type=int, default=None, met
                    help='Validation batch size override (default: None)')
 group.add_argument('--channels-last', action='store_true', default=False,
                    help='Use channels_last memory layout')
-group.add_argument('--torchscript', dest='torchscript', action='store_true',
+scripting_group = group.add_mutually_exclusive_group()
+scripting_group.add_argument('--torchscript', dest='torchscript', action='store_true',
                    help='torch.jit.script the full model')
+scripting_group.add_argument('--aot-autograd', default=False, action='store_true',
+                   help="Enable AOT Autograd support. (It's recommended to use this option with `--fuser nvfuser` together)")
 group.add_argument('--fuser', default='', type=str,
                    help="Select jit fuser. One of ('', 'te', 'old', 'nvfuser')")
 group.add_argument('--grad-checkpointing', action='store_true', default=False,
@@ -445,6 +455,9 @@ def main():
             assert not use_amp == 'apex', 'Cannot use APEX AMP with torchscripted model'
             assert not args.sync_bn, 'Cannot use SyncBatchNorm with torchscripted model'
         model = torch.jit.script(model)
+    if args.aot_autograd:
+        assert has_functorch, "functorch is needed for --aot-autograd"
+        model = memory_efficient_fusion(model)
 
     optimizer = create_optimizer_v2(model, **optimizer_kwargs(cfg=args))
 

From 2d7ab065030462f151f09ef91f86d3f0f4e6bc62 Mon Sep 17 00:00:00 2001
From: Ross Wightman
Date: Thu, 9 Jun 2022 14:30:21 -0700
Subject: [PATCH 2/2] Move aot-autograd opt after model metadata used to setup
 data config in benchmark.py

---
 benchmark.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/benchmark.py b/benchmark.py
index e2370dcc..f348fcb9 100755
--- a/benchmark.py
+++ b/benchmark.py
@@ -229,14 +229,14 @@ class BenchmarkRunner:
         if torchscript:
             self.model = torch.jit.script(self.model)
             self.scripted = True
-        if aot_autograd:
-            assert has_functorch, "functorch is needed for --aot-autograd"
-            self.model = memory_efficient_fusion(self.model)
-
         data_config = resolve_data_config(kwargs, model=self.model, use_test_size=not use_train_size)
         self.input_size = data_config['input_size']
         self.batch_size = kwargs.pop('batch_size', 256)
 
+        if aot_autograd:
+            assert has_functorch, "functorch is needed for --aot-autograd"
+            self.model = memory_efficient_fusion(self.model)
+
         self.example_inputs = None
         self.num_warm_iter = num_warm_iter
         self.num_bench_iter = num_bench_iter
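
Usage note (not part of either patch): a minimal standalone sketch of the code path that
--aot-autograd enables, assuming functorch and timm are installed and a CUDA device is
available. The model name, batch size, and input size here are illustrative only.

    import torch
    import timm
    from functorch.compile import memory_efficient_fusion

    # Mirrors the patched benchmark.py/train.py path: wrap the whole model so
    # AOT Autograd captures the forward and backward graphs and hands them to
    # the active JIT fuser (the new help text recommends --fuser nvfuser).
    model = timm.create_model('resnet50').cuda()
    model = memory_efficient_fusion(model)

    x = torch.randn(8, 3, 224, 224, device='cuda')
    loss = model(x).sum()
    loss.backward()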