@@ -56,13 +56,7 @@ try:
 except ImportError as e:
     has_functorch = False
 
-try:
-    import torch._dynamo
-    has_dynamo = True
-except ImportError:
-    has_dynamo = False
-    pass
-
+has_compile = hasattr(torch, 'compile')
 
 if torch.cuda.is_available():
     torch.backends.cuda.matmul.allow_tf32 = True
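
Note: the replacement above gates on the public torch.compile attribute instead of importing the private torch._dynamo module at startup. A minimal sketch of using such a capability flag (the maybe_compile helper is hypothetical, not part of this script):

    import torch

    has_compile = hasattr(torch, 'compile')  # True on PyTorch 2.0+ builds

    def maybe_compile(model, backend='inductor'):
        # Hypothetical helper: compile when available, otherwise run eager.
        if has_compile:
            return torch.compile(model, backend=backend)
        return model
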
@@ -81,8 +75,10 @@ parser.add_argument('--detail', action='store_true', default=False,
                     help='Provide train fwd/bwd/opt breakdown detail if True. Defaults to False')
 parser.add_argument('--no-retry', action='store_true', default=False,
                     help='Do not decay batch size and retry on error.')
-parser.add_argument('--results-file', default='', type=str, metavar='FILENAME',
+parser.add_argument('--results-file', default='', type=str,
                     help='Output csv file for validation results (summary)')
+parser.add_argument('--results-format', default='csv', type=str,
+                    help='Format for results file one of (csv, json) (default: csv).')
 parser.add_argument('--num-warm-iter', default=10, type=int,
                     metavar='N', help='Number of warmup iterations (default: 10)')
 parser.add_argument('--num-bench-iter', default=40, type=int,
@@ -113,8 +109,6 @@ parser.add_argument('--precision', default='float32', type=str,
                     help='Numeric precision. One of (amp, float32, float16, bfloat16, tf32)')
 parser.add_argument('--fuser', default='', type=str,
                     help="Select jit fuser. One of ('', 'te', 'old', 'nvfuser')")
-parser.add_argument('--dynamo-backend', default=None, type=str,
-                    help="Select dynamo backend. Default: None")
 parser.add_argument('--fast-norm', default=False, action='store_true',
                     help='enable experimental fast-norm')
 
@@ -122,10 +116,11 @@ parser.add_argument('--fast-norm', default=False, action='store_true',
 scripting_group = parser.add_mutually_exclusive_group()
 scripting_group.add_argument('--torchscript', dest='torchscript', action='store_true',
                              help='convert model torchscript for inference')
+scripting_group.add_argument('--torchcompile', nargs='?', type=str, default=None, const='inductor',
+                             help="Enable compilation w/ specified backend (default: inductor).")
 scripting_group.add_argument('--aot-autograd', default=False, action='store_true',
                              help="Enable AOT Autograd optimization.")
-scripting_group.add_argument('--dynamo', default=False, action='store_true',
-                             help="Enable Dynamo optimization.")
 
 # train optimizer parameters
 parser.add_argument('--opt', default='sgd', type=str, metavar='OPTIMIZER',
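
Because --torchcompile is declared with nargs='?' and const='inductor', the flag may be passed bare or with an explicit backend name. A self-contained sketch of that argparse behavior ('aot_eager' is one example dynamo backend name):

    import argparse

    p = argparse.ArgumentParser()
    p.add_argument('--torchcompile', nargs='?', type=str, default=None, const='inductor')

    print(p.parse_args([]).torchcompile)                  # None -> compilation disabled
    print(p.parse_args(['--torchcompile']).torchcompile)  # 'inductor' (the const value)
    print(p.parse_args(['--torchcompile', 'aot_eager']).torchcompile)  # explicit backend
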
@@ -218,9 +213,8 @@ class BenchmarkRunner:
             detail=False,
             device='cuda',
             torchscript=False,
+            torchcompile=None,
             aot_autograd=False,
-            dynamo=False,
-            dynamo_backend=None,
             precision='float32',
             fuser='',
             num_warm_iter=10,
@@ -259,20 +253,19 @@ class BenchmarkRunner:
         self.input_size = data_config['input_size']
         self.batch_size = kwargs.pop('batch_size', 256)
 
-        self.scripted = False
+        self.compiled = False
         if torchscript:
             self.model = torch.jit.script(self.model)
-            self.scripted = True
-        elif dynamo:
-            assert has_dynamo, "torch._dynamo is needed for --dynamo"
+            self.compiled = True
+        elif torchcompile:
+            assert has_compile, 'A version of torch w/ torch.compile() is required, possibly a nightly.'
             torch._dynamo.reset()
-            if dynamo_backend is not None:
-                self.model = torch._dynamo.optimize(dynamo_backend)(self.model)
-            else:
-                self.model = torch._dynamo.optimize()(self.model)
+            self.model = torch.compile(self.model, backend=torchcompile)
+            self.compiled = True
         elif aot_autograd:
             assert has_functorch, "functorch is needed for --aot-autograd"
             self.model = memory_efficient_fusion(self.model)
+            self.compiled = True
 
         self.example_inputs = None
         self.num_warm_iter = num_warm_iter
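
torch._dynamo.reset() is retained ahead of torch.compile() because the runner may benchmark many models in one process; resetting clears compiled-graph caches and guards between models. A minimal sketch, assuming a PyTorch 2.x build:

    import torch
    import torch._dynamo

    for width in (128, 256):
        model = torch.nn.Linear(width, width)
        torch._dynamo.reset()  # drop state cached while compiling the previous model
        compiled = torch.compile(model, backend='inductor')
        out = compiled(torch.randn(4, width))
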
@@ -344,7 +337,7 @@ class InferenceBenchmarkRunner(BenchmarkRunner):
             param_count=round(self.param_count / 1e6, 2),
         )
 
-        retries = 0 if self.scripted else 2  # skip profiling if model is scripted
+        retries = 0 if self.compiled else 2  # skip profiling if model is compiled
         while retries:
             retries -= 1
             try:
@@ -642,7 +635,6 @@ def main():
         model_cfgs = [(n, None) for n in model_names]
 
     if len(model_cfgs):
-        results_file = args.results_file or './benchmark.csv'
         _logger.info('Running bulk validation on these pretrained models: {}'.format(', '.join(model_names)))
         results = []
         try:
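
With the default './benchmark.csv' fallback removed here, bulk runs now write a results file only when --results-file is given; the write itself moves to the shared path added in the next hunk.
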
@@ -663,22 +655,30 @@ def main():
         sort_key = 'infer_gmacs'
         results = filter(lambda x: sort_key in x, results)
         results = sorted(results, key=lambda x: x[sort_key], reverse=True)
-        if len(results):
-            write_results(results_file, results)
     else:
         results = benchmark(args)
 
+    if args.results_file:
+        write_results(args.results_file, results, format=args.results_format)
+
     # output results in JSON to stdout w/ delimiter for runner script
     print(f'--result\n{json.dumps(results, indent=4)}')
 
 
-def write_results(results_file, results):
+def write_results(results_file, results, format='csv'):
     with open(results_file, mode='w') as cf:
-        dw = csv.DictWriter(cf, fieldnames=results[0].keys())
-        dw.writeheader()
-        for r in results:
-            dw.writerow(r)
-        cf.flush()
+        if format == 'json':
+            json.dump(results, cf, indent=4)
+        else:
+            if not isinstance(results, (list, tuple)):
+                results = [results]
+            if not results:
+                return
+            dw = csv.DictWriter(cf, fieldnames=results[0].keys())
+            dw.writeheader()
+            for r in results:
+                dw.writerow(r)
+            cf.flush()
 
 
 if __name__ == '__main__':
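
A usage sketch for the reworked writer; the result keys below are hypothetical stand-ins for the script's real summary fields:

    results = [
        {'model': 'resnet50', 'infer_samples_per_sec': 1234.5},  # hypothetical fields
    ]
    write_results('benchmark.json', results, format='json')  # new JSON path
    write_results('benchmark.csv', results)                  # csv remains the default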