Update benchmark and validate scripts to output results in JSON with a fixed delimiter for use in multi-process launcher

pull/1112/head
Ross Wightman 3 years ago
parent 1331c145a3
commit cf4334391e

benchmark.py

@@ -473,6 +473,7 @@ def decay_batch_exp(batch_size, factor=0.5, divisor=16):
 def _try_run(model_name, bench_fn, initial_batch_size, bench_kwargs):
     batch_size = initial_batch_size
     results = dict()
+    error_str = 'Unknown'
     while batch_size >= 1:
         torch.cuda.empty_cache()
         try:
@@ -480,13 +481,13 @@ def _try_run(model_name, bench_fn, initial_batch_size, bench_kwargs):
             results = bench.run()
             return results
         except RuntimeError as e:
-            e_str = str(e)
-            print(e_str)
-            if 'channels_last' in e_str:
-                print(f'Error: {model_name} not supported in channels_last, skipping.')
+            error_str = str(e)
+            if 'channels_last' in error_str:
+                _logger.error(f'{model_name} not supported in channels_last, skipping.')
                 break
-            print(f'Error: "{e_str}" while running benchmark. Reducing batch size to {batch_size} for retry.')
+            _logger.warning(f'"{error_str}" while running benchmark. Reducing batch size to {batch_size} for retry.')
             batch_size = decay_batch_exp(batch_size)
+    results['error'] = error_str
     return results
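On failure, the retry path above shrinks the batch with decay_batch_exp(batch_size, factor=0.5, divisor=16); only its signature appears in this diff. A minimal sketch of what an exponential-decay helper with that signature could look like (an assumption, not the actual implementation), written so the while batch_size >= 1 loop above always terminates:

def decay_batch_exp(batch_size, factor=0.5, divisor=16):
    # Hypothetical sketch: scale the batch size down by `factor` and, while it
    # is still large, snap it to a multiple of `divisor` so retries stay on
    # hardware-friendly sizes. The value strictly decreases and eventually
    # reaches 0, which ends the retry loop in _try_run.
    out_batch_size = int(batch_size * factor)
    if out_batch_size >= divisor:
        out_batch_size = out_batch_size // divisor * divisor
    return out_batch_size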
@@ -528,13 +529,14 @@ def benchmark(args):
     model_results = OrderedDict(model=model)
     for prefix, bench_fn in zip(prefixes, bench_fns):
         run_results = _try_run(model, bench_fn, initial_batch_size=batch_size, bench_kwargs=bench_kwargs)
-        if prefix:
+        if prefix and 'error' not in run_results:
             run_results = {'_'.join([prefix, k]): v for k, v in run_results.items()}
         model_results.update(run_results)
-    param_count = model_results.pop('infer_param_count', model_results.pop('train_param_count', 0))
-    model_results.setdefault('param_count', param_count)
-    model_results.pop('train_param_count', 0)
-    return model_results if model_results['param_count'] else dict()
+    if 'error' not in model_results:
+        param_count = model_results.pop('infer_param_count', model_results.pop('train_param_count', 0))
+        model_results.setdefault('param_count', param_count)
+        model_results.pop('train_param_count', 0)
+    return model_results


 def main():
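For reference, the prefix handling above rewrites each run's keys into infer_*/train_* entries before merging, and the final block collapses the duplicated parameter counts into a single param_count. A small illustration of that key transformation; the key names and values here are placeholders, not real benchmark output:

from collections import OrderedDict

# Placeholder results from two hypothetical bench runs.
infer_run = {'samples_per_sec': 0.0, 'batch_size': 256, 'param_count': 25.6}
train_run = {'samples_per_sec': 0.0, 'batch_size': 128, 'param_count': 25.6}

model_results = OrderedDict(model='some_model')
for prefix, run_results in [('infer', infer_run), ('train', train_run)]:
    if prefix and 'error' not in run_results:
        run_results = {'_'.join([prefix, k]): v for k, v in run_results.items()}
    model_results.update(run_results)

# Keep one canonical 'param_count', dropping the prefixed duplicates.
if 'error' not in model_results:
    param_count = model_results.pop('infer_param_count', model_results.pop('train_param_count', 0))
    model_results.setdefault('param_count', param_count)
    model_results.pop('train_param_count', 0)
# model_results now holds keys like 'infer_samples_per_sec', 'train_batch_size',
# and a single 'param_count'.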
@@ -578,13 +580,15 @@ def main():
             sort_key = 'train_samples_per_sec'
         elif 'profile' in args.bench:
             sort_key = 'infer_gmacs'
+        results = filter(lambda x: sort_key in x, results)
         results = sorted(results, key=lambda x: x[sort_key], reverse=True)
         if len(results):
             write_results(results_file, results)
     else:
         results = benchmark(args)
-        json_str = json.dumps(results, indent=4)
-        print(json_str)
+
+    # output results in JSON to stdout w/ delimiter for runner script
+    print(f'--result\n{json.dumps(results, indent=4)}')


 def write_results(results_file, results):
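The benchmark script (and, below, the validate script) now prints its results dict as indented JSON after a literal --result line, so a launcher can recover structured results from captured stdout even when the process also prints other text first. A minimal sketch of the consuming side, assuming the launcher captures stdout from a subprocess; the runner script itself is not part of this commit:

import json
import subprocess

def run_and_collect(cmd):
    # Run one benchmark/validate process and parse the JSON that follows the
    # '--result' delimiter on stdout. Anything printed before the delimiter
    # (progress output, warnings) is ignored.
    proc = subprocess.run(cmd, capture_output=True, text=True)
    _, _, json_blob = proc.stdout.partition('--result\n')
    return json.loads(json_blob) if json_blob else None

# Hypothetical usage:
# result = run_and_collect(['python', 'benchmark.py', '--model', 'resnet50'])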

validate.py

@@ -11,6 +11,7 @@ import argparse
 import os
 import csv
 import glob
+import json
 import time
 import logging
 import torch
@@ -263,6 +264,7 @@ def validate(args):
     else:
         top1a, top5a = top1.avg, top5.avg
     results = OrderedDict(
+        model=args.model,
         top1=round(top1a, 4), top1_err=round(100 - top1a, 4),
         top5=round(top5a, 4), top5_err=round(100 - top5a, 4),
         param_count=round(param_count / 1e6, 2),
@@ -276,6 +278,27 @@ def validate(args):
     return results


+def _try_run(args, initial_batch_size):
+    batch_size = initial_batch_size
+    results = OrderedDict()
+    error_str = 'Unknown'
+    while batch_size >= 1:
+        args.batch_size = batch_size
+        torch.cuda.empty_cache()
+        try:
+            results = validate(args)
+            return results
+        except RuntimeError as e:
+            error_str = str(e)
+            if 'channels_last' in error_str:
+                break
+            _logger.warning(f'"{error_str}" while running validation. Reducing batch size to {batch_size} for retry.')
+            batch_size = batch_size // 2
+    results['error'] = error_str
+    _logger.error(f'{args.model} failed to validate ({error_str}).')
+    return results
+
+
 def main():
     setup_default_logging()
     args = parser.parse_args()
@@ -308,36 +331,25 @@ def main():
         _logger.info('Running bulk validation on these pretrained models: {}'.format(', '.join(model_names)))
         results = []
         try:
-            start_batch_size = args.batch_size
+            initial_batch_size = args.batch_size
             for m, c in model_cfgs:
-                batch_size = start_batch_size
                 args.model = m
                 args.checkpoint = c
-                result = OrderedDict(model=args.model)
-                r = {}
-                while not r and batch_size >= args.num_gpu:
-                    torch.cuda.empty_cache()
-                    try:
-                        args.batch_size = batch_size
-                        print('Validating with batch size: %d' % args.batch_size)
-                        r = validate(args)
-                    except RuntimeError as e:
-                        if batch_size <= args.num_gpu:
-                            print("Validation failed with no ability to reduce batch size. Exiting.")
-                            raise e
-                        batch_size = max(batch_size // 2, args.num_gpu)
-                        print("Validation failed, reducing batch size by 50%")
-                result.update(r)
+                r = _try_run(args, initial_batch_size)
+                if 'error' in r:
+                    continue
                 if args.checkpoint:
-                    result['checkpoint'] = args.checkpoint
-                results.append(result)
+                    r['checkpoint'] = args.checkpoint
+                results.append(r)
         except KeyboardInterrupt as e:
             pass
         results = sorted(results, key=lambda x: x['top1'], reverse=True)
         if len(results):
             write_results(results_file, results)
     else:
-        validate(args)
+        results = validate(args)
+    # output results in JSON to stdout w/ delimiter for runner script
+    print(f'--result\n{json.dumps(results, indent=4)}')


 def write_results(results_file, results):
