Merge pull request #550 from amaarora/wandb

Wandb Support
pull/571/head
Ross Wightman 4 years ago committed by GitHub
commit e685618f45
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

@ -5,7 +5,10 @@ Hacked together by / Copyright 2020 Ross Wightman
import csv import csv
import os import os
from collections import OrderedDict from collections import OrderedDict
try:
import wandb
except ImportError:
pass
def get_outdir(path, *paths, inc=False): def get_outdir(path, *paths, inc=False):
outdir = os.path.join(path, *paths) outdir = os.path.join(path, *paths)
@ -23,10 +26,12 @@ def get_outdir(path, *paths, inc=False):
return outdir return outdir
def update_summary(epoch, train_metrics, eval_metrics, filename, write_header=False): def update_summary(epoch, train_metrics, eval_metrics, filename, write_header=False, log_wandb=False):
rowd = OrderedDict(epoch=epoch) rowd = OrderedDict(epoch=epoch)
rowd.update([('train_' + k, v) for k, v in train_metrics.items()]) rowd.update([('train_' + k, v) for k, v in train_metrics.items()])
rowd.update([('eval_' + k, v) for k, v in eval_metrics.items()]) rowd.update([('eval_' + k, v) for k, v in eval_metrics.items()])
if log_wandb:
wandb.log(rowd)
with open(filename, mode='a') as cf: with open(filename, mode='a') as cf:
dw = csv.DictWriter(cf, fieldnames=rowd.keys()) dw = csv.DictWriter(cf, fieldnames=rowd.keys())
if write_header: # first iteration (epoch == 1 can't be used) if write_header: # first iteration (epoch == 1 can't be used)

@ -52,6 +52,12 @@ try:
except AttributeError: except AttributeError:
pass pass
try:
import wandb
has_wandb = True
except ImportError:
has_wandb = False
torch.backends.cudnn.benchmark = True torch.backends.cudnn.benchmark = True
_logger = logging.getLogger('train') _logger = logging.getLogger('train')
@ -271,6 +277,8 @@ parser.add_argument('--use-multi-epochs-loader', action='store_true', default=Fa
help='use the multi-epochs-loader to save time at the beginning of every epoch') help='use the multi-epochs-loader to save time at the beginning of every epoch')
parser.add_argument('--torchscript', dest='torchscript', action='store_true', parser.add_argument('--torchscript', dest='torchscript', action='store_true',
help='convert model torchscript for inference') help='convert model torchscript for inference')
parser.add_argument('--log-wandb', action='store_true', default=False,
help='log training and validation metrics to wandb')
def _parse_args(): def _parse_args():
@ -293,7 +301,14 @@ def _parse_args():
def main(): def main():
setup_default_logging() setup_default_logging()
args, args_text = _parse_args() args, args_text = _parse_args()
if args.log_wandb:
if has_wandb:
wandb.init(project=args.experiment, config=args)
else:
_logger.warning("You've requested to log metrics to wandb but package not found. "
"Metrics not being logged to wandb, try `pip install wandb`")
args.prefetcher = not args.no_prefetcher args.prefetcher = not args.no_prefetcher
args.distributed = False args.distributed = False
if 'WORLD_SIZE' in os.environ: if 'WORLD_SIZE' in os.environ:
@ -593,7 +608,7 @@ def main():
update_summary( update_summary(
epoch, train_metrics, eval_metrics, os.path.join(output_dir, 'summary.csv'), epoch, train_metrics, eval_metrics, os.path.join(output_dir, 'summary.csv'),
write_header=best_metric is None) write_header=best_metric is None, log_wandb=args.log_wandb and has_wandb)
if saver is not None: if saver is not None:
# save proper checkpoint with eval metric # save proper checkpoint with eval metric

Loading…
Cancel
Save