make train.py compatible with torchrun

pull/1336/head
Xiao Wang 2 years ago
parent beef62e7ab
commit 11060f84c5

@@ -355,6 +355,8 @@ def main():
     args.world_size = 1
     args.rank = 0  # global rank
     if args.distributed:
+        if 'LOCAL_RANK' in os.environ:
+            args.local_rank = int(os.getenv('LOCAL_RANK'))
         args.device = 'cuda:%d' % args.local_rank
         torch.cuda.set_device(args.local_rank)
         torch.distributed.init_process_group(backend='nccl', init_method='env://')
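
Note: torchrun spawns one process per GPU and passes the local rank through the LOCAL_RANK environment variable instead of the --local_rank command-line argument used by the older torch.distributed.launch, which is why the change above falls back to os.environ. A minimal standalone sketch of the same pattern (variable names here are illustrative, not taken from train.py):

    import os
    import torch

    # torchrun sets LOCAL_RANK (along with RANK and WORLD_SIZE) for each process it spawns;
    # default to 0 so the script still runs single-process without torchrun.
    local_rank = int(os.environ.get('LOCAL_RANK', 0))
    torch.cuda.set_device(local_rank)
    torch.distributed.init_process_group(backend='nccl', init_method='env://')

    # Example launch: torchrun --nproc_per_node=4 train.py ...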
