Merge pull request #99 from andravin/save-last

Modified save_checkpoint to always save last checkpoint. Fixes #98.
pull/105/head
Ross Wightman 4 years ago committed by GitHub
commit 7deacf5477
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

@ -62,6 +62,12 @@ class CheckpointSaver:
def save_checkpoint(self, model, optimizer, args, epoch, model_ema=None, metric=None, use_amp=False):
assert epoch >= 0
tmp_save_path = os.path.join(self.checkpoint_dir, 'tmp' + self.extension)
last_save_path = os.path.join(self.checkpoint_dir, 'last' + self.extension)
self._save(tmp_save_path, model, optimizer, args, epoch, model_ema, metric, use_amp)
if os.path.exists(last_save_path):
os.unlink(last_save_path) # required for Windows support.
os.rename(tmp_save_path, last_save_path)
worst_file = self.checkpoint_files[-1] if self.checkpoint_files else None
if (len(self.checkpoint_files) < self.max_history
or metric is None or self.cmp(metric, worst_file[1])):
@ -69,7 +75,7 @@ class CheckpointSaver:
self._cleanup_checkpoints(1)
filename = '-'.join([self.save_prefix, str(epoch)]) + self.extension
save_path = os.path.join(self.checkpoint_dir, filename)
self._save(save_path, model, optimizer, args, epoch, model_ema, metric, use_amp)
os.link(last_save_path, save_path)
self.checkpoint_files.append((save_path, metric))
self.checkpoint_files = sorted(
self.checkpoint_files, key=lambda x: x[1],
@ -83,7 +89,10 @@ class CheckpointSaver:
if metric is not None and (self.best_metric is None or self.cmp(metric, self.best_metric)):
self.best_epoch = epoch
self.best_metric = metric
shutil.copyfile(save_path, os.path.join(self.checkpoint_dir, 'model_best' + self.extension))
best_save_path = os.path.join(self.checkpoint_dir, 'model_best' + self.extension)
if os.path.exists(best_save_path):
os.unlink(best_save_path)
os.link(last_save_path, best_save_path)
return (None, None) if self.best_metric is None else (self.best_metric, self.best_epoch)

Loading…
Cancel
Save