diff --git a/timm/utils.py b/timm/utils.py index 1da69a96..8957d564 100644 --- a/timm/utils.py +++ b/timm/utils.py @@ -62,6 +62,12 @@ class CheckpointSaver: def save_checkpoint(self, model, optimizer, args, epoch, model_ema=None, metric=None, use_amp=False): assert epoch >= 0 + tmp_save_path = os.path.join(self.checkpoint_dir, 'tmp' + self.extension) + last_save_path = os.path.join(self.checkpoint_dir, 'last' + self.extension) + self._save(tmp_save_path, model, optimizer, args, epoch, model_ema, metric, use_amp) + if os.path.exists(last_save_path): + os.unlink(last_save_path) # required for Windows support. + os.rename(tmp_save_path, last_save_path) worst_file = self.checkpoint_files[-1] if self.checkpoint_files else None if (len(self.checkpoint_files) < self.max_history or metric is None or self.cmp(metric, worst_file[1])): @@ -69,7 +75,7 @@ class CheckpointSaver: self._cleanup_checkpoints(1) filename = '-'.join([self.save_prefix, str(epoch)]) + self.extension save_path = os.path.join(self.checkpoint_dir, filename) - self._save(save_path, model, optimizer, args, epoch, model_ema, metric, use_amp) + os.link(last_save_path, save_path) self.checkpoint_files.append((save_path, metric)) self.checkpoint_files = sorted( self.checkpoint_files, key=lambda x: x[1], @@ -83,7 +89,10 @@ class CheckpointSaver: if metric is not None and (self.best_metric is None or self.cmp(metric, self.best_metric)): self.best_epoch = epoch self.best_metric = metric - shutil.copyfile(save_path, os.path.join(self.checkpoint_dir, 'model_best' + self.extension)) + best_save_path = os.path.join(self.checkpoint_dir, 'model_best' + self.extension) + if os.path.exists(best_save_path): + os.unlink(best_save_path) + os.link(last_save_path, best_save_path) return (None, None) if self.best_metric is None else (self.best_metric, self.best_epoch)