#!/usr/bin/env python3
""" Checkpoint Cleaning Script

Takes training checkpoints with GPU tensors, optimizer state, extra dict keys, etc.
and outputs a CPU tensor checkpoint with only the `state_dict`, along with a SHA256
hash for model zoo compatibility.

Hacked together by / Copyright 2020 Ross Wightman (https://github.com/rwightman)
"""
import torch
import argparse
import os
import hashlib
import shutil
from timm.models import load_state_dict

try:
    import safetensors.torch
    _has_safetensors = True
except ImportError:
    _has_safetensors = False

parser = argparse.ArgumentParser(description='PyTorch Checkpoint Cleaner')
parser.add_argument('--checkpoint', default='', type=str, metavar='PATH',
                    help='path to latest checkpoint (default: none)')
parser.add_argument('--output', default='', type=str, metavar='PATH',
                    help='output path')
parser.add_argument('--no-use-ema', dest='no_use_ema', action='store_true',
                    help='do not use the EMA version of weights, even if present')
parser.add_argument('--no-hash', dest='no_hash', action='store_true',
                    help='no hash in output filename')
parser.add_argument('--clean-aux-bn', dest='clean_aux_bn', action='store_true',
                    help='remove auxiliary batch norm layers (from SplitBN training) from checkpoint')
parser.add_argument('--safetensors', action='store_true',
                    help='Save weights using safetensors instead of the default torch way (pickle).')


def main():
    args = parser.parse_args()

    if os.path.exists(args.output):
        print("Error: Output filename ({}) already exists.".format(args.output))
        exit(1)

    clean_checkpoint(
        args.checkpoint,
        args.output,
        not args.no_use_ema,
        args.no_hash,
        args.clean_aux_bn,
        safe_serialization=args.safetensors,
    )


def clean_checkpoint(
        checkpoint,
        output,
        use_ema=True,
        no_hash=False,
        clean_aux_bn=False,
        safe_serialization: bool = False,
):
    # Load an existing checkpoint to CPU, strip everything but the state_dict, and re-save.
    if checkpoint and os.path.isfile(checkpoint):
        print("=> Loading checkpoint '{}'".format(checkpoint))
        state_dict = load_state_dict(checkpoint, use_ema=use_ema)
        new_state_dict = {}
        for k, v in state_dict.items():
            if clean_aux_bn and 'aux_bn' in k:
                # If all aux_bn keys are removed, the SplitBN layers will end up as normal and
                # load with the unmodified model using BatchNorm2d.
                continue
            # Strip the 'module.' prefix left by DataParallel/DistributedDataParallel wrappers.
            name = k[7:] if k.startswith('module.') else k
            new_state_dict[name] = v
        print("=> Loaded state_dict from '{}'".format(checkpoint))

        # Derive the output directory, base name, and extension. If no explicit output
        # path was given, reuse the input checkpoint's base name.
        ext = ''
        if output:
            checkpoint_root, checkpoint_base = os.path.split(output)
            checkpoint_base, ext = os.path.splitext(checkpoint_base)
        else:
            checkpoint_root = ''
            checkpoint_base = os.path.split(checkpoint)[1]
            checkpoint_base = os.path.splitext(checkpoint_base)[0]

        # Save to a temporary file first so the SHA256 of the final bytes can be
        # embedded in the output filename.
        temp_filename = '__' + checkpoint_base
        if safe_serialization:
            assert _has_safetensors, "`pip install safetensors` to use .safetensors"
            safetensors.torch.save_file(new_state_dict, temp_filename)
        else:
            torch.save(new_state_dict, temp_filename)

        with open(temp_filename, 'rb') as f:
            sha_hash = hashlib.sha256(f.read()).hexdigest()

        if ext:
            final_ext = ext
        else:
            final_ext = '.safetensors' if safe_serialization else '.pth'

        if no_hash:
            final_filename = checkpoint_base + final_ext
        else:
            # Append the first 8 hex chars of the hash, matching the model zoo convention.
            final_filename = '-'.join([checkpoint_base, sha_hash[:8]]) + final_ext

        shutil.move(temp_filename, os.path.join(checkpoint_root, final_filename))
        print("=> Saved state_dict to '{}', SHA256: {}".format(final_filename, sha_hash))
        return final_filename
    else:
        print("Error: Checkpoint ({}) doesn't exist".format(checkpoint))
        return ''


if __name__ == '__main__':
    main()
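
# ---------------------------------------------------------------------------
# Usage sketch (illustrative, not part of the original script). The input
# checkpoint path below is hypothetical; point it at a checkpoint produced by
# your own training run, and adjust the module name if this file is saved
# under a different name than `clean_checkpoint.py`.
#
# Command line:
#   python clean_checkpoint.py --checkpoint output/train/checkpoint-10.pth.tar \
#       --output cleaned.pth
#
# Programmatic:
#   from clean_checkpoint import clean_checkpoint
#   final_name = clean_checkpoint(
#       'output/train/checkpoint-10.pth.tar',  # hypothetical input checkpoint
#       'cleaned.pth',                         # desired output path
#       use_ema=True,
#   )
#   # final_name will look like 'cleaned-<8 hash chars>.pth' unless no_hash is set.
# ---------------------------------------------------------------------------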
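
# ---------------------------------------------------------------------------
# Verification sketch (assumption: a cleaned file named per the convention
# above, e.g. 'cleaned-abcd1234.pth'). The embedded 8-char prefix should match
# the SHA256 of the file's bytes, mirroring the hashing done by this script:
#
#   import hashlib
#   path = 'cleaned-abcd1234.pth'  # hypothetical cleaned checkpoint
#   with open(path, 'rb') as f:
#       digest = hashlib.sha256(f.read()).hexdigest()
#   assert digest.startswith('abcd1234'), 'hash prefix does not match filename'
# ---------------------------------------------------------------------------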