#!/usr/bin/env python3
""" Checkpoint Cleaning Script

Takes training checkpoints with GPU tensors, optimizer state, extra dict keys, etc.
and outputs a CPU tensor checkpoint with only the `state_dict`, along with a SHA256
hash of the file for model zoo compatibility.

Hacked together by / Copyright 2020 Ross Wightman (https://github.com/rwightman)
"""
import argparse
import hashlib
import os
import shutil

import torch

from timm.models import load_state_dict

try:
    import safetensors.torch
    _has_safetensors = True
except ImportError:
    _has_safetensors = False


parser = argparse.ArgumentParser(description='PyTorch Checkpoint Cleaner')
parser.add_argument('--checkpoint', default='', type=str, metavar='PATH',
                    help='path to the checkpoint to clean (default: none)')
parser.add_argument('--output', default='', type=str, metavar='PATH',
                    help='output path (default: derived from the checkpoint path)')
parser.add_argument('--no-use-ema', dest='no_use_ema', action='store_true',
                    help='do not use the EMA version of the weights, even if present')
parser.add_argument('--no-hash', dest='no_hash', action='store_true',
                    help='do not append the SHA256 hash to the output filename')
parser.add_argument('--clean-aux-bn', dest='clean_aux_bn', action='store_true',
                    help='remove auxiliary batch norm layers (from SplitBN training) from checkpoint')
parser.add_argument('--safetensors', action='store_true',
                    help='save weights using safetensors instead of the default torch way (pickle)')
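
# Example invocations (hypothetical paths, shown for illustration only):
#
#   ./clean_checkpoint.py --checkpoint output/train/checkpoint-42.pth.tar --output model.pth
#   ./clean_checkpoint.py --checkpoint output/train/checkpoint-42.pth.tar --safetensors
#
# The first saves e.g. 'model-a1b2c3d4.pth' (base name plus the first 8 chars of the
# SHA256 hash); the second derives the base name from the checkpoint file and saves
# with a '.safetensors' extension.
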
def main():
    args = parser.parse_args()

    if os.path.exists(args.output):
        print("Error: Output filename ({}) already exists.".format(args.output))
        exit(1)

    clean_checkpoint(
        args.checkpoint,
        args.output,
        not args.no_use_ema,
        args.no_hash,
        args.clean_aux_bn,
        safe_serialization=args.safetensors,
    )


def clean_checkpoint(
        checkpoint,
        output,
        use_ema=True,
        no_hash=False,
        clean_aux_bn=False,
        safe_serialization: bool = False,
):
    # Load an existing checkpoint to CPU, strip everything but the state_dict and re-save
    if checkpoint and os.path.isfile(checkpoint):
        print("=> Loading checkpoint '{}'".format(checkpoint))
        state_dict = load_state_dict(checkpoint, use_ema=use_ema)
        new_state_dict = {}
        for k, v in state_dict.items():
            if clean_aux_bn and 'aux_bn' in k:
                # If all aux_bn keys are removed, the SplitBN layers will end up as normal and
                # load with the unmodified model using BatchNorm2d.
                continue
            # strip 'module.' prefix left by DataParallel / DistributedDataParallel wrappers
            name = k[7:] if k.startswith('module.') else k
            new_state_dict[name] = v
        print("=> Loaded state_dict from '{}'".format(checkpoint))

        ext = ''
        if output:
            checkpoint_root, checkpoint_base = os.path.split(output)
            checkpoint_base, ext = os.path.splitext(checkpoint_base)
        else:
            checkpoint_root = ''
            checkpoint_base = os.path.split(checkpoint)[1]
            checkpoint_base = os.path.splitext(checkpoint_base)[0]

        # save to a temp file first so the hash can be computed before the final rename
        temp_filename = '__' + checkpoint_base
        if safe_serialization:
            assert _has_safetensors, "`pip install safetensors` to use .safetensors"
            safetensors.torch.save_file(new_state_dict, temp_filename)
        else:
            torch.save(new_state_dict, temp_filename)

        with open(temp_filename, 'rb') as f:
            sha_hash = hashlib.sha256(f.read()).hexdigest()

        if ext:
            final_ext = ext
        else:
            final_ext = '.safetensors' if safe_serialization else '.pth'

        if no_hash:
            final_filename = checkpoint_base + final_ext
        else:
            # model zoo convention: append the first 8 chars of the SHA256 to the filename
            final_filename = '-'.join([checkpoint_base, sha_hash[:8]]) + final_ext

        shutil.move(temp_filename, os.path.join(checkpoint_root, final_filename))
        print("=> Saved state_dict to '{}', SHA256: {}".format(final_filename, sha_hash))
        return final_filename
    else:
        print("Error: Checkpoint ({}) doesn't exist".format(checkpoint))
        return ''


if __name__ == '__main__':
    main()
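
# A cleaned '.pth' file can be sanity checked by loading it back as a plain state_dict
# (hypothetical filename shown):
#
#   state_dict = torch.load('model-a1b2c3d4.pth', map_location='cpu')
#   print(sum(v.numel() for v in state_dict.values()), 'tensor elements')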