@@ -723,13 +723,19 @@ default_cfgs = generate_default_cfgs({
 
     # CLIP weights, fine-tuned on in1k or in12k + in1k
     'convnext_base.clip_laion2b_augreg_ft_in1k': _cfg(
-        # hf_hub_id='timm/',
+        hf_hub_id='timm/',
         mean=OPENAI_CLIP_MEAN, std=OPENAI_CLIP_STD,
         input_size=(3, 256, 256), pool_size=(8, 8), crop_pct=1.0),
     'convnext_base.clip_laiona_augreg_ft_in1k_384': _cfg(
-        # hf_hub_id='timm/',
+        hf_hub_id='timm/',
         mean=OPENAI_CLIP_MEAN, std=OPENAI_CLIP_STD,
         input_size=(3, 384, 384), pool_size=(12, 12), crop_pct=1.0),
+    'convnext_large_mlp.clip_laion2b_augreg_ft_in1k': _cfg(
+        hf_hub_id='timm/',
+        mean=OPENAI_CLIP_MEAN, std=OPENAI_CLIP_STD,
+        input_size=(3, 256, 256), pool_size=(8, 8), crop_pct=1.0
+    ),
+
 
     # CLIP based weights, original image tower weights and fine-tunes
     'convnext_base.clip_laion2b': _cfg(