@ -45,7 +45,8 @@ def _get_coreml_inputs(sample_inputs, args):
# Simpler version of `DiagonalGaussianDistribution` with only needed calculations
# as implemented in vae.py as part of the AutoencoderKL class
# This is because coreml tools does not support the `randn` operation, so we pass in a random tensor.
# https://github.com/huggingface/diffusers/blob/main/src/diffusers/models/vae.py#L312
# coremltools-6.1 does not yet implement the randn operation with the option of setting a random seed
class CoreMLDiagonalGaussianDistribution ( object ) :
def __init__ ( self , parameters , noise ) :
self . parameters = parameters
@ -55,10 +56,7 @@ class CoreMLDiagonalGaussianDistribution(object):
self . std = torch . exp ( 0.5 * self . logvar )
def sample ( self ) - > torch . FloatTensor :
device = self . parameters . device
# make sure sample is on the same device as the parameters and has same dtype
sample = self . noise . to ( device = device , dtype = self . parameters . dtype )
x = self . mean + self . std * sample
x = self . mean + self . std * self . noise
return x
def compute_psnr ( a , b ) :
@ -489,8 +487,8 @@ def convert_vae_encoder(pipe, args):
sample_shape = (
1 , # B
3 , # C (RGB range from -1 to 1)
args . latent_h or pipe . unet . config . sample_size * 8 , # H
args . latent_w or pipe . unet . config . sample_size * 8 , # w
( args . latent_h or pipe . unet . config . sample_size ) * 8 , # H
( args . latent_w or pipe . unet . config . sample_size ) * 8 , # w
)
noise_shape = (
@ -505,15 +503,15 @@ def convert_vae_encoder(pipe, args):
1 ,
)
sqrt AlphasCumprodTorchS hape = torch . tensor ( [ [ 0.2 , ] ] )
sqrt OneMinusAlphasCumprodTorchS hape = torch . tensor ( [ [ 0.8 , ] ] )
sqrt _alphas_cumprod_torch_s hape = torch . tensor ( [ [ 0.2 , ] ] )
sqrt _one_minus_alphas_cumprod_torch_s hape = torch . tensor ( [ [ 0.8 , ] ] )
sample_vae_encoder_inputs = {
" sample " : torch . rand ( * sample_shape , dtype = torch . float16 ) ,
" diagonal N oise" : torch . rand ( * noise_shape , dtype = torch . float16 ) ,
" diagonal _n oise" : torch . rand ( * noise_shape , dtype = torch . float16 ) ,
" noise " : torch . rand ( * noise_shape , dtype = torch . float16 ) ,
" sqrt AlphasC umprod" : torch . rand ( * float_value_shape , dtype = torch . float16 ) ,
" sqrt OneMinusAlphasC umprod" : torch . rand ( * float_value_shape , dtype = torch . float16 ) ,
" sqrt _alphas_c umprod" : torch . rand ( * float_value_shape , dtype = torch . float16 ) ,
" sqrt _one_minus_alphas_c umprod" : torch . rand ( * float_value_shape , dtype = torch . float16 ) ,
}
class VAEEncoder ( nn . Module ) :
@ -528,31 +526,27 @@ def convert_vae_encoder(pipe, args):
# Because CoreMLTools does not support the torch.randn op, we pass in both
# the diagonal Noise for the `DiagonalGaussianDistribution` operation and
# the noise tensor combined with precalculated `sqrt AlphasCumprod` and `sqrtOneMinusAlphasC umprod`
# the noise tensor combined with precalculated `sqrt _alphas_cumprod` and `sqrt_one_minus_alphas_c umprod`
# for faster computation.
def forward ( self , sample , diagonal Noise, noise , sqrtAlphasCumprod , sqrtOneMinusAlphasC umprod) :
def forward ( self , sample , diagonal _noise, noise , sqrt_alphas_cumprod , sqrt_one_minus_alphas_c umprod) :
h = self . encoder ( sample )
moments = self . quant_conv ( h )
diagonalNoise = diagonalNoise . to ( sample . device )
posterior = CoreMLDiagonalGaussianDistribution ( moments , diagonalNoise )
posterior = CoreMLDiagonalGaussianDistribution ( moments , diagonal_noise )
posteriorSample = posterior . sample ( )
# Add the scaling operation and the latent noise for faster computation
init_latents = 0.18215 * posteriorSample
result = self . add_noise ( init_latents , noise , sqrt AlphasCumprod, sqrtOneMinusAlphasC umprod)
result = self . add_noise ( init_latents , noise , sqrt _alphas_cumprod, sqrt_one_minus_alphas_c umprod)
return result
def add_noise (
self ,
original_samples : torch . FloatTensor ,
noise : torch . FloatTensor ,
sqrt AlphasC umprod: torch . FloatTensor ,
sqrt OneMinusAlphasC umprod: torch . FloatTensor
sqrt _alphas_c umprod: torch . FloatTensor ,
sqrt _one_minus_alphas_c umprod: torch . FloatTensor
) - > torch . FloatTensor :
noise = noise . to ( original_samples . device )
sqrtAlphasCumprod = sqrtAlphasCumprod . to ( original_samples . device )
sqrtOneMinusAlphasCumprod = sqrtOneMinusAlphasCumprod . to ( original_samples . device )
noisy_samples = sqrtAlphasCumprod * original_samples + sqrtOneMinusAlphasCumprod * noise
noisy_samples = sqrt_alphas_cumprod * original_samples + sqrt_one_minus_alphas_cumprod * noise
return noisy_samples
@ -562,10 +556,10 @@ def convert_vae_encoder(pipe, args):
traced_vae_encoder = torch . jit . trace (
baseline_encoder , (
sample_vae_encoder_inputs [ " sample " ] . to ( torch . float32 ) ,
sample_vae_encoder_inputs [ " diagonal N oise" ] . to ( torch . float32 ) ,
sample_vae_encoder_inputs [ " diagonal _n oise" ] . to ( torch . float32 ) ,
sample_vae_encoder_inputs [ " noise " ] . to ( torch . float32 ) ,
sqrt AlphasCumprodTorchS hape. to ( torch . float32 ) ,
sqrt OneMinusAlphasCumprodTorchS hape. to ( torch . float32 )
sqrt _alphas_cumprod_torch_s hape. to ( torch . float32 ) ,
sqrt _one_minus_alphas_cumprod_torch_s hape. to ( torch . float32 )
) )
modify_coremltools_torch_frontend_badbmm ( )
@ -584,18 +578,18 @@ def convert_vae_encoder(pipe, args):
# Set the input descriptions
coreml_vae_encoder . input_description [ " sample " ] = \
" An image of the correct size to create the latent space with, image2image and in-painting. "
coreml_vae_encoder . input_description [ " diagonal N oise" ] = \
coreml_vae_encoder . input_description [ " diagonal _n oise" ] = \
" Latent noise for `DiagonalGaussianDistribution` operation. "
coreml_vae_encoder . input_description [ " noise " ] = \
" Latent noise for use with strength parameter of image2image "
coreml_vae_encoder . input_description [ " sqrt AlphasC umprod" ] = \
" Precalculated `sqrt AlphasC umprod` value based on strength and the current schedular' s alphasCumprod values "
coreml_vae_encoder . input_description [ " sqrt OneMinusAlphasC umprod" ] = \
" Precalculated `sqrt OneMinusAlphasC umprod` value based on strength and the current schedular' s alphasCumprod values "
coreml_vae_encoder . input_description [ " sqrt _alphas_c umprod" ] = \
" Precalculated `sqrt _alphas_c umprod` value based on strength and the current schedular' s alphasCumprod values "
coreml_vae_encoder . input_description [ " sqrt _one_minus_alphas_c umprod" ] = \
" Precalculated `sqrt _one_minus_alphas_c umprod` value based on strength and the current schedular' s alphasCumprod values "
# Set the output descriptions
coreml_vae_encoder . output_description [
" latent_dist " ] = " The latent embeddings from the unet model from the input image for image2image ."
" latent_dist " ] = " The latent embeddings from the unet model from the input image ."
_save_mlpackage ( coreml_vae_encoder , out_path )
@ -605,20 +599,20 @@ def convert_vae_encoder(pipe, args):
if args . check_output_correctness :
baseline_out = baseline_encoder (
sample = sample_vae_encoder_inputs [ " sample " ] . to ( torch . float32 ) ,
diagonal Noise= sample_vae_encoder_inputs [ " diagonalN oise" ] . to ( torch . float32 ) ,
diagonal _noise= sample_vae_encoder_inputs [ " diagonal_n oise" ] . to ( torch . float32 ) ,
noise = sample_vae_encoder_inputs [ " noise " ] . to ( torch . float32 ) ,
sqrt AlphasCumprod= sqrtAlphasCumprodTorchS hape,
sqrt OneMinusAlphasCumprod= sqrtOneMinusAlphasCumprodTorchS hape,
sqrt _alphas_cumprod= sqrt_alphas_cumprod_torch_s hape,
sqrt _one_minus_alphas_cumprod= sqrt_one_minus_alphas_cumprod_torch_s hape,
) . numpy ( ) ,
coreml_out = list (
coreml_vae_encoder . predict (
{
" sample " : sample_vae_encoder_inputs [ " sample " ] . numpy ( ) ,
" diagonal N oise" : sample_vae_encoder_inputs [ " diagonal N oise" ] . numpy ( ) ,
" diagonal _n oise" : sample_vae_encoder_inputs [ " diagonal _n oise" ] . numpy ( ) ,
" noise " : sample_vae_encoder_inputs [ " noise " ] . numpy ( ) ,
" sqrt AlphasCumprod" : sqrtAlphasCumprodTorchS hape. numpy ( ) ,
" sqrt OneMinusAlphasCumprod" : sqrtOneMinusAlphasCumprodTorchS hape. numpy ( )
" sqrt _alphas_cumprod" : sqrt_alphas_cumprod_torch_s hape. numpy ( ) ,
" sqrt _one_minus_alphas_cumprod" : sqrt_one_minus_alphas_cumprod_torch_s hape. numpy ( )
} ) . values ( ) )
report_correctness ( baseline_out [ 0 ] , coreml_out [ 0 ] ,