// For licensing see accompanying LICENSE.md file.

// Copyright (C) 2022 Apple Inc. All Rights Reserved.

import Foundation
import CoreML
import Accelerate

/// A decoder model which produces RGB images from latent samples
@available(iOS 16.2, macOS 13.1, *)
public struct Decoder: ResourceManaging {

    /// VAE decoder model
    var model: ManagedMLModel

    /// Create decoder from Core ML model
    ///
    /// - Parameters:
    ///   - url: Location of compiled VAE decoder Core ML model
    ///   - configuration: Configuration to be used when the model is loaded
    /// - Returns: A decoder that will lazily load its required resources when needed or requested
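    ///
    /// A minimal usage sketch (the model URL and configuration below are
    /// illustrative placeholders, not defined in this file):
    ///
    /// ```swift
    /// let config = MLModelConfiguration()
    /// config.computeUnits = .cpuAndGPU
    /// let decoder = Decoder(
    ///     modelAt: URL(fileURLWithPath: "VAEDecoder.mlmodelc"), // hypothetical path
    ///     configuration: config)
    /// try decoder.loadResources()
    /// ```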
    public init(modelAt url: URL, configuration: MLModelConfiguration) {
        self.model = ManagedMLModel(modelAt: url, configuration: configuration)
    }

    /// Ensure the model has been loaded into memory
    public func loadResources() throws {
        try model.loadResources()
    }

    /// Unload the underlying model to free up memory
    public func unloadResources() {
        model.unloadResources()
    }

    /// Batch decode latent samples into images
    ///
    /// - Parameters:
    ///   - latents: Batch of latent samples to decode
    /// - Returns: Decoded images
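    ///
    /// For example (the latent below is an illustrative placeholder; real
    /// latents come from the diffusion loop, and their shape depends on the
    /// converted model, commonly [1, 4, 64, 64] for Stable Diffusion):
    ///
    /// ```swift
    /// let latent = MLShapedArray<Float32>(repeating: 0.0, shape: [1, 4, 64, 64])
    /// let images = try decoder.decode([latent])
    /// ```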
    public func decode(_ latents: [MLShapedArray<Float32>]) throws -> [CGImage] {

        // Form batch inputs for model
        let inputs: [MLFeatureProvider] = try latents.map { sample in
            // Reference pipeline scales the latent samples before decoding
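            // (dividing by 0.18215 undoes the fixed latent scaling factor that
            // the reference Stable Diffusion pipeline applies when producing latents)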
            let sampleScaled = MLShapedArray<Float32>(
                scalars: sample.scalars.map { $0 / 0.18215 },
                shape: sample.shape)

            let dict = [inputName: MLMultiArray(sampleScaled)]
            return try MLDictionaryFeatureProvider(dictionary: dict)
        }
        let batch = MLArrayBatchProvider(array: inputs)

        // Batch predict with model
        let results = try model.perform { model in
            try model.predictions(fromBatch: batch)
        }

        // Transform the outputs to CGImages
        let images: [CGImage] = (0..<results.count).map { i in
            let result = results.features(at: i)
            let outputName = result.featureNames.first!
            let output = result.featureValue(for: outputName)!.multiArrayValue!

            return toRGBCGImage(MLShapedArray<Float32>(output))
        }

        return images
    }
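
    /// Name of the latent sample input, read from the compiled model's
    /// description (assumes the decoder model exposes a single input)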
    var inputName: String {
        try! model.perform { model in
            model.modelDescription.inputDescriptionsByName.first!.key
        }
    }

    typealias PixelBufferPFx1 = vImage.PixelBuffer<vImage.PlanarF>
    typealias PixelBufferP8x3 = vImage.PixelBuffer<vImage.Planar8x3>
    typealias PixelBufferIFx3 = vImage.PixelBuffer<vImage.InterleavedFx3>
    typealias PixelBufferI8x3 = vImage.PixelBuffer<vImage.Interleaved8x3>
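
    /// Convert a [1, 3, H, W] shaped array with values in [-1.0, 1.0]
    /// into an 8-bit RGB CGImage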
    func toRGBCGImage(_ array: MLShapedArray<Float32>) -> CGImage {

        // array is [N,C,H,W], where C==3
        let channelCount = array.shape[1]
        assert(channelCount == 3,
               "Decoding model output has \(channelCount) channels, expected 3")
        let height = array.shape[2]
        let width = array.shape[3]

        // Normalize each channel into a float between 0 and 1.0
        let floatChannels = (0..<channelCount).map { i in

            // Normalized channel output
            let cOut = PixelBufferPFx1(width: width, height: height)

            // Reference this channel in the array and normalize
            array[0][i].withUnsafeShapedBufferPointer { ptr, _, strides in
                let cIn = PixelBufferPFx1(data: .init(mutating: ptr.baseAddress!),
                                          width: width, height: height,
                                          byteCountPerRow: strides[0]*4)
                // Map [-1.0 1.0] -> [0.0 1.0]
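                // (scale 0.5 with preBias 1.0 computes (x + 1.0) * 0.5 per pixel)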
                cIn.multiply(by: 0.5, preBias: 1.0, postBias: 0.0, destination: cOut)
            }
            return cOut
        }

        // Convert to interleaved and then to UInt8
        let floatImage = PixelBufferIFx3(planarBuffers: floatChannels)
        let uint8Image = PixelBufferI8x3(width: width, height: height)
        floatImage.convert(to: uint8Image) // maps [0.0 1.0] -> [0 255] and clips

        // Convert uint8x3 to RGB CGImage (no alpha)
        let bitmapInfo = CGBitmapInfo(rawValue: CGImageAlphaInfo.none.rawValue)
        let cgImage = uint8Image.makeCGImage(cgImageFormat:
            .init(bitsPerComponent: 8,
                  bitsPerPixel: 3*8,
                  colorSpace: CGColorSpaceCreateDeviceRGB(),
                  bitmapInfo: bitmapInfo)!)!

        return cgImage
    }
}