diff --git a/swift/StableDiffusion/pipeline/DPMSolverMultistepScheduler.swift b/swift/StableDiffusion/pipeline/DPMSolverMultistepScheduler.swift index 1fbfff1..3e8342c 100644 --- a/swift/StableDiffusion/pipeline/DPMSolverMultistepScheduler.swift +++ b/swift/StableDiffusion/pipeline/DPMSolverMultistepScheduler.swift @@ -23,12 +23,16 @@ public final class DPMSolverMultistepScheduler: Scheduler { public let betas: [Float] public let alphas: [Float] public let alphasCumProd: [Float] - public let timeSteps: [Int] + private let timeSteps: [Int] public let alpha_t: [Float] public let sigma_t: [Float] public let lambda_t: [Float] + public var allTimeSteps: [Int] { + timeSteps + } + public let solverOrder = 2 private(set) var lowerOrderStepped = 0 diff --git a/swift/StableDiffusion/pipeline/Scheduler.swift b/swift/StableDiffusion/pipeline/Scheduler.swift index 0bd9284..d0ed12e 100644 --- a/swift/StableDiffusion/pipeline/Scheduler.swift +++ b/swift/StableDiffusion/pipeline/Scheduler.swift @@ -10,9 +10,12 @@ public protocol Scheduler { /// Number of inference steps to be performed var inferenceStepCount: Int { get } + + /// Training diffusion time steps index by inference time step + var allTimeSteps: [Int] { get } /// Training diffusion time steps index by inference time step - var timeSteps: [Int] { get } + func calculateTimesteps(strength: Float?) -> [Int] /// Schedule of betas which controls the amount of noise added at each timestep var betas: [Float] { get } @@ -71,6 +74,35 @@ public extension Scheduler { } } +// MARK: - Image2Image + +@available(iOS 16.2, macOS 13.1, *) +public extension Scheduler { + + func calculateAlphasCumprod(strength: Float) -> AlphasCumprodCalculation { + AlphasCumprodCalculation( + alphasCumprod: alphasCumProd, + timesteps: trainStepCount, + steps: inferenceStepCount, + strength: strength) + } +} + +// MARK: - Timesteps + +@available(iOS 16.2, macOS 13.1, *) +public extension Scheduler { + + func calculateTimesteps(strength: Float?) 
-> [Int] { + guard let strength else { return allTimeSteps.reversed() } + let startStep = Int(Float(inferenceStepCount) * strength) + let acutalTimesteps = Array(allTimeSteps[0..] + let timestepStrength: Float? + + if let startingImage { + timestepStrength = strength + guard let encoder else { + throw Error.startingImageProvidedWithoutEncoder + } + let noiseTuples = generateImage2ImageLatentSamples(imageCount, stdev: 1, seed: seed) + latents = try noiseTuples.map({ + try encoder.encode( + image: startingImage, + diagonalNoise: $0.diagonal, + noise: $0.latentNoise, + alphasCumprodStep: scheduler[0].calculateAlphasCumprod(strength: strength)) + }) + } else { + timestepStrength = nil + // Generate random latent samples from specified seed + latents = generateLatentSamples(imageCount, stdev: stdev, seed: seed) + } // De-noising loop - for (step,t) in scheduler[0].timeSteps.enumerated() { + let timeSteps = scheduler[0].calculateTimesteps(strength: timestepStrength) + for (step,t) in timeSteps.enumerated() { // Expand the latents for classifier-free guidance // and input to the Unet noise prediction model @@ -215,6 +247,35 @@ public struct StableDiffusionPipeline: ResourceManaging { } return samples } + + + /// Generates the noise pairs handed to the encoder when a starting image is supplied (image2image). + /// - Parameters: + /// - count: batch size + /// - stdev: standard deviation of the generated noise; the image2image call site passes 1 + /// - seed: seed for the random source + /// - diagonalAndLatentNoiseIsSame: The Diffusers library does not seem to use the same noise for the `DiagonalGaussianDistribution` operation, + /// but I have seen implementations of pipelines where it is the same. + /// - Returns: An array of noise tuples whose length equals the batch size.
+ func generateImage2ImageLatentSamples(_ count: Int, stdev: Float, seed: Int, diagonalAndLatentNoiseIsSame: Bool = false) -> [(diagonal: MLShapedArray, latentNoise: MLShapedArray)] { + var sampleShape = unet.latentSampleShape + sampleShape[0] = 1 + + var random = NumPyRandomSource(seed: UInt32(truncatingIfNeeded: seed)) + let samples = (0..( + converting: random.normalShapedArray(sampleShape, mean: 0.0, stdev: Double(stdev))) + return (noise, noise) + } else { + return (MLShapedArray( + converting: random.normalShapedArray(sampleShape, mean: 0.0, stdev: Double(stdev))), + MLShapedArray( + converting: random.normalShapedArray(sampleShape, mean: 0.0, stdev: Double(stdev)))) + } + } + return samples + } func toHiddenStates(_ embedding: MLShapedArray) -> MLShapedArray { // Unoptimized manual transpose [0, 2, None, 1]