// For licensing see accompanying LICENSE.md file.
// Copyright (C) 2022 Apple Inc. All Rights Reserved.

import CoreML

/// A scheduler used to compute a de-noised image
///
/// This implementation matches:
/// [Hugging Face Diffusers PNDMScheduler](https://github.com/huggingface/diffusers/blob/main/src/diffusers/schedulers/scheduling_pndm.py)
///
/// It uses the pseudo linear multi-step (PLMS) method only, skipping pseudo Runge-Kutta (PRK) steps
@available(iOS 16.2, macOS 13.1, *)
public final class Scheduler {

    /// Number of diffusion steps performed during training
    public let trainStepCount: Int

    /// Number of inference steps to be performed
    public let inferenceStepCount: Int

    /// Training diffusion time steps indexed by inference time step
    public let timeSteps: [Int]

    /// Schedule of betas which controls the amount of noise added at each timestep
    public let betas: [Float]

    /// 1 - betas
    let alphas: [Float]

    /// Cached cumulative product of alphas
    let alphasCumProd: [Float]

    /// Standard deviation of the initial noise distribution
    public let initNoiseSigma: Float

    // Internal state
    var counter: Int
    var ets: [MLShapedArray<Float32>]
    var currentSample: MLShapedArray<Float32>?

    /// Create a scheduler that uses a pseudo linear multi-step (PLMS) method
    ///
    /// - Parameters:
    ///   - stepCount: Number of inference steps to schedule
    ///   - trainStepCount: Number of training diffusion steps
    ///   - betaSchedule: Method to schedule betas from betaStart to betaEnd
    ///   - betaStart: The starting value of beta for inference
    ///   - betaEnd: The end value for beta for inference
    /// - Returns: A scheduler ready for its first step
    public init(
        stepCount: Int = 50,
        trainStepCount: Int = 1000,
        betaSchedule: BetaSchedule = .scaledLinear,
        betaStart: Float = 0.00085,
        betaEnd: Float = 0.012
    ) {
        self.trainStepCount = trainStepCount
        self.inferenceStepCount = stepCount

        switch betaSchedule {
        case .linear:
            self.betas = linspace(betaStart, betaEnd, trainStepCount)
        case .scaledLinear:
            self.betas = linspace(pow(betaStart, 0.5), pow(betaEnd, 0.5), trainStepCount).map({ $0 * $0 })
        }
        self.alphas = betas.map({ 1.0 - $0 })
        self.initNoiseSigma = 1.0

        var alphasCumProd = self.alphas
        for i in 1..<alphasCumProd.count {
            alphasCumProd[i] *= alphasCumProd[i - 1]
        }
        self.alphasCumProd = alphasCumProd

        // Schedule the training time steps visited during inference,
        // matching PNDMScheduler.set_timesteps with PRK steps skipped
        let stepsOffset = 1 // For Stable Diffusion
        let stepRatio = Float(trainStepCount / stepCount)
        let forwardSteps = (0..<stepCount).map {
            Int((Float($0) * stepRatio).rounded()) + stepsOffset
        }

        var timeSteps: [Int] = []
        timeSteps.append(contentsOf: forwardSteps.dropLast(1))
        timeSteps.append(timeSteps.last!)
        timeSteps.append(forwardSteps.last!)
        timeSteps.reverse()

        self.timeSteps = timeSteps
        self.counter = 0
        self.ets = []
        self.currentSample = nil
    }

    /// Compute a de-noised image sample and step scheduler state
    ///
    /// - Parameters:
    ///   - output: The predicted residual noise output of the learned diffusion model
    ///   - timeStep: The current time step in the diffusion chain
    ///   - sample: The current input sample to the diffusion model
    /// - Returns: Predicted de-noised sample at the previous time step
    /// - Postcondition: The scheduler state is updated.
    ///   The state holds the current sample and history of model output predictions
    public func step(
        output: MLShapedArray<Float32>,
        timeStep t: Int,
        sample s: MLShapedArray<Float32>
    ) -> MLShapedArray<Float32> {

        var timeStep = t
        let stepInc = (trainStepCount / inferenceStepCount)
        var prevStep = timeStep - stepInc
        var modelOutput = output
        var sample = s

        if counter != 1 {
            // Keep at most the last 3 recorded outputs before appending the new one
            if ets.count > 3 {
                ets = Array(ets[(ets.count - 3)..<ets.count])
            }
            ets.append(output)
        } else {
            // Second invocation: shift the window so the step goes from t + stepInc to t
            prevStep = timeStep
            timeStep = timeStep + stepInc
        }

        if ets.count == 1 && counter == 0 {
            // First step: use the raw model output and remember the sample
            modelOutput = output
            currentSample = sample
        } else if ets.count == 1 && counter == 1 {
            // Second step: average the two outputs and restore the stored sample
            modelOutput = weightedSum(
                [1.0 / 2.0, 1.0 / 2.0],
                [output, ets[back: 1]]
            )
            sample = currentSample!
            currentSample = nil
        } else if ets.count == 2 {
            modelOutput = weightedSum(
                [3.0 / 2.0, -1.0 / 2.0],
                [ets[back: 1], ets[back: 2]]
            )
        } else if ets.count == 3 {
            modelOutput = weightedSum(
                [23.0 / 12.0, -16.0 / 12.0, 5.0 / 12.0],
                [ets[back: 1], ets[back: 2], ets[back: 3]]
            )
        } else {
            // 4th order pseudo linear multi-step (Adams-Bashforth coefficients)
            modelOutput = weightedSum(
                [55.0 / 24.0, -59.0 / 24.0, 37.0 / 24.0, -9.0 / 24.0],
                [ets[back: 1], ets[back: 2], ets[back: 3], ets[back: 4]]
            )
        }

        let prevSample = previousSample(sample, timeStep, prevStep, modelOutput)

        counter += 1
        return prevSample
    }

    /// Compute weighted sum of shaped arrays of equal shapes
    ///
    /// - Parameters:
    ///   - weights: The weights each array is multiplied by
    ///   - values: The arrays to be weighted and summed
    /// - Returns: sum_i weights[i] * values[i]
    func weightedSum(_ weights: [Double], _ values: [MLShapedArray<Float32>]) -> MLShapedArray<Float32> {
        assert(weights.count > 1 && values.count == weights.count)
        assert(values.allSatisfy({ $0.scalarCount == values.first!.scalarCount }))
        var w = Float(weights.first!)
        var scalars = values.first!.scalars.map({ $0 * w })
        for next in 1 ..< values.count {
            w = Float(weights[next])
            let nextScalars = values[next].scalars
            for i in 0 ..< scalars.count {
                scalars[i] += w * nextScalars[i]
            }
        }
        return MLShapedArray(scalars: scalars, shape: values.first!.shape)
    }

    /// Compute sample (denoised image) at previous step given a current time step
    ///
    /// - Parameters:
    ///   - sample: The current input to the model x_t
    ///   - timeStep: The current time step t
    ///   - prevStep: The previous time step t−δ
    ///   - modelOutput: Predicted noise residual at the current time step e_θ(x_t, t)
    /// - Returns: The previous sample x_(t−δ)
    func previousSample(
        _ sample: MLShapedArray<Float32>,
        _ timeStep: Int,
        _ prevStep: Int,
        _ modelOutput: MLShapedArray<Float32>
    ) -> MLShapedArray<Float32> {

        // Compute x_(t−δ) using formula (9) from
        // "Pseudo Numerical Methods for Diffusion Models on Manifolds",
        // Luping Liu, Yi Ren, Zhijie Lin & Zhou Zhao.
        // ICLR 2022
        //
        // Notation:
        //
        // alphaProdt       α_t
        // alphaProdtPrev   α_(t−δ)
        // betaProdt        (1 - α_t)
        // betaProdtPrev    (1 - α_(t−δ))
        let alphaProdt = alphasCumProd[timeStep]
        let alphaProdtPrev = alphasCumProd[max(0, prevStep)]
        let betaProdt = 1 - alphaProdt
        let betaProdtPrev = 1 - alphaProdtPrev

        // sampleCoeff = (α_(t−δ) - α_t) divided by
        // denominator of x_t in formula (9) and plus 1
        // Note: (α_(t−δ) - α_t) / (sqrt(α_t) * (sqrt(α_(t−δ)) + sqrt(α_t))) + 1 =
        // sqrt(α_(t−δ)) / sqrt(α_t)
        let sampleCoeff = sqrt(alphaProdtPrev / alphaProdt)

        // Denominator of e_θ(x_t, t) in formula (9)
        let modelOutputDenomCoeff = alphaProdt * sqrt(betaProdtPrev)
            + sqrt(alphaProdt * betaProdt * alphaProdtPrev)

        // full formula (9)
        let modelCoeff = -(alphaProdtPrev - alphaProdt) / modelOutputDenomCoeff
        let prevSample = weightedSum(
            [Double(sampleCoeff), Double(modelCoeff)],
            [sample, modelOutput]
        )

        return prevSample
    }
}
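// The sketch below (not part of the original file) illustrates how this scheduler is
// typically driven: one model evaluation per scheduled time step, feeding each predicted
// noise residual back through `step`. The `unet.predictNoise` call, the
// `initialGaussianNoise` helper, and the latent shape are assumptions standing in for the
// diffusion model, which is defined elsewhere; only `Scheduler` itself is defined here.
//
//     let scheduler = Scheduler(stepCount: 50)
//     var latents: MLShapedArray<Float32> = initialGaussianNoise() // hypothetical, e.g. shape [1, 4, 64, 64]
//     for t in scheduler.timeSteps {
//         // Predicted noise residual e_θ(x_t, t) from the diffusion model (hypothetical call)
//         let noisePrediction = unet.predictNoise(latents: latents, timeStep: t)
//         // De-noised sample at the previous time step; scheduler state advances internally
//         latents = scheduler.step(output: noisePrediction, timeStep: t, sample: latents)
//     }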
@available(iOS 16.2, macOS 13.1, *)
extension Scheduler {
    /// How to map a beta range to a sequence of betas to step over
    public enum BetaSchedule {
        /// Linear stepping between start and end
        case linear
        /// Steps using linspace(sqrt(start),sqrt(end))^2
        case scaledLinear
    }
}

/// Evenly spaced floats over a specified interval
///
/// - Parameters:
///   - start: Start of the interval
///   - end: End of the interval
///   - count: The number of floats to return between [*start*, *end*]
/// - Returns: Float array with *count* elements evenly spaced between *start* and *end*
func linspace(_ start: Float, _ end: Float, _ count: Int) -> [Float] {
    let scale = (end - start) / Float(count - 1)
    return (0..<count).map { Float($0) * scale + start }
}

extension Collection {
    /// Collection element index from the back. *self[back: 1]* yields the last element
    public subscript(back i: Int) -> Element {
        return self[index(endIndex, offsetBy: -i)]
    }
}
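// Illustrative values for the helpers above (a sketch, not part of the original file):
//
//     linspace(0.0, 1.0, 5)   // [0.0, 0.25, 0.5, 0.75, 1.0]
//
//     let xs = [10, 20, 30]
//     xs[back: 1]             // 30 (the last element)
//     xs[back: 2]             // 20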