Scheduler and pipeline

pull/73/head
Timothy Kautz 1 year ago
parent 41d4637350
commit effc166c62

@ -23,12 +23,16 @@ public final class DPMSolverMultistepScheduler: Scheduler {
public let betas: [Float]
public let alphas: [Float]
public let alphasCumProd: [Float]
public let timeSteps: [Int]
private let timeSteps: [Int]
public let alpha_t: [Float]
public let sigma_t: [Float]
public let lambda_t: [Float]
public var allTimeSteps: [Int] {
timeSteps
}
public let solverOrder = 2
private(set) var lowerOrderStepped = 0

@ -10,9 +10,12 @@ public protocol Scheduler {
/// Number of inference steps to be performed
var inferenceStepCount: Int { get }
/// Training diffusion time steps indexed by inference time step
var allTimeSteps: [Int] { get }
/// Training diffusion time steps index by inference time step
var timeSteps: [Int] { get }
func calculateTimesteps(strength: Float?) -> [Int]
/// Schedule of betas which controls the amount of noise added at each timestep
var betas: [Float] { get }
@ -71,6 +74,35 @@ public extension Scheduler {
}
}
// MARK: - Image2Image
@available(iOS 16.2, macOS 13.1, *)
public extension Scheduler {
    /// Bundles this scheduler's cumulative alphas and step counts with an
    /// image-to-image `strength` into an `AlphasCumprodCalculation`.
    ///
    /// - Parameter strength: Image-to-image strength, forwarded unchanged.
    /// - Returns: A calculation initialised from `alphasCumProd`,
    ///   `trainStepCount`, `inferenceStepCount` and `strength`.
    func calculateAlphasCumprod(strength: Float) -> AlphasCumprodCalculation {
        let calculation = AlphasCumprodCalculation(
            alphasCumprod: alphasCumProd,
            timesteps: trainStepCount,
            steps: inferenceStepCount,
            strength: strength
        )
        return calculation
    }
}
// MARK: - Timesteps
@available(iOS 16.2, macOS 13.1, *)
public extension Scheduler {
    /// Returns the time steps to iterate over, in reversed `allTimeSteps` order.
    ///
    /// - Parameter strength: When `nil`, every entry of `allTimeSteps` is used.
    ///   Otherwise only the first `Int(Float(inferenceStepCount) * strength)`
    ///   entries of `allTimeSteps` are used. That count is clamped to
    ///   `0...allTimeSteps.count`, so a strength below 0 or above 1 no longer
    ///   traps on an out-of-range slice.
    /// - Returns: The selected entries of `allTimeSteps`, reversed.
    func calculateTimesteps(strength: Float?) -> [Int] {
        guard let strength else { return allTimeSteps.reversed() }

        // A negative strength would produce a negative count; treat it as "no steps".
        let startStep = max(0, Int(Float(inferenceStepCount) * strength))

        // `prefix` caps the count at `allTimeSteps.count`, unlike a raw range subscript.
        return Array(allTimeSteps.prefix(startStep).reversed())
    }
}
// MARK: - BetaSchedule
/// How to map a beta range to a sequence of betas to step over
@available(iOS 16.2, macOS 13.1, *)
public enum BetaSchedule {
@ -80,6 +112,7 @@ public enum BetaSchedule {
case scaledLinear
}
// MARK: - PNDMScheduler
/// A scheduler used to compute a de-noised image
///
@ -94,7 +127,11 @@ public final class PNDMScheduler: Scheduler {
public let betas: [Float]
public let alphas: [Float]
public let alphasCumProd: [Float]
public let timeSteps: [Int]
private let timeSteps: [Int]
public var allTimeSteps: [Int] {
timeSteps
}
// Internal state
var counter: Int

@ -14,6 +14,7 @@ public extension StableDiffusionPipeline {
public let unetChunk1URL: URL
public let unetChunk2URL: URL
public let decoderURL: URL
public let encoderURL: URL
public let safetyCheckerURL: URL
public let vocabURL: URL
public let mergesURL: URL
@ -24,6 +25,7 @@ public extension StableDiffusionPipeline {
unetChunk1URL = baseURL.appending(path: "UnetChunk1.mlmodelc")
unetChunk2URL = baseURL.appending(path: "UnetChunk2.mlmodelc")
decoderURL = baseURL.appending(path: "VAEDecoder.mlmodelc")
encoderURL = baseURL.appending(path: "VAEEncoder.mlmodelc")
safetyCheckerURL = baseURL.appending(path: "SafetyChecker.mlmodelc")
vocabURL = baseURL.appending(path: "vocab.json")
mergesURL = baseURL.appending(path: "merges.txt")
@ -74,11 +76,22 @@ public extension StableDiffusionPipeline {
FileManager.default.fileExists(atPath: urls.safetyCheckerURL.path) {
safetyChecker = SafetyChecker(modelAt: urls.safetyCheckerURL, configuration: config)
}
// Optional Image Encoder
let encoder: Encoder?
if
let encoderModel = try? MLModel(contentsOf: urls.encoderURL, configuration: config)
{
encoder = Encoder(model: encoderModel)
} else {
encoder = nil
}
// Construct pipeline
self.init(textEncoder: textEncoder,
unet: unet,
decoder: decoder,
encoder: encoder,
safetyChecker: safetyChecker,
reduceMemory: reduceMemory)
}

@ -20,6 +20,10 @@ public enum StableDiffusionScheduler {
/// [Hugging Face Diffusers Pipeline](https://github.com/huggingface/diffusers/blob/main/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion.py)
@available(iOS 16.2, macOS 13.1, *)
public struct StableDiffusionPipeline: ResourceManaging {
/// Errors thrown by the pipeline itself.
public enum Error: String, Swift.Error {
/// A starting image was passed to image generation but the pipeline has no `encoder`.
case startingImageProvidedWithoutEncoder
}
/// Model to generate embeddings for tokenized input text
var textEncoder: TextEncoder
@ -29,6 +33,9 @@ public struct StableDiffusionPipeline: ResourceManaging {
/// Model used to generate final image from latent diffusion process
var decoder: Decoder
/// Optional model used to encode an image into latent space for image2image and, soon, in-painting
var encoder: Encoder?
/// Optional model for checking safety of generated image
var safetyChecker: SafetyChecker? = nil
@ -58,11 +65,13 @@ public struct StableDiffusionPipeline: ResourceManaging {
/// Creates a pipeline from already-constructed model wrappers.
///
/// - Parameters:
///   - textEncoder: Model to generate embeddings for tokenized input text.
///   - unet: Noise prediction model stored on the pipeline.
///   - decoder: Model used to generate the final image from the latent diffusion process.
///   - encoder: Optional image encoder; pass `nil` when none is available.
///   - safetyChecker: Optional model for checking safety of generated images.
///   - reduceMemory: Stored as-is; how it is consumed is outside this section.
public init(
    textEncoder: TextEncoder,
    unet: Unet,
    decoder: Decoder,
    encoder: Encoder?,
    safetyChecker: SafetyChecker? = nil,
    reduceMemory: Bool = false
) {
    // Required models.
    self.textEncoder = textEncoder
    self.unet = unet
    self.decoder = decoder

    // Optional models.
    self.encoder = encoder
    self.safetyChecker = safetyChecker

    // Configuration.
    self.reduceMemory = reduceMemory
}
@ -114,6 +123,8 @@ public struct StableDiffusionPipeline: ResourceManaging {
public func generateImages(
prompt: String,
negativePrompt: String = "",
startingImage: CGImage? = nil,
strength: Float = 1.0,
imageCount: Int = 1,
stepCount: Int = 50,
seed: UInt32 = 0,
@ -150,10 +161,31 @@ public struct StableDiffusionPipeline: ResourceManaging {
let stdev = scheduler[0].initNoiseSigma
// Generate random latent samples from specified seed
var latents = generateLatentSamples(imageCount, stdev: stdev, seed: seed)
var latents: [MLShapedArray<Float32>]
let timestepStrength: Float?
if let startingImage {
timestepStrength = strength
guard let encoder else {
throw Error.startingImageProvidedWithoutEncoder
}
let noiseTuples = generateImage2ImageLatentSamples(imageCount, stdev: 1, seed: seed)
latents = try noiseTuples.map({
try encoder.encode(
image: startingImage,
diagonalNoise: $0.diagonal,
noise: $0.latentNoise,
alphasCumprodStep: scheduler[0].calculateAlphasCumprod(strength: strength))
})
} else {
timestepStrength = nil
// Generate random latent samples from specified seed
latents = generateLatentSamples(imageCount, stdev: stdev, seed: seed)
}
// De-noising loop
for (step,t) in scheduler[0].timeSteps.enumerated() {
let timeSteps = scheduler[0].calculateTimesteps(strength: timestepStrength)
for (step,t) in timeSteps.enumerated() {
// Expand the latents for classifier-free guidance
// and input to the Unet noise prediction model
@ -215,6 +247,35 @@ public struct StableDiffusionPipeline: ResourceManaging {
}
return samples
}
/// Generates the per-image noise pairs used by the image2image path.
///
/// Each pair holds one noise array labelled `diagonal` and one labelled
/// `latentNoise`. Both are drawn from a single `NumPyRandomSource` seeded
/// with `seed`, using the UNet's latent sample shape with its first
/// dimension set to 1.
///
/// - Parameters:
///   - count: Batch size; one pair is produced per image.
///   - stdev: Standard deviation of the sampled noise (the mean is always 0).
///   - seed: Seed for the random source, truncated to `UInt32` if needed.
///   - diagonalAndLatentNoiseIsSame: When `true`, a single draw is reused for both
///     elements of the pair. When `false` (the default), two independent draws are
///     made, `diagonal` first. The Diffusers library does not seem to use the same
///     noise for the `DiagonalGaussianDistribution` operation, but other pipeline
///     implementations have been seen where it is the same.
/// - Returns: An array of noise tuples whose length equals the batch size.
func generateImage2ImageLatentSamples(_ count: Int, stdev: Float, seed: Int, diagonalAndLatentNoiseIsSame: Bool = false) -> [(diagonal: MLShapedArray<Float32>, latentNoise: MLShapedArray<Float32>)] {
    var shape = unet.latentSampleShape
    shape[0] = 1
    var generator = NumPyRandomSource(seed: UInt32(truncatingIfNeeded: seed))
    let sigma = Double(stdev)

    var pairs: [(diagonal: MLShapedArray<Float32>, latentNoise: MLShapedArray<Float32>)] = []
    for _ in 0..<count {
        // The first draw always feeds `diagonal`; draw order must stay stable
        // so that a given seed keeps producing the same noise.
        let diagonal = MLShapedArray<Float32>(
            converting: generator.normalShapedArray(shape, mean: 0.0, stdev: sigma))

        let latentNoise: MLShapedArray<Float32>
        if diagonalAndLatentNoiseIsSame {
            latentNoise = diagonal
        } else {
            latentNoise = MLShapedArray<Float32>(
                converting: generator.normalShapedArray(shape, mean: 0.0, stdev: sigma))
        }

        pairs.append((diagonal, latentNoise))
    }
    return pairs
}
func toHiddenStates(_ embedding: MLShapedArray<Float32>) -> MLShapedArray<Float32> {
// Unoptimized manual transpose [0, 2, None, 1]

Loading…
Cancel
Save