pull/116/head
Timothy Kautz 1 year ago
parent c199a4225d
commit c17c80ff8b

@ -178,5 +178,5 @@ public final class DPMSolverMultistepScheduler: Scheduler {
}
return prevSample
}
}
}

@ -4,9 +4,9 @@
import Foundation
import CoreML
@available(iOS 16.0, macOS 13.0, *)
@available(iOS 16.0, macOS 13.1, *)
/// Encoder, currently supports image2image
public struct Encoder {
public struct Encoder: ResourceManaging {
public enum Error: String, Swift.Error {
case latentOutputNotValid
@ -14,14 +14,26 @@ public struct Encoder {
}
/// VAE encoder model + post math and adding noise from schedular
var model: MLModel
var model: ManagedMLModel
/// Create decoder from Core ML model
/// Create encoder from Core ML model
///
/// - Parameters
/// - model: Core ML model for VAE decoder
public init(model: MLModel) {
self.model = model
/// - Parameters:
/// - url: Location of compiled VAE encoder Core ML model
/// - configuration: configuration to be used when the model is loaded
/// - Returns: An encoder that will lazily load its required resources when needed or requested
public init(modelAt url: URL, configuration: MLModelConfiguration) {
self.model = ManagedMLModel(modelAt: url, configuration: configuration)
}
/// Ensure the model has been loaded into memory
public func loadResources() throws {
try model.loadResources()
}
/// Unload the underlying model to free up memory
public func unloadResources() {
model.unloadResources()
}
/// Prediction queue
@ -46,17 +58,22 @@ public struct Encoder {
let dict: [String: Any] = [
"sample": MLMultiArray(sample),
"diagonalNoise": MLMultiArray(diagonalNoise),
"diagonal_noise": MLMultiArray(diagonalNoise),
"noise": MLMultiArray(noise),
"sqrtAlphasCumprod": MLMultiArray(sqrtAlphasCumprod),
"sqrtOneMinusAlphasCumprod": MLMultiArray(sqrtOneMinusAlphasCumprod),
"sqrt_alphas_cumprod": MLMultiArray(sqrtAlphasCumprod),
"sqrt_one_minus_alphas_cumprod": MLMultiArray(sqrtOneMinusAlphasCumprod),
]
let featureProvider = try MLDictionaryFeatureProvider(dictionary: dict)
let batch = MLArrayBatchProvider(array: [featureProvider])
// Batch predict with model
let results = try queue.sync { try model.predictions(fromBatch: batch) }
let results = try queue.sync {
try model.perform { model in
try model.predictions(fromBatch: batch)
}
}
let batchLatents: [MLShapedArray<Float32>] = try (0..<results.count).compactMap { i in
let result = results.features(at: i)

@ -10,7 +10,7 @@ public protocol Scheduler {
/// Number of inference steps to be performed
var inferenceStepCount: Int { get }
/// Training diffusion time steps index by inference time step
var timeSteps: [Int] { get }

@ -79,10 +79,8 @@ public extension StableDiffusionPipeline {
// Optional Image Encoder
let encoder: Encoder?
if
let encoderModel = try? MLModel(contentsOf: urls.encoderURL, configuration: config)
{
encoder = Encoder(model: encoderModel)
if FileManager.default.fileExists(atPath: urls.encoderURL.path) {
encoder = Encoder(modelAt: urls.encoderURL, configuration: config)
} else {
encoder = nil
}

@ -24,6 +24,36 @@ public struct StableDiffusionPipeline: ResourceManaging {
public enum Error: String, Swift.Error {
case startingImageProvidedWithoutEncoder
}
public enum Mode {
case textToImage
case imageToImage
// case inPainting
}
public struct SampleInput: Hashable {
public var prompt: String
public var negativePrompt: String = ""
public var startingImage: CGImage? = nil
//public var maskImage: CGImage? = nil
public var strength: Float = 1.0
public var imageCount: Int = 1
public var stepCount: Int = 50
public var seed: UInt32 = 0
public var guidanceScale: Float = 7.5
public var disableSafety: Bool = false
public var schedulerType: StableDiffusionScheduler = .pndmScheduler
public var mode: Mode {
guard startingImage != nil else {
return .textToImage
}
guard strength < 1.0 else {
return .textToImage
}
return .imageToImage
}
}
/// Model to generate embeddings for tokenized input text
var textEncoder: TextEncoder
@ -133,10 +163,27 @@ public struct StableDiffusionPipeline: ResourceManaging {
scheduler schedulerType: StableDiffusionScheduler = .pndmScheduler,
progressHandler: (Progress) -> Bool = { _ in true }
) throws -> [CGImage?] {
try generateImages(input: SampleInput(
prompt: prompt,
negativePrompt: negativePrompt,
startingImage: startingImage,
strength: strength,
imageCount: imageCount,
stepCount: stepCount,
seed: seed,
guidanceScale: guidanceScale,
disableSafety: disableSafety,
schedulerType: schedulerType), progressHandler: progressHandler)
}
public func generateImages(
input: SampleInput,
progressHandler: (Progress) -> Bool = { _ in true }
) throws -> [CGImage?] {
// Encode the input prompt and negative prompt
let promptEmbedding = try textEncoder.encode(prompt)
let negativePromptEmbedding = try textEncoder.encode(negativePrompt)
let promptEmbedding = try textEncoder.encode(input.prompt)
let negativePromptEmbedding = try textEncoder.encode(input.negativePrompt)
if reduceMemory {
textEncoder.unloadResources()
@ -152,10 +199,10 @@ public struct StableDiffusionPipeline: ResourceManaging {
let hiddenStates = toHiddenStates(concatEmbedding)
/// Setup schedulers
let scheduler: [Scheduler] = (0..<imageCount).map { _ in
switch schedulerType {
case .pndmScheduler: return PNDMScheduler(stepCount: stepCount)
case .dpmSolverMultistepScheduler: return DPMSolverMultistepScheduler(stepCount: stepCount)
let scheduler: [Scheduler] = (0..<input.imageCount).map { _ in
switch input.schedulerType {
case .pndmScheduler: return PNDMScheduler(stepCount: input.stepCount)
case .dpmSolverMultistepScheduler: return DPMSolverMultistepScheduler(stepCount: input.stepCount)
}
}
let stdev = scheduler[0].initNoiseSigma
@ -164,24 +211,27 @@ public struct StableDiffusionPipeline: ResourceManaging {
var latents: [MLShapedArray<Float32>]
let timestepStrength: Float?
if let startingImage {
timestepStrength = strength
if
let startingImage = input.startingImage,
input.mode == .imageToImage
{
timestepStrength = input.strength
guard let encoder else {
throw Error.startingImageProvidedWithoutEncoder
}
let noiseTuples = generateImage2ImageLatentSamples(imageCount, stdev: 1, seed: seed)
let noiseTuples = generateImage2ImageLatentSamples(input.imageCount, stdev: 1, seed: input.seed)
latents = try noiseTuples.map({
try encoder.encode(
image: startingImage,
diagonalNoise: $0.diagonal,
noise: $0.latentNoise,
alphasCumprodStep: scheduler[0].calculateAlphasCumprod(strength: strength))
alphasCumprodStep: scheduler[0].calculateAlphasCumprod(strength: input.strength))
})
} else {
timestepStrength = nil
// Generate random latent samples from specified seed
latents = generateLatentSamples(imageCount, stdev: stdev, seed: seed)
latents = generateLatentSamples(input.imageCount, stdev: stdev, seed: input.seed)
}
// De-noising loop
@ -202,11 +252,11 @@ public struct StableDiffusionPipeline: ResourceManaging {
hiddenStates: hiddenStates
)
noise = performGuidance(noise, guidanceScale)
noise = performGuidance(noise, input.guidanceScale)
// Have the scheduler compute the previous (t-1) latent
// sample given the predicted noise and current sample
for i in 0..<imageCount {
for i in 0..<input.imageCount {
latents[i] = scheduler[i].step(
output: noise[i],
timeStep: t,
@ -217,11 +267,11 @@ public struct StableDiffusionPipeline: ResourceManaging {
// Report progress
let progress = Progress(
pipeline: self,
prompt: prompt,
prompt: input.prompt,
step: step,
stepCount: timeSteps.count,
currentLatentSamples: latents,
isSafetyEnabled: canSafetyCheck && !disableSafety
isSafetyEnabled: canSafetyCheck && !input.disableSafety
)
if !progressHandler(progress) {
// Stop if requested by handler
@ -234,7 +284,7 @@ public struct StableDiffusionPipeline: ResourceManaging {
}
// Decode the latent samples to images
return try decodeToImages(latents, disableSafety: disableSafety)
return try decodeToImages(latents, disableSafety: input.disableSafety)
}
func generateLatentSamples(_ count: Int, stdev: Float, seed: UInt32) -> [MLShapedArray<Float32>] {

@ -34,7 +34,7 @@ struct StableDiffusionSample: ParsableCommand {
var resourcePath: String = "./"
@Option(help: "Path to starting image.")
var image: String = "none"
var image: String? = nil
@Option(help: "Strength for image2image.")
var strength: Float = 0.5
@ -92,7 +92,7 @@ struct StableDiffusionSample: ParsableCommand {
try pipeline.loadResources()
let startingImage: CGImage?
if image != "none" {
if let image {
let imageURL = URL(filePath: image)
do {
let imageData = try Data(contentsOf: imageURL)

Loading…
Cancel
Save