// For licensing see accompanying LICENSE.md file.
// Copyright (C) 2022 Apple Inc. All Rights Reserved.

import ArgumentParser
import Cocoa
import CoreGraphics
import CoreML
import Foundation
import StableDiffusion
import UniformTypeIdentifiers

@available(iOS 16.2, macOS 13.1, *)
struct StableDiffusionSample: ParsableCommand {

    static let configuration = CommandConfiguration(
        abstract: "Run stable diffusion to generate images guided by a text prompt",
        version: "0.1"
    )
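
    // A hypothetical invocation, assuming ArgumentParser's default kebab-case
    // option names and compiled resources in ./models (both illustrative):
    //
    //   StableDiffusionSample "a photo of an astronaut riding a horse" \
    //       --resource-path ./models --seed 93 --step-count 25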

    @Argument(help: "Input string prompt")
    var prompt: String

    @Option(help: "Input string negative prompt")
    var negativePrompt: String = ""

    @Option(
        help: ArgumentHelp(
            "Path to stable diffusion resources.",
            discussion: "The resource directory should contain\n" +
                " - *compiled* models: {TextEncoder,Unet,VAEDecoder}.mlmodelc\n" +
                " - tokenizer info: vocab.json, merges.txt",
            valueName: "directory-path"
        )
    )
    var resourcePath: String = "./"

    @Option(help: "Path to starting image.")
    var image: String? = nil
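
    // Standard image2image semantics (the exact use of strength is defined in
    // the StableDiffusion package): values near 0 stay close to the starting
    // image, values near 1 behave like pure text-to-image.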
    @Option(help: "Strength for image2image.")
    var strength: Float = 0.5

    @Option(help: "Number of images to sample / generate")
    var imageCount: Int = 1

    @Option(help: "Number of diffusion steps to perform")
    var stepCount: Int = 50

    @Option(
        help: ArgumentHelp(
            "How often to save samples at intermediate steps",
            discussion: "Set to 0 to only save the final sample"
        )
    )
    var saveEvery: Int = 0

    @Option(help: "Output path")
    var outputPath: String = "./"

    @Option(help: "Random seed")
    var seed: UInt32 = UInt32.random(in: 0...UInt32.max)
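
    // Sketch of standard classifier-free guidance (the exact computation lives
    // in the StableDiffusion package): the denoiser output is roughly
    //   noise = uncond + guidanceScale * (text - uncond)
    // so 0 ignores the prompt entirely and larger values track it more closely.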
    @Option(help: "Controls the influence of the text prompt on sampling process (0=random images)")
    var guidanceScale: Float = 7.5

    @Option(help: "Compute units to load model with {all,cpuOnly,cpuAndGPU,cpuAndNeuralEngine}")
    var computeUnits: ComputeUnits = .all

    @Option(help: "Scheduler to use, one of {pndm, dpmpp}")
    var scheduler: SchedulerOption = .pndm
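
    // Note from the change that introduced this option (#124): the torch RNG
    // matches PyTorch's CPU `torch.randn` results. PyTorch's RNG is not
    // claimed to be version-stable, so exact matches depend on the version.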
    @Option(help: "Random number generator to use, one of {numpy, torch}")
    var rng: RNGOption = .numpy

    @Flag(help: "Disable safety checking")
    var disableSafety: Bool = false

    @Flag(help: "Reduce memory usage")
    var reduceMemory: Bool = false

    mutating func run() throws {
        guard FileManager.default.fileExists(atPath: resourcePath) else {
            throw RunError.resources("Resource path does not exist \(resourcePath)")
        }

        let config = MLModelConfiguration()
        config.computeUnits = computeUnits.asMLComputeUnits
        let resourceURL = URL(filePath: resourcePath)

        log("Loading resources and creating pipeline\n")
        log("(Note: This can take a while the first time using these resources)\n")
        let pipeline = try StableDiffusionPipeline(resourcesAt: resourceURL,
                                                   configuration: config,
                                                   disableSafety: disableSafety,
                                                   reduceMemory: reduceMemory)
        try pipeline.loadResources()

        let startingImage: CGImage?
        if let image {
            let imageURL = URL(filePath: image)
            do {
                let imageData = try Data(contentsOf: imageURL)
                guard
                    let nsImage = NSImage(data: imageData),
                    let loadedImage = nsImage.cgImage(forProposedRect: nil, context: nil, hints: nil)
                else {
                    throw RunError.resources("Starting image not available \(image)")
                }
                startingImage = loadedImage
            } catch let error {
                throw RunError.resources("Starting image not found \(imageURL), error: \(error)")
            }
        } else {
            startingImage = nil
        }

        log("Sampling ...\n")
        let sampleTimer = SampleTimer()
        sampleTimer.start()

        var pipelineConfig = StableDiffusionPipeline.Configuration(prompt: prompt)
        pipelineConfig.negativePrompt = negativePrompt
        pipelineConfig.startingImage = startingImage
        pipelineConfig.strength = strength
        pipelineConfig.imageCount = imageCount
        pipelineConfig.stepCount = stepCount
        pipelineConfig.seed = seed
        pipelineConfig.guidanceScale = guidanceScale
        pipelineConfig.schedulerType = scheduler.stableDiffusionScheduler
        pipelineConfig.rngType = rng.stableDiffusionRNG

        let images = try pipeline.generateImages(
            configuration: pipelineConfig,
            progressHandler: { progress in
                sampleTimer.stop()
                handleProgress(progress, sampleTimer)
                if progress.stepCount != progress.step {
                    sampleTimer.start()
                }
                return true // continue generating
            })

        _ = try saveImages(images, logNames: true)
    }

    func handleProgress(
        _ progress: StableDiffusionPipeline.Progress,
        _ sampleTimer: SampleTimer
    ) {
        // Move the cursor up one line and clear it so each progress update
        // overwrites the previous one
        log("\u{1B}[1A\u{1B}[K")
        log("Step \(progress.step) of \(progress.stepCount) ")
        log(" [")
        log(String(format: "mean: %.2f, ", 1.0 / sampleTimer.mean))
        log(String(format: "median: %.2f, ", 1.0 / sampleTimer.median))
        log(String(format: "last %.2f", 1.0 / sampleTimer.allSamples.last!))
        log("] step/sec")

        if saveEvery > 0, progress.step % saveEvery == 0 {
            let saveCount = (try? saveImages(progress.currentImages, step: progress.step)) ?? 0
            log(" saved \(saveCount) image\(saveCount != 1 ? "s" : "")")
        }
        log("\n")
    }

    func saveImages(
        _ images: [CGImage?],
        step: Int? = nil,
        logNames: Bool = false
    ) throws -> Int {
        let url = URL(filePath: outputPath)
        var saved = 0
        for i in 0 ..< images.count {

            // A nil entry means the image was withheld by the safety checker
            guard let image = images[i] else {
                if logNames {
                    log("Image \(i) failed safety check and was not saved\n")
                }
                continue
            }

            let name = imageName(i, step: step)
            let fileURL = url.appending(path: name)

            guard let dest = CGImageDestinationCreateWithURL(fileURL as CFURL, UTType.png.identifier as CFString, 1, nil) else {
                throw RunError.saving("Failed to create destination for \(fileURL)")
            }
            CGImageDestinationAddImage(dest, image, nil)
            if !CGImageDestinationFinalize(dest) {
                throw RunError.saving("Failed to save \(fileURL)")
            }
            if logNames {
                log("Saved \(name)\n")
            }
            saved += 1
        }
        return saved
    }
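
    /// Builds the output filename from the prompt and options. For example,
    /// the prompt "a photo of an astronaut" with seed 93, a single image, and
    /// no starting image yields "a_photo_of_an_astronaut.93.final.png".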
    func imageName(_ sample: Int, step: Int? = nil) -> String {
        let fileCharLimit = 75
        var name = prompt.prefix(fileCharLimit).replacingOccurrences(of: " ", with: "_")
        if imageCount != 1 {
            name += ".\(sample)"
        }

        // Only append the strength suffix when a starting image was provided
        if image != nil {
            name += ".str\(Int(strength * 100))"
        }

        name += ".\(seed)"

        if let step = step {
            name += ".\(step)"
        } else {
            name += ".final"
        }
        name += ".png"
        return name
    }

    func log(_ str: String, term: String = "") {
        print(str, terminator: term)
    }
}

enum RunError: Error {
    case resources(String)
    case saving(String)
}

@available(iOS 16.2, macOS 13.1, *)
enum ComputeUnits: String, ExpressibleByArgument, CaseIterable {
    case all, cpuAndGPU, cpuOnly, cpuAndNeuralEngine
    var asMLComputeUnits: MLComputeUnits {
        switch self {
        case .all: return .all
        case .cpuAndGPU: return .cpuAndGPU
        case .cpuOnly: return .cpuOnly
        case .cpuAndNeuralEngine: return .cpuAndNeuralEngine
        }
    }
}

@available(iOS 16.2, macOS 13.1, *)
enum SchedulerOption: String, ExpressibleByArgument {
    case pndm, dpmpp
    var stableDiffusionScheduler: StableDiffusionScheduler {
        switch self {
        case .pndm: return .pndmScheduler
        case .dpmpp: return .dpmSolverMultistepScheduler
        }
    }
}

@available(iOS 16.2, macOS 13.1, *)
enum RNGOption: String, ExpressibleByArgument {
    case numpy, torch
    var stableDiffusionRNG: StableDiffusionRNG {
        switch self {
        case .numpy: return .numpyRNG
        case .torch: return .torchRNG
        }
    }
}

if #available(iOS 16.2, macOS 13.1, *) {
    StableDiffusionSample.main()
} else {
    print("Unsupported OS")
}