v1.4.0: added imageToImage

main
Yasuhito Nagatomo 1 year ago
parent 9543b2d742
commit 9541b078eb

@ -19,12 +19,17 @@ The project uses the Apple/ml-stable-diffusion Swift Package.
You can see how it works through the simple sample code.
- Apple/ml-stable-diffusion repo: https://github.com/apple/ml-stable-diffusion
- v0.2.0 is required
![Image](images/ss_4_imgs.png)
![Image](images/ss0_1280.png)
## Change Log
- [1.4.0 (10)] - Feb 11, 2023 `[Added]`
- Added imageToImage generation functionality, ImageToImageView.
- Added an image asset as a start-image for imageToImage generation.
- The latest apple/ml-stable-diffusion v0.2.0 is required.
- [1.3.0 (9)] - Feb 10, 2023 `[Changed]`
- Changed to use the Configuration structure when calling the generateImages(configuration: progressHandler:) API to support changes in the API of apple/ml-stable-diffusion v0.2.0.
- The apple/ml-stable-diffusion Swift Package v0.2.0 or later is required.

@ -12,8 +12,11 @@
BA03C37C293D785D001426DE /* Assets.xcassets in Resources */ = {isa = PBXBuildFile; fileRef = BA03C37B293D785D001426DE /* Assets.xcassets */; };
BA03C37F293D785D001426DE /* Preview Assets.xcassets in Resources */ = {isa = PBXBuildFile; fileRef = BA03C37E293D785D001426DE /* Preview Assets.xcassets */; };
BA03C38D293D8773001426DE /* ImageGenerator.swift in Sources */ = {isa = PBXBuildFile; fileRef = BA03C38C293D8773001426DE /* ImageGenerator.swift */; };
BA2A0BFF2974E73C0037687E /* CoreMLModels in Resources */ = {isa = PBXBuildFile; fileRef = BA2A0BFE2974E73C0037687E /* CoreMLModels */; };
BA34A6BF299605F70062CAF4 /* StableDiffusion in Frameworks */ = {isa = PBXBuildFile; productRef = BA34A6BE299605F70062CAF4 /* StableDiffusion */; };
BA34A6C129961CCE0062CAF4 /* CoreMLModels in Resources */ = {isa = PBXBuildFile; fileRef = BA34A6C029961CCE0062CAF4 /* CoreMLModels */; };
BA34A6C72997388D0062CAF4 /* TextToImageView.swift in Sources */ = {isa = PBXBuildFile; fileRef = BA34A6C62997388D0062CAF4 /* TextToImageView.swift */; };
BA34A6C9299738DC0062CAF4 /* PromptView.swift in Sources */ = {isa = PBXBuildFile; fileRef = BA34A6C8299738DC0062CAF4 /* PromptView.swift */; };
BA34A6CB29974CBC0062CAF4 /* ImageToImageView.swift in Sources */ = {isa = PBXBuildFile; fileRef = BA34A6CA29974CBC0062CAF4 /* ImageToImageView.swift */; };
/* End PBXBuildFile section */
/* Begin PBXFileReference section */
@ -24,7 +27,10 @@
BA03C37E293D785D001426DE /* Preview Assets.xcassets */ = {isa = PBXFileReference; lastKnownFileType = folder.assetcatalog; path = "Preview Assets.xcassets"; sourceTree = "<group>"; };
BA03C386293D7CE5001426DE /* README.md */ = {isa = PBXFileReference; lastKnownFileType = net.daringfireball.markdown; path = README.md; sourceTree = "<group>"; };
BA03C38C293D8773001426DE /* ImageGenerator.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = ImageGenerator.swift; sourceTree = "<group>"; };
BA2A0BFE2974E73C0037687E /* CoreMLModels */ = {isa = PBXFileReference; lastKnownFileType = folder; path = CoreMLModels; sourceTree = "<group>"; };
BA34A6C029961CCE0062CAF4 /* CoreMLModels */ = {isa = PBXFileReference; lastKnownFileType = folder; path = CoreMLModels; sourceTree = "<group>"; };
BA34A6C62997388D0062CAF4 /* TextToImageView.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = TextToImageView.swift; sourceTree = "<group>"; };
BA34A6C8299738DC0062CAF4 /* PromptView.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = PromptView.swift; sourceTree = "<group>"; };
BA34A6CA29974CBC0062CAF4 /* ImageToImageView.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = ImageToImageView.swift; sourceTree = "<group>"; };
/* End PBXFileReference section */
/* Begin PBXFrameworksBuildPhase section */
@ -59,9 +65,12 @@
BA03C376293D785C001426DE /* imggensd2 */ = {
isa = PBXGroup;
children = (
BA2A0BFE2974E73C0037687E /* CoreMLModels */,
BA34A6C029961CCE0062CAF4 /* CoreMLModels */,
BA03C377293D785C001426DE /* imggensd2App.swift */,
BA03C379293D785C001426DE /* ContentView.swift */,
BA34A6C62997388D0062CAF4 /* TextToImageView.swift */,
BA34A6CA29974CBC0062CAF4 /* ImageToImageView.swift */,
BA34A6C8299738DC0062CAF4 /* PromptView.swift */,
BA03C38C293D8773001426DE /* ImageGenerator.swift */,
BA03C37B293D785D001426DE /* Assets.xcassets */,
BA03C37D293D785D001426DE /* Preview Content */,
@ -142,7 +151,7 @@
isa = PBXResourcesBuildPhase;
buildActionMask = 2147483647;
files = (
BA2A0BFF2974E73C0037687E /* CoreMLModels in Resources */,
BA34A6C129961CCE0062CAF4 /* CoreMLModels in Resources */,
BA03C37F293D785D001426DE /* Preview Assets.xcassets in Resources */,
BA03C37C293D785D001426DE /* Assets.xcassets in Resources */,
);
@ -178,6 +187,9 @@
files = (
BA03C37A293D785C001426DE /* ContentView.swift in Sources */,
BA03C38D293D8773001426DE /* ImageGenerator.swift in Sources */,
BA34A6C72997388D0062CAF4 /* TextToImageView.swift in Sources */,
BA34A6CB29974CBC0062CAF4 /* ImageToImageView.swift in Sources */,
BA34A6C9299738DC0062CAF4 /* PromptView.swift in Sources */,
BA03C378293D785C001426DE /* imggensd2App.swift in Sources */,
);
runOnlyForDeploymentPostprocessing = 0;
@ -305,7 +317,7 @@
ASSETCATALOG_COMPILER_APPICON_NAME = AppIcon;
ASSETCATALOG_COMPILER_GLOBAL_ACCENT_COLOR_NAME = AccentColor;
CODE_SIGN_STYLE = Automatic;
CURRENT_PROJECT_VERSION = 9;
CURRENT_PROJECT_VERSION = 10;
DEVELOPMENT_ASSET_PATHS = "\"imggensd2/Preview Content\"";
DEVELOPMENT_TEAM = J5CY9Q9UP5;
ENABLE_PREVIEWS = YES;
@ -321,7 +333,7 @@
"$(inherited)",
"@executable_path/Frameworks",
);
MARKETING_VERSION = 1.3.0;
MARKETING_VERSION = 1.4.0;
PRODUCT_BUNDLE_IDENTIFIER = com.atarayosd.imggensd2;
PRODUCT_NAME = "$(TARGET_NAME)";
SWIFT_EMIT_LOC_STRINGS = YES;
@ -336,7 +348,7 @@
ASSETCATALOG_COMPILER_APPICON_NAME = AppIcon;
ASSETCATALOG_COMPILER_GLOBAL_ACCENT_COLOR_NAME = AccentColor;
CODE_SIGN_STYLE = Automatic;
CURRENT_PROJECT_VERSION = 9;
CURRENT_PROJECT_VERSION = 10;
DEVELOPMENT_ASSET_PATHS = "\"imggensd2/Preview Content\"";
DEVELOPMENT_TEAM = J5CY9Q9UP5;
ENABLE_PREVIEWS = YES;
@ -352,7 +364,7 @@
"$(inherited)",
"@executable_path/Frameworks",
);
MARKETING_VERSION = 1.3.0;
MARKETING_VERSION = 1.4.0;
PRODUCT_BUNDLE_IDENTIFIER = com.atarayosd.imggensd2;
PRODUCT_NAME = "$(TARGET_NAME)";
SWIFT_EMIT_LOC_STRINGS = YES;

@ -0,0 +1,6 @@
{
"info" : {
"author" : "xcode",
"version" : 1
}
}

@ -0,0 +1,12 @@
{
"images" : [
{
"filename" : "sample1_512x512.png",
"idiom" : "universal"
}
],
"info" : {
"author" : "xcode",
"version" : 1
}
}

Binary file not shown.

After

Width:  |  Height:  |  Size: 555 KiB

@ -8,51 +8,22 @@
import SwiftUI
struct ContentView: View {
static let prompt = "a photo of an astronaut riding a horse on mars"
static let negativePrompt =
"""
lowres, bad anatomy, bad hands, text, error, missing fingers, extra digit, fewer digits,
cropped, worst quality, low quality, normal quality, jpeg artifacts, blurry, multiple legs, malformation
"""
@StateObject var imageGenerator = ImageGenerator()
@State private var generationParameter =
ImageGenerator.GenerationParameter(prompt: prompt,
negativePrompt: negativePrompt,
guidanceScale: 8.0,
seed: 1_000_000,
stepCount: 20,
imageCount: 1, disableSafety: false)
var body: some View {
ScrollView {
VStack {
Text("Stable Diffusion v2").font(.title).padding()
PromptView(parameter: $generationParameter)
.disabled(imageGenerator.generationState != .idle)
if imageGenerator.generationState == .idle {
Button(action: generate) {
Text("Generate").font(.title)
}.buttonStyle(.borderedProminent)
} else {
ProgressView()
var body: some View {
TabView {
TextToImageView(imageGenerator: imageGenerator)
.tabItem {
Image(systemName: "text.below.photo.fill")
Text("Text to Image")
}
if let generatedImages = imageGenerator.generatedImages {
ForEach(generatedImages.images) {
Image(uiImage: $0.uiImage)
.resizable()
.scaledToFit()
}
ImageToImageView(imageGenerator: imageGenerator)
.tabItem {
Image(systemName: "photo.stack.fill")
Text("Image to Image")
}
}
}
.padding()
}
func generate() {
imageGenerator.generateImages(generationParameter)
.accentColor(.purple)
}
}
@ -61,42 +32,3 @@ struct ContentView_Previews: PreviewProvider {
ContentView()
}
}
/// Form for editing an `ImageGenerator.GenerationParameter` in place:
/// prompt, negative prompt, guidance scale, image count, step count and seed.
struct PromptView: View {
    /// Formatter for the seed text field. Created once and reused, because
    /// `body` is re-evaluated frequently and building a `NumberFormatter`
    /// on every evaluation is needless work.
    private static let seedFormatter = NumberFormatter()

    /// The parameter set being edited; changes are written back to the owner.
    @Binding var parameter: ImageGenerator.GenerationParameter

    var body: some View {
        VStack {
            HStack {
                Text("Prompt:")
                Spacer()
            }
            TextField("Prompt:", text: $parameter.prompt)
                .textFieldStyle(RoundedBorderTextFieldStyle())

            HStack {
                Text("Negative Prompt:")
                Spacer()
            }
            TextField("Negative Prompt:", text: $parameter.negativePrompt)
                .textFieldStyle(RoundedBorderTextFieldStyle())

            Stepper(value: $parameter.guidanceScale, in: 0.0...40.0, step: 0.5) {
                Text("Guidance scale: \(parameter.guidanceScale, specifier: "%.1f") ")
            }
            Stepper(value: $parameter.imageCount, in: 1...10) {
                Text("Image Count: \(parameter.imageCount)")
            }
            Stepper(value: $parameter.stepCount, in: 1...100) {
                Text("Iteration steps: \(parameter.stepCount)")
            }

            HStack {
                Text("Seed:")
                Spacer()
            }
            TextField("Seed number (0 ... 4_294_967_295)",
                      value: $parameter.seed,
                      formatter: PromptView.seedFormatter)
                .textFieldStyle(RoundedBorderTextFieldStyle())
                .onSubmit {
                    // Clamp the submitted seed into 0 ... UInt32.max.
                    if parameter.seed < 0 {
                        parameter.seed = 0
                    } else if parameter.seed > UInt32.max {
                        parameter.seed = Int(UInt32.max)
                    }
                }
        }
        .padding()
    }
}

@ -11,7 +11,12 @@ import CoreML
@MainActor
final class ImageGenerator: ObservableObject {
/// Selects how the pipeline configuration is filled in before generation.
enum GenerationMode {
    /// Prompt-only generation; the configuration strength is set to 1.0.
    case textToImage
    /// Generation from a start image; the parameter's start image and
    /// strength are copied into the configuration.
    case imageToImage
}
struct GenerationParameter {
let mode: GenerationMode
var prompt: String
var negativePrompt: String
var guidanceScale: Float
@ -19,6 +24,8 @@ final class ImageGenerator: ObservableObject {
var stepCount: Int
var imageCount: Int
var disableSafety: Bool
var startImage: CGImage?
var strength: Float = 1.0
}
struct GeneratedImage: Identifiable {
@ -74,6 +81,7 @@ final class ImageGenerator: ObservableObject {
generatedImages = images
}
// swiftlint:disable function_body_length
func generateImages(_ parameter: GenerationParameter) {
guard generationState == .idle else { return }
Task.detached(priority: .high) {
@ -117,19 +125,9 @@ final class ImageGenerator: ObservableObject {
// https://github.com/ynagatomo/ARDiffMuseum
// It handles the progressHandler and displays the generating images step by step.
// at v1.3.0
// apple/ml-stable-diffusion v0.2.0 changed the generateImages() API
// to generateImages(configuration:progressHandler:)
// let cgImages = try sdPipeline.generateImages(prompt: parameter.prompt,
// negativePrompt: parameter.negativePrompt,
// imageCount: parameter.imageCount,
// stepCount: parameter.stepCount,
// seed: UInt32(parameter.seed),
// guidanceScale: parameter.guidanceScale,
// disableSafety: parameter.disableSafety)
// Mode: textToImage or imageToImage
// when startingImage != nil AND strength < 1.0, imageToImage mode is selected
// to generateImages(configuration:progressHandler:)
var configuration = StableDiffusionPipeline.Configuration(prompt: parameter.prompt)
configuration.negativePrompt = parameter.negativePrompt
configuration.imageCount = parameter.imageCount
@ -137,6 +135,17 @@ final class ImageGenerator: ObservableObject {
configuration.seed = UInt32(parameter.seed)
configuration.guidanceScale = parameter.guidanceScale
configuration.disableSafety = parameter.disableSafety
// [Note] generation mode: textToImage or imageToImage
// when startingImage != nil AND strength < 1.0, imageToImage mode is selected
switch parameter.mode {
case .textToImage:
configuration.strength = 1.0
case .imageToImage:
configuration.startingImage = parameter.startImage
configuration.strength = parameter.strength
}
let cgImages = try sdPipeline.generateImages(configuration: configuration)
print("images were generated.")

@ -0,0 +1,78 @@
//
// ImageToImageView.swift
// imggensd2
//
// Created by Yasuhito Nagatomo on 2023/02/11.
//
import SwiftUI
/// Image-to-image screen: shows the start image, the prompt controls,
/// a Generate button (or a progress indicator while busy), and the
/// images currently held by the injected `ImageGenerator`.
struct ImageToImageView: View {
    // Initial values for the editable generation parameters.
    static let prompt = "happy smile snow winter"
    static let negativePrompt =
        """
        lowres, bad anatomy, bad hands, text, error, missing fingers, extra digit, fewer digits,
        cropped, worst quality, low quality, normal quality, jpeg artifacts, blurry, multiple legs, malformation
        """
    /// Name of the image asset used both for display and as the start image.
    static let startImageName = "sample1_512x512"

    @ObservedObject var imageGenerator: ImageGenerator

    /// Parameters edited through `PromptView` and passed to the generator.
    @State private var generationParameter = ImageGenerator.GenerationParameter(
        mode: .imageToImage,
        prompt: ImageToImageView.prompt,
        negativePrompt: ImageToImageView.negativePrompt,
        guidanceScale: 8.0,
        seed: 1_000_000,
        stepCount: 20,
        imageCount: 1,
        disableSafety: false,
        startImage: UIImage(named: ImageToImageView.startImageName)?.cgImage,
        strength: 0.5
    )

    var body: some View {
        ScrollView {
            VStack {
                Text("Image to image")
                    .font(.title3)
                    .bold()
                    .padding(6)
                Text("Sample App using apple/ml-stable-diffusion")
                    .foregroundColor(.secondary)
                    .font(.caption)
                    .padding(.bottom)

                Image(Self.startImageName)
                    .resizable()
                    .scaledToFit()
                    .frame(height: 200)

                // Lock the controls whenever the generator is not idle.
                PromptView(parameter: $generationParameter)
                    .disabled(imageGenerator.generationState != .idle)

                if imageGenerator.generationState != .idle {
                    ProgressView()
                } else {
                    Button {
                        generate()
                    } label: {
                        Text("Generate").font(.title)
                    }
                    .buttonStyle(.borderedProminent)
                }

                if let results = imageGenerator.generatedImages {
                    ForEach(results.images) { generated in
                        Image(uiImage: generated.uiImage)
                            .resizable()
                            .scaledToFit()
                    }
                }
            }
        }
        .padding()
    }

    /// Hands the current parameters to the image generator.
    func generate() {
        imageGenerator.generateImages(generationParameter)
    }
}
/// Preview provider for `ImageToImageView`.
struct ImageToImageView_Previews: PreviewProvider {
    // One generator instance is created for the preview.
    static let imageGenerator = ImageGenerator()

    static var previews: some View {
        ImageToImageView(imageGenerator: Self.imageGenerator)
    }
}

@ -0,0 +1,67 @@
//
// PromptView.swift
// imggensd2
//
// Created by Yasuhito Nagatomo on 2023/02/11.
//
import SwiftUI
/// Form for editing an `ImageGenerator.GenerationParameter` in place:
/// prompt, negative prompt, guidance scale, image count, step count, seed,
/// and (in image-to-image mode only) the strength.
struct PromptView: View {
    /// Formatter for the seed text field. Created once and reused, because
    /// `body` is re-evaluated frequently and building a `NumberFormatter`
    /// on every evaluation is needless work.
    private static let seedFormatter = NumberFormatter()

    /// The parameter set being edited; changes are written back to the owner.
    @Binding var parameter: ImageGenerator.GenerationParameter

    var body: some View {
        VStack {
            HStack {
                Text("Prompt:")
                Spacer()
            }
            TextField("Prompt:", text: $parameter.prompt)
                .textFieldStyle(RoundedBorderTextFieldStyle())

            HStack {
                Text("Negative Prompt:")
                Spacer()
            }
            TextField("Negative Prompt:", text: $parameter.negativePrompt)
                .textFieldStyle(RoundedBorderTextFieldStyle())

            Stepper(value: $parameter.guidanceScale, in: 0.0...40.0, step: 0.5) {
                Text("Guidance scale: \(parameter.guidanceScale, specifier: "%.1f") ")
            }
            Stepper(value: $parameter.imageCount, in: 1...10) {
                Text("Image Count: \(parameter.imageCount)")
            }
            Stepper(value: $parameter.stepCount, in: 1...100) {
                Text("Iteration steps: \(parameter.stepCount)")
            }

            HStack {
                Text("Seed:")
                Spacer()
            }
            TextField("Seed number (0 ... 4_294_967_295)",
                      value: $parameter.seed,
                      formatter: PromptView.seedFormatter)
                .textFieldStyle(RoundedBorderTextFieldStyle())
                .onSubmit {
                    // Clamp the submitted seed into 0 ... UInt32.max.
                    if parameter.seed < 0 {
                        parameter.seed = 0
                    } else if parameter.seed > UInt32.max {
                        parameter.seed = Int(UInt32.max)
                    }
                }

            // The strength control is only shown for image-to-image mode.
            if parameter.mode == .imageToImage {
                Stepper(value: $parameter.strength, in: 0.0...0.9, step: 0.1) {
                    Text("Strength: \(parameter.strength, specifier: "%.1f") ")
                }
            }
        }
        .padding()
    }
}
/// Preview provider for `PromptView`, using a sample image-to-image
/// parameter set so the strength control is included.
struct PromptView_Previews: PreviewProvider {
    // Sample parameters bound into the previewed view.
    @State static var param = ImageGenerator.GenerationParameter(
        mode: .imageToImage,
        prompt: "a prompt",
        negativePrompt: "a negative prompt",
        guidanceScale: 0.5,
        seed: 1_000,
        stepCount: 20,
        imageCount: 1,
        disableSafety: false,
        strength: 0.5
    )

    static var previews: some View {
        PromptView(parameter: $param)
    }
}

@ -0,0 +1,69 @@
//
// TextToImageView.swift
// imggensd2
//
// Created by Yasuhito Nagatomo on 2023/02/11.
//
import SwiftUI
/// Text-to-image screen: shows the prompt controls, a Generate button
/// (or a progress indicator while busy), and the images currently held
/// by the injected `ImageGenerator`.
struct TextToImageView: View {
    // Initial values for the editable generation parameters.
    static let prompt = "a photo of an astronaut riding a horse on mars"
    static let negativePrompt =
        """
        lowres, bad anatomy, bad hands, text, error, missing fingers, extra digit, fewer digits,
        cropped, worst quality, low quality, normal quality, jpeg artifacts, blurry, multiple legs, malformation
        """

    @ObservedObject var imageGenerator: ImageGenerator

    /// Parameters edited through `PromptView` and passed to the generator.
    @State private var generationParameter = ImageGenerator.GenerationParameter(
        mode: .textToImage,
        prompt: TextToImageView.prompt,
        negativePrompt: TextToImageView.negativePrompt,
        guidanceScale: 8.0,
        seed: 1_000_000,
        stepCount: 20,
        imageCount: 1,
        disableSafety: false
    )

    var body: some View {
        ScrollView {
            VStack {
                Text("Text to image")
                    .font(.title3)
                    .bold()
                    .padding(6)
                Text("Sample App using apple/ml-stable-diffusion")
                    .foregroundColor(.secondary)
                    .font(.caption)
                    .padding(.bottom)

                // Lock the controls whenever the generator is not idle.
                PromptView(parameter: $generationParameter)
                    .disabled(imageGenerator.generationState != .idle)

                if imageGenerator.generationState != .idle {
                    ProgressView()
                } else {
                    Button {
                        generate()
                    } label: {
                        Text("Generate").font(.title)
                    }
                    .buttonStyle(.borderedProminent)
                }

                if let results = imageGenerator.generatedImages {
                    ForEach(results.images) { generated in
                        Image(uiImage: generated.uiImage)
                            .resizable()
                            .scaledToFit()
                    }
                }
            }
        }
        .padding()
    }

    /// Hands the current parameters to the image generator.
    func generate() {
        imageGenerator.generateImages(generationParameter)
    }
}
/// Preview provider for `TextToImageView`.
struct TextToImageView_Previews: PreviewProvider {
    // One generator instance is created for the preview.
    static let imageGenerator = ImageGenerator()

    static var previews: some View {
        TextToImageView(imageGenerator: Self.imageGenerator)
    }
}
Loading…
Cancel
Save