diff --git a/README.md b/README.md index 96e34e9..a0ec901 100644 --- a/README.md +++ b/README.md @@ -19,12 +19,17 @@ The project uses the Apple/ml-stable-diffusion Swift Package. You can see how it works through the simple sample code. - Apple/ml-stable-diffusion repo: https://github.com/apple/ml-stable-diffusion +- v0.2.0 is required ![Image](images/ss_4_imgs.png) ![Image](images/ss0_1280.png) ## Change Log +- [1.4.0 (10)] - Feb 11, 2023 `[Added]` + - Added imageToImage generation functionality, ImageToImageView. + - Added an image asset as a start-image for imageToImage generation. + - The latest apple/ml-stable-diffusion v0.2.0 is required. - [1.3.0 (9)] - Feb 10, 2023 `[Changed]` - Changed to use the Configuration structure when calling the generateImages(configuration: progressHandler:) API to support changes in the API of apple/ml-stable-diffusion v0.2.0. - The apple/ml-stable-diffusion Swift Package v0.2.0 or later is required. diff --git a/imggensd2.xcodeproj/project.pbxproj b/imggensd2.xcodeproj/project.pbxproj index a0f491c..76506a9 100644 --- a/imggensd2.xcodeproj/project.pbxproj +++ b/imggensd2.xcodeproj/project.pbxproj @@ -12,8 +12,11 @@ BA03C37C293D785D001426DE /* Assets.xcassets in Resources */ = {isa = PBXBuildFile; fileRef = BA03C37B293D785D001426DE /* Assets.xcassets */; }; BA03C37F293D785D001426DE /* Preview Assets.xcassets in Resources */ = {isa = PBXBuildFile; fileRef = BA03C37E293D785D001426DE /* Preview Assets.xcassets */; }; BA03C38D293D8773001426DE /* ImageGenerator.swift in Sources */ = {isa = PBXBuildFile; fileRef = BA03C38C293D8773001426DE /* ImageGenerator.swift */; }; - BA2A0BFF2974E73C0037687E /* CoreMLModels in Resources */ = {isa = PBXBuildFile; fileRef = BA2A0BFE2974E73C0037687E /* CoreMLModels */; }; BA34A6BF299605F70062CAF4 /* StableDiffusion in Frameworks */ = {isa = PBXBuildFile; productRef = BA34A6BE299605F70062CAF4 /* StableDiffusion */; }; + BA34A6C129961CCE0062CAF4 /* CoreMLModels in Resources */ = {isa = PBXBuildFile; 
fileRef = BA34A6C029961CCE0062CAF4 /* CoreMLModels */; }; + BA34A6C72997388D0062CAF4 /* TextToImageView.swift in Sources */ = {isa = PBXBuildFile; fileRef = BA34A6C62997388D0062CAF4 /* TextToImageView.swift */; }; + BA34A6C9299738DC0062CAF4 /* PromptView.swift in Sources */ = {isa = PBXBuildFile; fileRef = BA34A6C8299738DC0062CAF4 /* PromptView.swift */; }; + BA34A6CB29974CBC0062CAF4 /* ImageToImageView.swift in Sources */ = {isa = PBXBuildFile; fileRef = BA34A6CA29974CBC0062CAF4 /* ImageToImageView.swift */; }; /* End PBXBuildFile section */ /* Begin PBXFileReference section */ @@ -24,7 +27,10 @@ BA03C37E293D785D001426DE /* Preview Assets.xcassets */ = {isa = PBXFileReference; lastKnownFileType = folder.assetcatalog; path = "Preview Assets.xcassets"; sourceTree = ""; }; BA03C386293D7CE5001426DE /* README.md */ = {isa = PBXFileReference; lastKnownFileType = net.daringfireball.markdown; path = README.md; sourceTree = ""; }; BA03C38C293D8773001426DE /* ImageGenerator.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = ImageGenerator.swift; sourceTree = ""; }; - BA2A0BFE2974E73C0037687E /* CoreMLModels */ = {isa = PBXFileReference; lastKnownFileType = folder; path = CoreMLModels; sourceTree = ""; }; + BA34A6C029961CCE0062CAF4 /* CoreMLModels */ = {isa = PBXFileReference; lastKnownFileType = folder; path = CoreMLModels; sourceTree = ""; }; + BA34A6C62997388D0062CAF4 /* TextToImageView.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = TextToImageView.swift; sourceTree = ""; }; + BA34A6C8299738DC0062CAF4 /* PromptView.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = PromptView.swift; sourceTree = ""; }; + BA34A6CA29974CBC0062CAF4 /* ImageToImageView.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = ImageToImageView.swift; sourceTree = ""; }; /* End PBXFileReference section */ /* Begin PBXFrameworksBuildPhase section */ @@ -59,9 +65,12 @@ 
BA03C376293D785C001426DE /* imggensd2 */ = { isa = PBXGroup; children = ( - BA2A0BFE2974E73C0037687E /* CoreMLModels */, + BA34A6C029961CCE0062CAF4 /* CoreMLModels */, BA03C377293D785C001426DE /* imggensd2App.swift */, BA03C379293D785C001426DE /* ContentView.swift */, + BA34A6C62997388D0062CAF4 /* TextToImageView.swift */, + BA34A6CA29974CBC0062CAF4 /* ImageToImageView.swift */, + BA34A6C8299738DC0062CAF4 /* PromptView.swift */, BA03C38C293D8773001426DE /* ImageGenerator.swift */, BA03C37B293D785D001426DE /* Assets.xcassets */, BA03C37D293D785D001426DE /* Preview Content */, @@ -142,7 +151,7 @@ isa = PBXResourcesBuildPhase; buildActionMask = 2147483647; files = ( - BA2A0BFF2974E73C0037687E /* CoreMLModels in Resources */, + BA34A6C129961CCE0062CAF4 /* CoreMLModels in Resources */, BA03C37F293D785D001426DE /* Preview Assets.xcassets in Resources */, BA03C37C293D785D001426DE /* Assets.xcassets in Resources */, ); @@ -178,6 +187,9 @@ files = ( BA03C37A293D785C001426DE /* ContentView.swift in Sources */, BA03C38D293D8773001426DE /* ImageGenerator.swift in Sources */, + BA34A6C72997388D0062CAF4 /* TextToImageView.swift in Sources */, + BA34A6CB29974CBC0062CAF4 /* ImageToImageView.swift in Sources */, + BA34A6C9299738DC0062CAF4 /* PromptView.swift in Sources */, BA03C378293D785C001426DE /* imggensd2App.swift in Sources */, ); runOnlyForDeploymentPostprocessing = 0; @@ -305,7 +317,7 @@ ASSETCATALOG_COMPILER_APPICON_NAME = AppIcon; ASSETCATALOG_COMPILER_GLOBAL_ACCENT_COLOR_NAME = AccentColor; CODE_SIGN_STYLE = Automatic; - CURRENT_PROJECT_VERSION = 9; + CURRENT_PROJECT_VERSION = 10; DEVELOPMENT_ASSET_PATHS = "\"imggensd2/Preview Content\""; DEVELOPMENT_TEAM = J5CY9Q9UP5; ENABLE_PREVIEWS = YES; @@ -321,7 +333,7 @@ "$(inherited)", "@executable_path/Frameworks", ); - MARKETING_VERSION = 1.3.0; + MARKETING_VERSION = 1.4.0; PRODUCT_BUNDLE_IDENTIFIER = com.atarayosd.imggensd2; PRODUCT_NAME = "$(TARGET_NAME)"; SWIFT_EMIT_LOC_STRINGS = YES; @@ -336,7 +348,7 @@ 
ASSETCATALOG_COMPILER_APPICON_NAME = AppIcon; ASSETCATALOG_COMPILER_GLOBAL_ACCENT_COLOR_NAME = AccentColor; CODE_SIGN_STYLE = Automatic; - CURRENT_PROJECT_VERSION = 9; + CURRENT_PROJECT_VERSION = 10; DEVELOPMENT_ASSET_PATHS = "\"imggensd2/Preview Content\""; DEVELOPMENT_TEAM = J5CY9Q9UP5; ENABLE_PREVIEWS = YES; @@ -352,7 +364,7 @@ "$(inherited)", "@executable_path/Frameworks", ); - MARKETING_VERSION = 1.3.0; + MARKETING_VERSION = 1.4.0; PRODUCT_BUNDLE_IDENTIFIER = com.atarayosd.imggensd2; PRODUCT_NAME = "$(TARGET_NAME)"; SWIFT_EMIT_LOC_STRINGS = YES; diff --git a/imggensd2/Assets.xcassets/SampleImages/Contents.json b/imggensd2/Assets.xcassets/SampleImages/Contents.json new file mode 100644 index 0000000..73c0059 --- /dev/null +++ b/imggensd2/Assets.xcassets/SampleImages/Contents.json @@ -0,0 +1,6 @@ +{ + "info" : { + "author" : "xcode", + "version" : 1 + } +} diff --git a/imggensd2/Assets.xcassets/SampleImages/sample1_512x512.imageset/Contents.json b/imggensd2/Assets.xcassets/SampleImages/sample1_512x512.imageset/Contents.json new file mode 100644 index 0000000..47b6dc7 --- /dev/null +++ b/imggensd2/Assets.xcassets/SampleImages/sample1_512x512.imageset/Contents.json @@ -0,0 +1,12 @@ +{ + "images" : [ + { + "filename" : "sample1_512x512.png", + "idiom" : "universal" + } + ], + "info" : { + "author" : "xcode", + "version" : 1 + } +} diff --git a/imggensd2/Assets.xcassets/SampleImages/sample1_512x512.imageset/sample1_512x512.png b/imggensd2/Assets.xcassets/SampleImages/sample1_512x512.imageset/sample1_512x512.png new file mode 100644 index 0000000..8309832 Binary files /dev/null and b/imggensd2/Assets.xcassets/SampleImages/sample1_512x512.imageset/sample1_512x512.png differ diff --git a/imggensd2/ContentView.swift b/imggensd2/ContentView.swift index b7092e5..aa516ed 100644 --- a/imggensd2/ContentView.swift +++ b/imggensd2/ContentView.swift @@ -8,51 +8,22 @@ import SwiftUI struct ContentView: View { - static let prompt = "a photo of an astronaut riding a horse on mars" 
- static let negativePrompt = -""" -lowres, bad anatomy, bad hands, text, error, missing fingers, extra digit, fewer digits, - cropped, worst quality, low quality, normal quality, jpeg artifacts, blurry, multiple legs, malformation -""" - @StateObject var imageGenerator = ImageGenerator() - @State private var generationParameter = - ImageGenerator.GenerationParameter(prompt: prompt, - negativePrompt: negativePrompt, - guidanceScale: 8.0, - seed: 1_000_000, - stepCount: 20, - imageCount: 1, disableSafety: false) - var body: some View { - ScrollView { - VStack { - Text("Stable Diffusion v2").font(.title).padding() - PromptView(parameter: $generationParameter) - .disabled(imageGenerator.generationState != .idle) - - if imageGenerator.generationState == .idle { - Button(action: generate) { - Text("Generate").font(.title) - }.buttonStyle(.borderedProminent) - } else { - ProgressView() + var body: some View { + TabView { + TextToImageView(imageGenerator: imageGenerator) + .tabItem { + Image(systemName: "text.below.photo.fill") + Text("Text to Image") } - - if let generatedImages = imageGenerator.generatedImages { - ForEach(generatedImages.images) { - Image(uiImage: $0.uiImage) - .resizable() - .scaledToFit() - } + ImageToImageView(imageGenerator: imageGenerator) + .tabItem { + Image(systemName: "photo.stack.fill") + Text("Image to Image") } - } } - .padding() - } - - func generate() { - imageGenerator.generateImages(generationParameter) + .accentColor(.purple) } } @@ -61,42 +32,3 @@ struct ContentView_Previews: PreviewProvider { ContentView() } } - -struct PromptView: View { - @Binding var parameter: ImageGenerator.GenerationParameter - - var body: some View { - VStack { - HStack { Text("Prompt:"); Spacer() } - TextField("Prompt:", text: $parameter.prompt) - .textFieldStyle(RoundedBorderTextFieldStyle()) - HStack { Text("Negative Prompt:"); Spacer() } - TextField("Negative Prompt:", text: $parameter.negativePrompt) - .textFieldStyle(RoundedBorderTextFieldStyle()) - 
Stepper(value: $parameter.guidanceScale, in: 0.0...40.0, step: 0.5) { - Text("Guidance scale: \(parameter.guidanceScale, specifier: "%.1f") ") - } - Stepper(value: $parameter.imageCount, in: 1...10) { - Text("Image Count: \(parameter.imageCount)") - } - Stepper(value: $parameter.stepCount, in: 1...100) { - Text("Iteration steps: \(parameter.stepCount)") - } - HStack { Text("Seed:"); Spacer() } - TextField("Seed number (0 ... 4_294_967_295)", - value: $parameter.seed, - formatter: NumberFormatter()) - .textFieldStyle(RoundedBorderTextFieldStyle()) - .onSubmit { - if parameter.seed < 0 { - parameter.seed = 0 - } else if parameter.seed > UInt32.max { - parameter.seed = Int(UInt32.max) - } else { - // do nothing - } - } - } - .padding() - } -} diff --git a/imggensd2/ImageGenerator.swift b/imggensd2/ImageGenerator.swift index 5dae773..616eebe 100644 --- a/imggensd2/ImageGenerator.swift +++ b/imggensd2/ImageGenerator.swift @@ -11,7 +11,12 @@ import CoreML @MainActor final class ImageGenerator: ObservableObject { + enum GenerationMode { + case textToImage, imageToImage + } + struct GenerationParameter { + let mode: GenerationMode var prompt: String var negativePrompt: String var guidanceScale: Float @@ -19,6 +24,8 @@ final class ImageGenerator: ObservableObject { var stepCount: Int var imageCount: Int var disableSafety: Bool + var startImage: CGImage? + var strength: Float = 1.0 } struct GeneratedImage: Identifiable { @@ -74,6 +81,7 @@ final class ImageGenerator: ObservableObject { generatedImages = images } + // swiftlint:disable function_body_length func generateImages(_ parameter: GenerationParameter) { guard generationState == .idle else { return } Task.detached(priority: .high) { @@ -117,19 +125,9 @@ final class ImageGenerator: ObservableObject { // https://github.com/ynagatomo/ARDiffMuseum // It handles the progressHandler and displays the generating images step by step. 
- // at v1.3.0 // apple/ml-stable-diffusion v0.2.0 changed the generateImages() API - // to generateImages(configuration:progressHandler:) - // let cgImages = try sdPipeline.generateImages(prompt: parameter.prompt, - // negativePrompt: parameter.negativePrompt, - // imageCount: parameter.imageCount, - // stepCount: parameter.stepCount, - // seed: UInt32(parameter.seed), - // guidanceScale: parameter.guidanceScale, - // disableSafety: parameter.disableSafety) - - // Mode: textToImage or imageToImage - // when startingImage != nil AND strength < 1.0, imageToImage mode is selected + // to generateImages(configuration:progressHandler:) + var configuration = StableDiffusionPipeline.Configuration(prompt: parameter.prompt) configuration.negativePrompt = parameter.negativePrompt configuration.imageCount = parameter.imageCount @@ -137,6 +135,17 @@ final class ImageGenerator: ObservableObject { configuration.seed = UInt32(parameter.seed) configuration.guidanceScale = parameter.guidanceScale configuration.disableSafety = parameter.disableSafety + + // [Note] generation mode: textToImage or imageToImage + // when startingImage != nil AND strength < 1.0, imageToImage mode is selected + switch parameter.mode { + case .textToImage: + configuration.strength = 1.0 + case .imageToImage: + configuration.startingImage = parameter.startImage + configuration.strength = parameter.strength + } + let cgImages = try sdPipeline.generateImages(configuration: configuration) print("images were generated.") diff --git a/imggensd2/ImageToImageView.swift b/imggensd2/ImageToImageView.swift new file mode 100644 index 0000000..9b4d137 --- /dev/null +++ b/imggensd2/ImageToImageView.swift @@ -0,0 +1,78 @@ +// +// ImageToImageView.swift +// imggensd2 +// +// Created by Yasuhito Nagatomo on 2023/02/11. 
+//
+
+import SwiftUI
+
+/// Image-to-image tab: shows the bundled start image, the prompt controls,
+/// a Generate button (a spinner while busy) and the generated images.
+struct ImageToImageView: View {
+    static let prompt = "happy smile snow winter"
+    static let negativePrompt =
+"""
+lowres, bad anatomy, bad hands, text, error, missing fingers, extra digit, fewer digits,
+ cropped, worst quality, low quality, normal quality, jpeg artifacts, blurry, multiple legs, malformation
+"""
+    static let startImageName = "sample1_512x512"
+
+    @ObservedObject var imageGenerator: ImageGenerator
+    // Initial settings for this tab; PromptView edits them through a binding.
+    @State private var generationParameter = ImageGenerator.GenerationParameter(
+        mode: .imageToImage,
+        prompt: prompt,
+        negativePrompt: negativePrompt,
+        guidanceScale: 8.0,
+        seed: 1_000_000,
+        stepCount: 20,
+        imageCount: 1, disableSafety: false,
+        startImage: UIImage(named: startImageName)?.cgImage,
+        strength: 0.5)
+
+    var body: some View {
+        ScrollView {
+            VStack {
+                Text("Image to image").font(.title3).bold().padding(6)
+                Text("Sample App using apple/ml-stable-diffusion")
+                    .foregroundColor(.secondary).font(.caption).padding(.bottom)
+                Image(Self.startImageName).resizable().scaledToFit().frame(height: 200)
+                PromptView(parameter: $generationParameter)
+                    .disabled(imageGenerator.generationState != .idle)
+                generateControl
+                generatedImageList
+            }
+        }
+        .padding()
+    }
+
+    /// The Generate button while the generator is idle, otherwise a progress spinner.
+    @ViewBuilder private var generateControl: some View {
+        if imageGenerator.generationState == .idle {
+            Button(action: generate) { Text("Generate").font(.title) }
+                .buttonStyle(.borderedProminent)
+        } else {
+            ProgressView()
+        }
+    }
+
+    /// The generator's current images, if any, each resized to fit.
+    @ViewBuilder private var generatedImageList: some View {
+        if let result = imageGenerator.generatedImages {
+            ForEach(result.images) { generated in
+                Image(uiImage: generated.uiImage).resizable().scaledToFit()
+            }
+        }
+    }
+
+    /// Asks the generator to produce images with the parameters held by this view.
+    func generate() { imageGenerator.generateImages(generationParameter) }
+}
+
+struct ImageToImageView_Previews: PreviewProvider {
+    static let imageGenerator = ImageGenerator()
+    static var previews: some View {
+        ImageToImageView(imageGenerator: imageGenerator)
+    }
+}
diff --git a/imggensd2/PromptView.swift b/imggensd2/PromptView.swift
new file mode 100644
index 0000000..b72bf36
--- /dev/null
+++ 
b/imggensd2/PromptView.swift
@@ -0,0 +1,77 @@
+//
+// PromptView.swift
+// imggensd2
+//
+// Created by Yasuhito Nagatomo on 2023/02/11.
+//
+
+import SwiftUI
+
+/// Editing controls for an `ImageGenerator.GenerationParameter`: prompt and
+/// negative-prompt fields, steppers for guidance scale, image count and step
+/// count, a seed field and, in image-to-image mode only, a strength stepper.
+struct PromptView: View {
+    /// Formatter for the seed field. Created once and shared, because building a
+    /// `NumberFormatter` on every evaluation of `body` is needlessly expensive.
+    private static let seedFormatter = NumberFormatter()
+
+    /// The parameter set being edited; every control writes through this binding.
+    @Binding var parameter: ImageGenerator.GenerationParameter
+
+    var body: some View {
+        VStack {
+            HStack { Text("Prompt:"); Spacer() }
+            TextField("Prompt:", text: $parameter.prompt)
+                .textFieldStyle(RoundedBorderTextFieldStyle())
+            HStack { Text("Negative Prompt:"); Spacer() }
+            TextField("Negative Prompt:", text: $parameter.negativePrompt)
+                .textFieldStyle(RoundedBorderTextFieldStyle())
+            Stepper(value: $parameter.guidanceScale, in: 0.0...40.0, step: 0.5) {
+                Text("Guidance scale: \(parameter.guidanceScale, specifier: "%.1f") ")
+            }
+            Stepper(value: $parameter.imageCount, in: 1...10) {
+                Text("Image Count: \(parameter.imageCount)")
+            }
+            Stepper(value: $parameter.stepCount, in: 1...100) {
+                Text("Iteration steps: \(parameter.stepCount)")
+            }
+            HStack { Text("Seed:"); Spacer() }
+            TextField("Seed number (0 ... 4_294_967_295)",
+                      value: $parameter.seed,
+                      formatter: Self.seedFormatter)
+                .textFieldStyle(RoundedBorderTextFieldStyle())
+                .onSubmit { clampSeed() }
+            // The strength control is only offered in image-to-image mode.
+            if parameter.mode == .imageToImage {
+                Stepper(value: $parameter.strength, in: 0.0...0.9, step: 0.1) {
+                    Text("Strength: \(parameter.strength, specifier: "%.1f") ")
+                }
+            }
+        }
+        .padding()
+    }
+
+    /// Clamps the seed into 0...UInt32.max, the range the seed field advertises.
+    /// Writes back only when the value actually changes.
+    private func clampSeed() {
+        let clamped = min(max(parameter.seed, 0), Int(UInt32.max))
+        if clamped != parameter.seed {
+            parameter.seed = clamped
+        }
+    }
+}
+
+struct PromptView_Previews: PreviewProvider {
+    @State static var param = ImageGenerator.GenerationParameter(mode: .imageToImage,
+                                                                 prompt: "a prompt",
+                                                                 negativePrompt: "a negative prompt",
+                                                                 guidanceScale: 0.5,
+                                                                 seed: 1_000,
+                                                                 stepCount: 20,
+                                                                 imageCount: 1,
+                                                                 disableSafety: false,
+                                                                 strength: 0.5)
+    static var previews: some View {
+        PromptView(parameter: $param)
+    }
+}
diff --git a/imggensd2/TextToImageView.swift b/imggensd2/TextToImageView.swift
new file mode 100644
index 0000000..1bde8e3
--- /dev/null
+++ b/imggensd2/TextToImageView.swift
@@ -0,0 +1,69 @@
+//
+// TextToImageView.swift
+// imggensd2
+//
+// Created by Yasuhito Nagatomo on 2023/02/11.
+//
+
+import SwiftUI
+
+/// Text-to-image tab: prompt controls, a Generate button or spinner, and the results.
+struct TextToImageView: View {
+    static let prompt = "a photo of an astronaut riding a horse on mars"
+    static let negativePrompt =
+"""
+lowres, bad anatomy, bad hands, text, error, missing fingers, extra digit, fewer digits,
+ cropped, worst quality, low quality, normal quality, jpeg artifacts, blurry, multiple legs, malformation
+"""
+
+    @ObservedObject var imageGenerator: ImageGenerator
+    @State private var generationParameter = ImageGenerator.GenerationParameter(
+        mode: .textToImage,
+        prompt: prompt,
+        negativePrompt: negativePrompt,
+        guidanceScale: 8.0,
+        seed: 1_000_000,
+        stepCount: 20,
+        imageCount: 1, disableSafety: false)
+
+    var body: some View {
+        ScrollView {
+            VStack {
+                Text("Text to image").font(.title3).bold().padding(6)
+                Text("Sample App using apple/ml-stable-diffusion")
+                    .foregroundColor(.secondary).font(.caption).padding(.bottom)
+                PromptView(parameter: $generationParameter)
+                    .disabled(imageGenerator.generationState != .idle)
+                generateControl
+                generatedImageList
+            }
+        }
+        .padding()
+    }
+
+    /// The Generate button while the generator is idle, otherwise a progress spinner.
+    @ViewBuilder private var generateControl: some View {
+        if imageGenerator.generationState == .idle {
+            Button(action: generate) { Text("Generate").font(.title) }.buttonStyle(.borderedProminent)
+        } else {
+            ProgressView()
+        }
+    }
+
+    /// The generator's current images, if any, each resized to fit.
+    @ViewBuilder private var generatedImageList: some View {
+        if let result = imageGenerator.generatedImages {
+            ForEach(result.images) { generated in
+                Image(uiImage: generated.uiImage).resizable().scaledToFit()
+            }
+        }
+    }
+
+    /// Asks the generator to produce images with the parameters held by this view.
+    func generate() { imageGenerator.generateImages(generationParameter) }
+}
+
+struct TextToImageView_Previews: PreviewProvider {
+    static let imageGenerator = ImageGenerator()
+    static var previews: some View { TextToImageView(imageGenerator: imageGenerator) }
+}