From ed15f786281a2e84235768fcfaeca516530a4bb5 Mon Sep 17 00:00:00 2001 From: Pedro Cuenca Date: Mon, 20 Feb 2023 19:14:48 +0100 Subject: [PATCH] Decide attention and ANE on hardware capabilities (#29) For Macs with >= 8 performance cores, we select CPU+GPU (original attention). Otherwise we select CPU+ANE (split einsum). Some computers (M1 Pro, 16 core GPU) might yield slightly better performance using CPU+GPU+ANE with SPLIT_EINSUM. --- Diffusion-macOS/Capabilities.swift | 36 ++++++++++++++++++++++++ Diffusion-macOS/ControlsView.swift | 2 +- Diffusion-macOS/Diffusion_macOSApp.swift | 9 ------ Diffusion.xcodeproj/project.pbxproj | 4 +++ Diffusion/ModelInfo.swift | 7 +++-- 5 files changed, 45 insertions(+), 13 deletions(-) create mode 100644 Diffusion-macOS/Capabilities.swift diff --git a/Diffusion-macOS/Capabilities.swift b/Diffusion-macOS/Capabilities.swift new file mode 100644 index 0000000..5a419e2 --- /dev/null +++ b/Diffusion-macOS/Capabilities.swift @@ -0,0 +1,36 @@ +// +// Capabilities.swift +// Diffusion-macOS +// +// Created by Pedro Cuenca on 20/2/23. +// See LICENSE at https://github.com/huggingface/swift-coreml-diffusers/LICENSE +// + +import Foundation + +let runningOnMac = true + +#if canImport(MLCompute) +import MLCompute +let _hasANE = MLCDevice.ane() != nil +#else +let _hasANE = false +#endif + +final class Capabilities { + static let hasANE = _hasANE + + // According to my tests this is a good proxy to estimate whether CPU+GPU + // or CPU+NE works better. Things may become more complicated if we + // choose all compute units. 
+ static var performanceCores: Int = { + var ncores: Int32 = 0 + var bytes = MemoryLayout<Int32>.size + + // In M1/M2 perflevel0 refers to the performance cores and perflevel1 are the efficiency cores + // In Intel there's only one performance level + let result = sysctlbyname("hw.perflevel0.physicalcpu", &ncores, &bytes, nil, 0) + guard result == 0 else { return 0 } + return Int(ncores) + }() +} diff --git a/Diffusion-macOS/ControlsView.swift b/Diffusion-macOS/ControlsView.swift index 9e8ef1c..e0a93ce 100644 --- a/Diffusion-macOS/ControlsView.swift +++ b/Diffusion-macOS/ControlsView.swift @@ -284,7 +284,7 @@ struct ControlsView: View { }.foregroundColor(.secondary) } - if hasANE { + if Capabilities.hasANE { Divider() DisclosureGroup(isExpanded: $disclosedAdvanced) { HStack { diff --git a/Diffusion-macOS/Diffusion_macOSApp.swift b/Diffusion-macOS/Diffusion_macOSApp.swift index 1c2c59d..9124f86 100644 --- a/Diffusion-macOS/Diffusion_macOSApp.swift +++ b/Diffusion-macOS/Diffusion_macOSApp.swift @@ -16,12 +16,3 @@ struct Diffusion_macOSApp: App { } } } - -let runningOnMac = true - -#if canImport(MLCompute) -import MLCompute -let hasANE = MLCDevice.ane() != nil -#else -let hasANE = false -#endif diff --git a/Diffusion.xcodeproj/project.pbxproj b/Diffusion.xcodeproj/project.pbxproj index a800a09..39d8b5a 100644 --- a/Diffusion.xcodeproj/project.pbxproj +++ b/Diffusion.xcodeproj/project.pbxproj @@ -9,6 +9,7 @@ /* Begin PBXBuildFile section */ EB067F872992E561004D1AD9 /* HelpContent.swift in Sources */ = {isa = PBXBuildFile; fileRef = EB067F862992E561004D1AD9 /* HelpContent.swift */; }; EB33A51D2954D89F00B16357 /* StableDiffusion in Frameworks */ = {isa = PBXBuildFile; productRef = EB33A51C2954D89F00B16357 /* StableDiffusion */; }; + EB560F0429A3C20800C0F8B8 /* Capabilities.swift in Sources */ = {isa = PBXBuildFile; fileRef = EB560F0329A3C20800C0F8B8 /* Capabilities.swift */; }; EBB5BA5329425BEE003A2A5B /* PipelineLoader.swift in Sources */ = {isa = PBXBuildFile; fileRef = 
EBB5BA5229425BEE003A2A5B /* PipelineLoader.swift */; }; EBB5BA5829425E17003A2A5B /* Path in Frameworks */ = {isa = PBXBuildFile; productRef = EBB5BA5729425E17003A2A5B /* Path */; }; EBB5BA5A29426E06003A2A5B /* Downloader.swift in Sources */ = {isa = PBXBuildFile; fileRef = EBB5BA5929426E06003A2A5B /* Downloader.swift */; }; @@ -64,6 +65,7 @@ /* Begin PBXFileReference section */ EB067F862992E561004D1AD9 /* HelpContent.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = HelpContent.swift; sourceTree = "<group>"; }; EB33A51E2954E1BC00B16357 /* Info.plist */ = {isa = PBXFileReference; lastKnownFileType = text.plist; path = Info.plist; sourceTree = "<group>"; }; + EB560F0329A3C20800C0F8B8 /* Capabilities.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = Capabilities.swift; sourceTree = "<group>"; }; EBB5BA5229425BEE003A2A5B /* PipelineLoader.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = PipelineLoader.swift; sourceTree = "<group>"; }; EBB5BA5929426E06003A2A5B /* Downloader.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = Downloader.swift; sourceTree = "<group>"; }; EBDD7DB22973200200C1C4B2 /* Utils.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = Utils.swift; sourceTree = "<group>"; }; @@ -247,6 +249,7 @@ F1552030297109C300DC009B /* ControlsView.swift */, F155203329710B3600DC009B /* StatusView.swift */, EB067F862992E561004D1AD9 /* HelpContent.swift */, + EB560F0329A3C20800C0F8B8 /* Capabilities.swift */, F155202C2971093400DC009B /* Diffusion_macOS.entitlements */, F15520292971093400DC009B /* Preview Content */, ); @@ -502,6 +505,7 @@ F1552031297109C300DC009B /* ControlsView.swift in Sources */, EBDD7DB62973206600C1C4B2 /* Downloader.swift in Sources */, F155203429710B3600DC009B /* StatusView.swift in Sources */, + EB560F0429A3C20800C0F8B8 /* Capabilities.swift in Sources */, F15520242971093300DC009B /* Diffusion_macOSApp.swift in Sources */, 
EBDD7DB52973201800C1C4B2 /* ModelInfo.swift in Sources */, EBDD7DBD2977FFB300C1C4B2 /* GeneratedImageView.swift in Sources */, diff --git a/Diffusion/ModelInfo.swift b/Diffusion/ModelInfo.swift index f9865c9..de93638 100644 --- a/Diffusion/ModelInfo.swift +++ b/Diffusion/ModelInfo.swift @@ -40,10 +40,11 @@ struct ModelInfo { extension ModelInfo { static var defaultAttention: AttentionVariant { - return runningOnMac ? .original : .splitEinsum + guard runningOnMac else { return .splitEinsum } + guard Capabilities.hasANE else { return .original } + return Capabilities.performanceCores >= 8 ? .original : .splitEinsum } - // TODO: heuristics per {model, device} var bestAttention: AttentionVariant { return ModelInfo.defaultAttention } @@ -60,7 +61,7 @@ extension ModelInfo { } /// Best variant for the current platform. - /// Currently using `split_einsum` for iOS and `original` for macOS, but could vary depending on model. + /// Currently using `split_einsum` for iOS and simple performance heuristics for macOS. var bestURL: URL { modelURL(for: bestAttention) } var reduceMemory: Bool {