Decide attention and ANE on hardware capabilities (#29)

For Macs with 8 or more performance cores, or without a Neural Engine, we
select CPU+GPU (original attention). Otherwise we select CPU+ANE (split einsum).

Some computers (M1 Pro, 16 core GPU) might yield slightly better
performance using CPU+GPU+ANE with SPLIT_EINSUM.
main
Pedro Cuenca 2 years ago committed by GitHub
parent 8796695928
commit ed15f78628
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

@ -0,0 +1,36 @@
//
// Capabilities.swift
// Diffusion-macOS
//
// Created by Pedro Cuenca on 20/2/23.
// See LICENSE at https://github.com/huggingface/swift-coreml-diffusers/LICENSE
//
import Foundation
// Constant flag marking this build as running on a Mac (this file is part of the Diffusion-macOS target).
let runningOnMac = true
// `_hasANE` is computed once at file scope; MLCompute is only imported where it can be.
#if canImport(MLCompute)
import MLCompute
// True when MLCompute hands back an ANE device, i.e. `MLCDevice.ane()` is non-nil.
let _hasANE = MLCDevice.ane() != nil
#else
// MLCompute cannot be imported, so the ANE device cannot be queried; report it as absent.
let _hasANE = false
#endif
/// Namespace for hardware probes used to choose default compute units and attention variant.
final class Capabilities {
    /// Whether an ANE device was found; mirrors the file-level `_hasANE` probe.
    static let hasANE = _hasANE

    /// Number of physical cores reported for performance level 0, or `0` when the
    /// `sysctlbyname` query fails.
    ///
    /// Per the original author's tests, this count is a good proxy for deciding whether
    /// CPU+GPU or CPU+NE works better. The picture may get more complicated if all
    /// compute units are selected.
    static var performanceCores: Int = {
        var coreCount: Int32 = 0
        var bufferSize = MemoryLayout<Int32>.size
        // On M1/M2, perflevel0 is the performance cores and perflevel1 the efficiency cores.
        // On Intel there is only one performance level.
        let status = sysctlbyname("hw.perflevel0.physicalcpu", &coreCount, &bufferSize, nil, 0)
        return status == 0 ? Int(coreCount) : 0
    }()
}

@ -284,7 +284,7 @@ struct ControlsView: View {
}.foregroundColor(.secondary) }.foregroundColor(.secondary)
} }
if hasANE { if Capabilities.hasANE {
Divider() Divider()
DisclosureGroup(isExpanded: $disclosedAdvanced) { DisclosureGroup(isExpanded: $disclosedAdvanced) {
HStack { HStack {

@ -16,12 +16,3 @@ struct Diffusion_macOSApp: App {
} }
} }
} }
let runningOnMac = true
#if canImport(MLCompute)
import MLCompute
let hasANE = MLCDevice.ane() != nil
#else
let hasANE = false
#endif

@ -9,6 +9,7 @@
/* Begin PBXBuildFile section */ /* Begin PBXBuildFile section */
EB067F872992E561004D1AD9 /* HelpContent.swift in Sources */ = {isa = PBXBuildFile; fileRef = EB067F862992E561004D1AD9 /* HelpContent.swift */; }; EB067F872992E561004D1AD9 /* HelpContent.swift in Sources */ = {isa = PBXBuildFile; fileRef = EB067F862992E561004D1AD9 /* HelpContent.swift */; };
EB33A51D2954D89F00B16357 /* StableDiffusion in Frameworks */ = {isa = PBXBuildFile; productRef = EB33A51C2954D89F00B16357 /* StableDiffusion */; }; EB33A51D2954D89F00B16357 /* StableDiffusion in Frameworks */ = {isa = PBXBuildFile; productRef = EB33A51C2954D89F00B16357 /* StableDiffusion */; };
EB560F0429A3C20800C0F8B8 /* Capabilities.swift in Sources */ = {isa = PBXBuildFile; fileRef = EB560F0329A3C20800C0F8B8 /* Capabilities.swift */; };
EBB5BA5329425BEE003A2A5B /* PipelineLoader.swift in Sources */ = {isa = PBXBuildFile; fileRef = EBB5BA5229425BEE003A2A5B /* PipelineLoader.swift */; }; EBB5BA5329425BEE003A2A5B /* PipelineLoader.swift in Sources */ = {isa = PBXBuildFile; fileRef = EBB5BA5229425BEE003A2A5B /* PipelineLoader.swift */; };
EBB5BA5829425E17003A2A5B /* Path in Frameworks */ = {isa = PBXBuildFile; productRef = EBB5BA5729425E17003A2A5B /* Path */; }; EBB5BA5829425E17003A2A5B /* Path in Frameworks */ = {isa = PBXBuildFile; productRef = EBB5BA5729425E17003A2A5B /* Path */; };
EBB5BA5A29426E06003A2A5B /* Downloader.swift in Sources */ = {isa = PBXBuildFile; fileRef = EBB5BA5929426E06003A2A5B /* Downloader.swift */; }; EBB5BA5A29426E06003A2A5B /* Downloader.swift in Sources */ = {isa = PBXBuildFile; fileRef = EBB5BA5929426E06003A2A5B /* Downloader.swift */; };
@ -64,6 +65,7 @@
/* Begin PBXFileReference section */ /* Begin PBXFileReference section */
EB067F862992E561004D1AD9 /* HelpContent.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = HelpContent.swift; sourceTree = "<group>"; }; EB067F862992E561004D1AD9 /* HelpContent.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = HelpContent.swift; sourceTree = "<group>"; };
EB33A51E2954E1BC00B16357 /* Info.plist */ = {isa = PBXFileReference; lastKnownFileType = text.plist; path = Info.plist; sourceTree = "<group>"; }; EB33A51E2954E1BC00B16357 /* Info.plist */ = {isa = PBXFileReference; lastKnownFileType = text.plist; path = Info.plist; sourceTree = "<group>"; };
EB560F0329A3C20800C0F8B8 /* Capabilities.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = Capabilities.swift; sourceTree = "<group>"; };
EBB5BA5229425BEE003A2A5B /* PipelineLoader.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = PipelineLoader.swift; sourceTree = "<group>"; }; EBB5BA5229425BEE003A2A5B /* PipelineLoader.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = PipelineLoader.swift; sourceTree = "<group>"; };
EBB5BA5929426E06003A2A5B /* Downloader.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = Downloader.swift; sourceTree = "<group>"; }; EBB5BA5929426E06003A2A5B /* Downloader.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = Downloader.swift; sourceTree = "<group>"; };
EBDD7DB22973200200C1C4B2 /* Utils.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = Utils.swift; sourceTree = "<group>"; }; EBDD7DB22973200200C1C4B2 /* Utils.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = Utils.swift; sourceTree = "<group>"; };
@ -247,6 +249,7 @@
F1552030297109C300DC009B /* ControlsView.swift */, F1552030297109C300DC009B /* ControlsView.swift */,
F155203329710B3600DC009B /* StatusView.swift */, F155203329710B3600DC009B /* StatusView.swift */,
EB067F862992E561004D1AD9 /* HelpContent.swift */, EB067F862992E561004D1AD9 /* HelpContent.swift */,
EB560F0329A3C20800C0F8B8 /* Capabilities.swift */,
F155202C2971093400DC009B /* Diffusion_macOS.entitlements */, F155202C2971093400DC009B /* Diffusion_macOS.entitlements */,
F15520292971093400DC009B /* Preview Content */, F15520292971093400DC009B /* Preview Content */,
); );
@ -502,6 +505,7 @@
F1552031297109C300DC009B /* ControlsView.swift in Sources */, F1552031297109C300DC009B /* ControlsView.swift in Sources */,
EBDD7DB62973206600C1C4B2 /* Downloader.swift in Sources */, EBDD7DB62973206600C1C4B2 /* Downloader.swift in Sources */,
F155203429710B3600DC009B /* StatusView.swift in Sources */, F155203429710B3600DC009B /* StatusView.swift in Sources */,
EB560F0429A3C20800C0F8B8 /* Capabilities.swift in Sources */,
F15520242971093300DC009B /* Diffusion_macOSApp.swift in Sources */, F15520242971093300DC009B /* Diffusion_macOSApp.swift in Sources */,
EBDD7DB52973201800C1C4B2 /* ModelInfo.swift in Sources */, EBDD7DB52973201800C1C4B2 /* ModelInfo.swift in Sources */,
EBDD7DBD2977FFB300C1C4B2 /* GeneratedImageView.swift in Sources */, EBDD7DBD2977FFB300C1C4B2 /* GeneratedImageView.swift in Sources */,

@ -40,10 +40,11 @@ struct ModelInfo {
extension ModelInfo { extension ModelInfo {
static var defaultAttention: AttentionVariant { static var defaultAttention: AttentionVariant {
return runningOnMac ? .original : .splitEinsum guard runningOnMac else { return .splitEinsum }
guard Capabilities.hasANE else { return .original }
return Capabilities.performanceCores >= 8 ? .original : .splitEinsum
} }
// TODO: heuristics per {model, device}
var bestAttention: AttentionVariant { var bestAttention: AttentionVariant {
return ModelInfo.defaultAttention return ModelInfo.defaultAttention
} }
@ -60,7 +61,7 @@ extension ModelInfo {
} }
/// Best variant for the current platform. /// Best variant for the current platform.
/// Currently using `split_einsum` for iOS and `original` for macOS, but could vary depending on model. /// Currently using `split_einsum` for iOS and simple performance heuristics for macOS.
var bestURL: URL { modelURL(for: bestAttention) } var bestURL: URL { modelURL(for: bestAttention) }
var reduceMemory: Bool { var reduceMemory: Bool {

Loading…
Cancel
Save