From b0ac915265ce60409fef79fbf1d8862fa3929d7d Mon Sep 17 00:00:00 2001
From: Georgi Gerganov <ggerganov@gmail.com>
Date: Sun, 5 Mar 2023 11:11:51 +0200
Subject: [PATCH] coreml : use Core ML encoder inference

---
 .gitignore                      |   2 +
 CMakeLists.txt                  |  68 +++++++++--
 Makefile                        |  44 ++++---
 coreml/whisper-encoder-impl.h   | 142 +++++++++++++++++++++++
 coreml/whisper-encoder-impl.m   | 197 ++++++++++++++++++++++++++++++++
 coreml/whisper-encoder.h        |  22 ++++
 coreml/whisper-encoder.mm       |  61 ++++++++++
 models/download-coreml-model.sh |  82 +++++++++++++
 whisper.cpp                     |  49 +++++++-
 9 files changed, 643 insertions(+), 24 deletions(-)
 create mode 100644 coreml/whisper-encoder-impl.h
 create mode 100644 coreml/whisper-encoder-impl.m
 create mode 100644 coreml/whisper-encoder.h
 create mode 100644 coreml/whisper-encoder.mm
 create mode 100755 models/download-coreml-model.sh

diff --git a/.gitignore b/.gitignore
index 2dae328..402ca15 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,5 +1,7 @@
 *.o
 *.a
+*.mlmodel
+*.mlmodelc
 .cache/
 .vs/
 .vscode/
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 476ceb8..422d7f0 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -54,6 +54,8 @@ if (APPLE)
     option(WHISPER_NO_AVX              "whisper: disable AVX" OFF)
     option(WHISPER_NO_AVX2             "whisper: disable AVX2" OFF)
     option(WHISPER_NO_FMA              "whisper: disable FMA" OFF)
+
+    option(WHISPER_COREML              "whisper: enable Core ML framework" OFF)
 else()
     option(WHISPER_SUPPORT_OPENBLAS    "whisper: support for OpenBLAS" OFF)
 endif()
@@ -86,16 +88,33 @@ endif()
 
 find_package(Threads REQUIRED)
 
-# on APPLE - include Accelerate framework
-if (APPLE AND NOT WHISPER_NO_ACCELERATE)
-    find_library(ACCELERATE_FRAMEWORK Accelerate)
-    if (ACCELERATE_FRAMEWORK)
-        message(STATUS "Accelerate framework found")
+# on APPLE
+if (APPLE)
+    # include Accelerate framework
+    if (NOT WHISPER_NO_ACCELERATE)
+        find_library(ACCELERATE_FRAMEWORK Accelerate)
+
+        if (ACCELERATE_FRAMEWORK)
+            message(STATUS "Accelerate framework found")
 
-        set(WHISPER_EXTRA_LIBS  ${WHISPER_EXTRA_LIBS}  ${ACCELERATE_FRAMEWORK})
-        set(WHISPER_EXTRA_FLAGS ${WHISPER_EXTRA_FLAGS} -DGGML_USE_ACCELERATE)
-    else()
-        message(WARNING "Accelerate framework not found")
+            set(WHISPER_EXTRA_LIBS  ${WHISPER_EXTRA_LIBS}  ${ACCELERATE_FRAMEWORK})
+            set(WHISPER_EXTRA_FLAGS ${WHISPER_EXTRA_FLAGS} -DGGML_USE_ACCELERATE)
+        else()
+            message(WARNING "Accelerate framework not found")
+        endif()
+    endif()
+
+    if (WHISPER_COREML)
+        find_library(FOUNDATION_FRAMEWORK Foundation)
+        find_library(COREML_FRAMEWORK CoreML)
+
+        if (COREML_FRAMEWORK)
+            message(STATUS "CoreML framework found")
+
+            set(WHISPER_EXTRA_FLAGS ${WHISPER_EXTRA_FLAGS} -DWHISPER_USE_COREML)
+        else()
+            message(WARNING "CoreML framework not found")
+        endif()
     endif()
 endif()
 
@@ -181,6 +200,33 @@ if (WHISPER_PERF)
     set(WHISPER_EXTRA_FLAGS ${WHISPER_EXTRA_FLAGS} -DGGML_PERF)
 endif()
 
+#
+# whisper.coreml - Core ML support
+#
+
+if (WHISPER_COREML)
+    set(TARGET whisper.coreml)
+
+    add_library(${TARGET}
+        coreml/whisper-encoder.h
+        coreml/whisper-encoder.mm
+        coreml/whisper-encoder-impl.h
+        coreml/whisper-encoder-impl.m
+        )
+
+    include(DefaultTargetOptions)
+
+    target_include_directories(${TARGET} PUBLIC
+        .
+        )
+
+    target_link_libraries(${TARGET} PRIVATE ${FOUNDATION_FRAMEWORK} ${COREML_FRAMEWORK})
+
+    set_target_properties(${TARGET} PROPERTIES
+        COMPILE_FLAGS "-fobjc-arc"
+        )
+endif()
+
 #
 # whisper - this is the main library of the project
 #
@@ -200,6 +246,10 @@ target_include_directories(${TARGET} PUBLIC
     .
     )
 
+if (WHISPER_COREML)
+    target_link_libraries(${TARGET} PRIVATE whisper.coreml)
+endif()
+
 if (MSVC)
     target_link_libraries(${TARGET} PRIVATE ${WHISPER_EXTRA_LIBS} ${CMAKE_THREAD_LIBS_INIT})
 
diff --git a/Makefile b/Makefile
index 15094a4..441bfcf 100644
--- a/Makefile
+++ b/Makefile
@@ -132,6 +132,10 @@ ifndef WHISPER_NO_ACCELERATE
 		LDFLAGS += -framework Accelerate
 	endif
 endif
+ifdef WHISPER_COREML
+	CXXFLAGS += -DWHISPER_USE_COREML
+	LDFLAGS  += -framework Foundation -framework CoreML
+endif
 ifdef WHISPER_OPENBLAS
 	CFLAGS  += -DGGML_USE_OPENBLAS -I/usr/local/include/openblas
 	LDFLAGS += -lopenblas
@@ -184,11 +188,23 @@ ggml.o: ggml.c ggml.h
 whisper.o: whisper.cpp whisper.h
 	$(CXX) $(CXXFLAGS) -c whisper.cpp -o whisper.o
 
-libwhisper.a: ggml.o whisper.o
-	$(AR) rcs libwhisper.a ggml.o whisper.o
+ifndef WHISPER_COREML
+WHISPER_OBJ = whisper.o
+else
+whisper-encoder.o: coreml/whisper-encoder.mm coreml/whisper-encoder.h
+	$(CXX) -O3 -I . -c coreml/whisper-encoder.mm -o whisper-encoder.o
+
+whisper-encoder-impl.o: coreml/whisper-encoder-impl.m coreml/whisper-encoder-impl.h
+	$(CXX) -O3 -I . -fobjc-arc -c coreml/whisper-encoder-impl.m -o whisper-encoder-impl.o
+
+WHISPER_OBJ = whisper.o whisper-encoder.o whisper-encoder-impl.o
+endif
+
+libwhisper.a: ggml.o $(WHISPER_OBJ)
+	$(AR) rcs libwhisper.a ggml.o $(WHISPER_OBJ)
 
-libwhisper.so: ggml.o whisper.o
-	$(CXX) $(CXXFLAGS) -shared -o libwhisper.so ggml.o whisper.o $(LDFLAGS)
+libwhisper.so: ggml.o $(WHISPER_OBJ)
+	$(CXX) $(CXXFLAGS) -shared -o libwhisper.so ggml.o $(WHISPER_OBJ) $(LDFLAGS)
 
 clean:
 	rm -f *.o main stream command talk bench libwhisper.a libwhisper.so
@@ -202,21 +218,21 @@ CC_SDL=`sdl2-config --cflags --libs`
 SRC_COMMON = examples/common.cpp
 SRC_COMMON_SDL = examples/common-sdl.cpp
 
-main: examples/main/main.cpp $(SRC_COMMON) ggml.o whisper.o
-	$(CXX) $(CXXFLAGS) examples/main/main.cpp $(SRC_COMMON) ggml.o whisper.o -o main $(LDFLAGS)
+main: examples/main/main.cpp $(SRC_COMMON) ggml.o $(WHISPER_OBJ)
+	$(CXX) $(CXXFLAGS) examples/main/main.cpp $(SRC_COMMON) ggml.o $(WHISPER_OBJ) -o main $(LDFLAGS)
 	./main -h
 
-stream: examples/stream/stream.cpp $(SRC_COMMON) $(SRC_COMMON_SDL) ggml.o whisper.o
-	$(CXX) $(CXXFLAGS) examples/stream/stream.cpp $(SRC_COMMON) $(SRC_COMMON_SDL) ggml.o whisper.o -o stream $(CC_SDL) $(LDFLAGS)
+stream: examples/stream/stream.cpp $(SRC_COMMON) $(SRC_COMMON_SDL) ggml.o $(WHISPER_OBJ)
+	$(CXX) $(CXXFLAGS) examples/stream/stream.cpp $(SRC_COMMON) $(SRC_COMMON_SDL) ggml.o $(WHISPER_OBJ) -o stream $(CC_SDL) $(LDFLAGS)
 
-command: examples/command/command.cpp $(SRC_COMMON) $(SRC_COMMON_SDL) ggml.o whisper.o
-	$(CXX) $(CXXFLAGS) examples/command/command.cpp $(SRC_COMMON) $(SRC_COMMON_SDL) ggml.o whisper.o -o command $(CC_SDL) $(LDFLAGS)
+command: examples/command/command.cpp $(SRC_COMMON) $(SRC_COMMON_SDL) ggml.o $(WHISPER_OBJ)
+	$(CXX) $(CXXFLAGS) examples/command/command.cpp $(SRC_COMMON) $(SRC_COMMON_SDL) ggml.o $(WHISPER_OBJ) -o command $(CC_SDL) $(LDFLAGS)
 
-talk: examples/talk/talk.cpp examples/talk/gpt-2.cpp $(SRC_COMMON) $(SRC_COMMON_SDL) ggml.o whisper.o
-	$(CXX) $(CXXFLAGS) examples/talk/talk.cpp examples/talk/gpt-2.cpp $(SRC_COMMON) $(SRC_COMMON_SDL) ggml.o whisper.o -o talk $(CC_SDL) $(LDFLAGS)
+talk: examples/talk/talk.cpp examples/talk/gpt-2.cpp $(SRC_COMMON) $(SRC_COMMON_SDL) ggml.o $(WHISPER_OBJ)
+	$(CXX) $(CXXFLAGS) examples/talk/talk.cpp examples/talk/gpt-2.cpp $(SRC_COMMON) $(SRC_COMMON_SDL) ggml.o $(WHISPER_OBJ) -o talk $(CC_SDL) $(LDFLAGS)
 
-bench: examples/bench/bench.cpp ggml.o whisper.o
-	$(CXX) $(CXXFLAGS) examples/bench/bench.cpp ggml.o whisper.o -o bench $(LDFLAGS)
+bench: examples/bench/bench.cpp ggml.o $(WHISPER_OBJ)
+	$(CXX) $(CXXFLAGS) examples/bench/bench.cpp ggml.o $(WHISPER_OBJ) -o bench $(LDFLAGS)
 
 #
 # Audio samples
diff --git a/coreml/whisper-encoder-impl.h b/coreml/whisper-encoder-impl.h
new file mode 100644
index 0000000..9395acb
--- /dev/null
+++ b/coreml/whisper-encoder-impl.h
@@ -0,0 +1,142 @@
+//
+// CoremlEncoder.h
+//
+// This file was automatically generated and should not be edited.
+//
+
+#import <Foundation/Foundation.h>
+#import <CoreML/CoreML.h>
+#include <stdint.h>
+#include <os/log.h>
+
+NS_ASSUME_NONNULL_BEGIN
+
+
+/// Model Prediction Input Type
+API_AVAILABLE(macos(10.15), ios(13.0), watchos(6.0), tvos(13.0)) __attribute__((visibility("hidden")))
+@interface CoremlEncoderInput : NSObject<MLFeatureProvider>
+
+/// melSegment as 1 × 80 × 3000 3-dimensional array of floats
+@property (readwrite, nonatomic, strong) MLMultiArray * melSegment;
+- (instancetype)init NS_UNAVAILABLE;
+- (instancetype)initWithMelSegment:(MLMultiArray *)melSegment NS_DESIGNATED_INITIALIZER;
+
+@end
+
+
+/// Model Prediction Output Type
+API_AVAILABLE(macos(10.15), ios(13.0), watchos(6.0), tvos(13.0)) __attribute__((visibility("hidden")))
+@interface CoremlEncoderOutput : NSObject<MLFeatureProvider>
+
+/// output as multidimensional array of floats
+@property (readwrite, nonatomic, strong) MLMultiArray * output;
+- (instancetype)init NS_UNAVAILABLE;
+- (instancetype)initWithOutput:(MLMultiArray *)output NS_DESIGNATED_INITIALIZER;
+
+@end
+
+
+/// Class for model loading and prediction
+API_AVAILABLE(macos(10.15), ios(13.0), watchos(6.0), tvos(13.0)) __attribute__((visibility("hidden")))
+@interface CoremlEncoder : NSObject
+@property (readonly, nonatomic, nullable) MLModel * model;
+
+/**
+    URL of the underlying .mlmodelc directory.
+*/
++ (nullable NSURL *)URLOfModelInThisBundle;
+
+/**
+    Initialize CoremlEncoder instance from an existing MLModel object.
+
+    Usually the application does not use this initializer unless it makes a subclass of CoremlEncoder.
+    Such application may want to use `-[MLModel initWithContentsOfURL:configuration:error:]` and `+URLOfModelInThisBundle` to create a MLModel object to pass-in.
+*/
+- (instancetype)initWithMLModel:(MLModel *)model NS_DESIGNATED_INITIALIZER;
+
+/**
+    Initialize CoremlEncoder instance with the model in this bundle.
+*/
+- (nullable instancetype)init;
+
+/**
+    Initialize CoremlEncoder instance with the model in this bundle.
+
+    @param configuration The model configuration object
+    @param error If an error occurs, upon return contains an NSError object that describes the problem. If you are not interested in possible errors, pass in NULL.
+*/
+- (nullable instancetype)initWithConfiguration:(MLModelConfiguration *)configuration error:(NSError * _Nullable __autoreleasing * _Nullable)error;
+
+/**
+    Initialize CoremlEncoder instance from the model URL.
+
+    @param modelURL URL to the .mlmodelc directory for CoremlEncoder.
+    @param error If an error occurs, upon return contains an NSError object that describes the problem. If you are not interested in possible errors, pass in NULL.
+*/
+- (nullable instancetype)initWithContentsOfURL:(NSURL *)modelURL error:(NSError * _Nullable __autoreleasing * _Nullable)error;
+
+/**
+    Initialize CoremlEncoder instance from the model URL.
+
+    @param modelURL URL to the .mlmodelc directory for CoremlEncoder.
+    @param configuration The model configuration object
+    @param error If an error occurs, upon return contains an NSError object that describes the problem. If you are not interested in possible errors, pass in NULL.
+*/
+- (nullable instancetype)initWithContentsOfURL:(NSURL *)modelURL configuration:(MLModelConfiguration *)configuration error:(NSError * _Nullable __autoreleasing * _Nullable)error;
+
+/**
+    Construct CoremlEncoder instance asynchronously with configuration.
+    Model loading may take time when the model content is not immediately available (e.g. encrypted model). Use this factory method especially when the caller is on the main thread.
+
+    @param configuration The model configuration
+    @param handler When the model load completes successfully or unsuccessfully, the completion handler is invoked with a valid CoremlEncoder instance or NSError object.
+*/
++ (void)loadWithConfiguration:(MLModelConfiguration *)configuration completionHandler:(void (^)(CoremlEncoder * _Nullable model, NSError * _Nullable error))handler API_AVAILABLE(macos(11.0), ios(14.0), watchos(7.0), tvos(14.0)) __attribute__((visibility("hidden")));
+
+/**
+    Construct CoremlEncoder instance asynchronously with URL of .mlmodelc directory and optional configuration.
+
+    Model loading may take time when the model content is not immediately available (e.g. encrypted model). Use this factory method especially when the caller is on the main thread.
+
+    @param modelURL The model URL.
+    @param configuration The model configuration
+    @param handler When the model load completes successfully or unsuccessfully, the completion handler is invoked with a valid CoremlEncoder instance or NSError object.
+*/
++ (void)loadContentsOfURL:(NSURL *)modelURL configuration:(MLModelConfiguration *)configuration completionHandler:(void (^)(CoremlEncoder * _Nullable model, NSError * _Nullable error))handler API_AVAILABLE(macos(11.0), ios(14.0), watchos(7.0), tvos(14.0)) __attribute__((visibility("hidden")));
+
+/**
+    Make a prediction using the standard interface
+    @param input an instance of CoremlEncoderInput to predict from
+    @param error If an error occurs, upon return contains an NSError object that describes the problem. If you are not interested in possible errors, pass in NULL.
+    @return the prediction as CoremlEncoderOutput
+*/
+- (nullable CoremlEncoderOutput *)predictionFromFeatures:(CoremlEncoderInput *)input error:(NSError * _Nullable __autoreleasing * _Nullable)error;
+
+/**
+    Make a prediction using the standard interface
+    @param input an instance of CoremlEncoderInput to predict from
+    @param options prediction options
+    @param error If an error occurs, upon return contains an NSError object that describes the problem. If you are not interested in possible errors, pass in NULL.
+    @return the prediction as CoremlEncoderOutput
+*/
+- (nullable CoremlEncoderOutput *)predictionFromFeatures:(CoremlEncoderInput *)input options:(MLPredictionOptions *)options error:(NSError * _Nullable __autoreleasing * _Nullable)error;
+
+/**
+    Make a prediction using the convenience interface
+    @param melSegment as 1 × 80 × 3000 3-dimensional array of floats:
+    @param error If an error occurs, upon return contains an NSError object that describes the problem. If you are not interested in possible errors, pass in NULL.
+    @return the prediction as CoremlEncoderOutput
+*/
+- (nullable CoremlEncoderOutput *)predictionFromMelSegment:(MLMultiArray *)melSegment error:(NSError * _Nullable __autoreleasing * _Nullable)error;
+
+/**
+    Batch prediction
+    @param inputArray array of CoremlEncoderInput instances to obtain predictions from
+    @param options prediction options
+    @param error If an error occurs, upon return contains an NSError object that describes the problem. If you are not interested in possible errors, pass in NULL.
+    @return the predictions as NSArray<CoremlEncoderOutput *>
+*/
+- (nullable NSArray<CoremlEncoderOutput *> *)predictionsFromInputs:(NSArray<CoremlEncoderInput*> *)inputArray options:(MLPredictionOptions *)options error:(NSError * _Nullable __autoreleasing * _Nullable)error;
+@end
+
+NS_ASSUME_NONNULL_END
diff --git a/coreml/whisper-encoder-impl.m b/coreml/whisper-encoder-impl.m
new file mode 100644
index 0000000..9d3a08b
--- /dev/null
+++ b/coreml/whisper-encoder-impl.m
@@ -0,0 +1,197 @@
+//
+// CoremlEncoder.m
+//
+// This file was automatically generated and should not be edited.
+//
+
+#if !__has_feature(objc_arc)
+#error This file must be compiled with automatic reference counting enabled (-fobjc-arc)
+#endif
+
+#import "whisper-encoder-impl.h"
+
+@implementation CoremlEncoderInput
+
+- (instancetype)initWithMelSegment:(MLMultiArray *)melSegment {
+    self = [super init];
+    if (self) {
+        _melSegment = melSegment;
+    }
+    return self;
+}
+
+- (NSSet<NSString *> *)featureNames {
+    return [NSSet setWithArray:@[@"melSegment"]];
+}
+
+- (nullable MLFeatureValue *)featureValueForName:(NSString *)featureName {
+    if ([featureName isEqualToString:@"melSegment"]) {
+        return [MLFeatureValue featureValueWithMultiArray:self.melSegment];
+    }
+    return nil;
+}
+
+@end
+
+@implementation CoremlEncoderOutput
+
+- (instancetype)initWithOutput:(MLMultiArray *)output {
+    self = [super init];
+    if (self) {
+        _output = output;
+    }
+    return self;
+}
+
+- (NSSet<NSString *> *)featureNames {
+    return [NSSet setWithArray:@[@"output"]];
+}
+
+- (nullable MLFeatureValue *)featureValueForName:(NSString *)featureName {
+    if ([featureName isEqualToString:@"output"]) {
+        return [MLFeatureValue featureValueWithMultiArray:self.output];
+    }
+    return nil;
+}
+
+@end
+
+@implementation CoremlEncoder
+
+
+/**
+    URL of the underlying .mlmodelc directory.
+*/
++ (nullable NSURL *)URLOfModelInThisBundle {
+    NSString *assetPath = [[NSBundle bundleForClass:[self class]] pathForResource:@"CoremlEncoder" ofType:@"mlmodelc"];
+    if (nil == assetPath) { os_log_error(OS_LOG_DEFAULT, "Could not load CoremlEncoder.mlmodelc in the bundle resource"); return nil; }
+    return [NSURL fileURLWithPath:assetPath];
+}
+
+
+/**
+    Initialize CoremlEncoder instance from an existing MLModel object.
+
+    Usually the application does not use this initializer unless it makes a subclass of CoremlEncoder.
+    Such application may want to use `-[MLModel initWithContentsOfURL:configuration:error:]` and `+URLOfModelInThisBundle` to create a MLModel object to pass-in.
+*/
+- (instancetype)initWithMLModel:(MLModel *)model {
+    self = [super init];
+    if (!self) { return nil; }
+    _model = model;
+    if (_model == nil) { return nil; }
+    return self;
+}
+
+
+/**
+    Initialize CoremlEncoder instance with the model in this bundle.
+*/
+- (nullable instancetype)init {
+    return [self initWithContentsOfURL:(NSURL * _Nonnull)self.class.URLOfModelInThisBundle error:nil];
+}
+
+
+/**
+    Initialize CoremlEncoder instance with the model in this bundle.
+
+    @param configuration The model configuration object
+    @param error If an error occurs, upon return contains an NSError object that describes the problem. If you are not interested in possible errors, pass in NULL.
+*/
+- (nullable instancetype)initWithConfiguration:(MLModelConfiguration *)configuration error:(NSError * _Nullable __autoreleasing * _Nullable)error {
+    return [self initWithContentsOfURL:(NSURL * _Nonnull)self.class.URLOfModelInThisBundle configuration:configuration error:error];
+}
+
+
+/**
+    Initialize CoremlEncoder instance from the model URL.
+
+    @param modelURL URL to the .mlmodelc directory for CoremlEncoder.
+    @param error If an error occurs, upon return contains an NSError object that describes the problem. If you are not interested in possible errors, pass in NULL.
+*/
+- (nullable instancetype)initWithContentsOfURL:(NSURL *)modelURL error:(NSError * _Nullable __autoreleasing * _Nullable)error {
+    MLModel *model = [MLModel modelWithContentsOfURL:modelURL error:error];
+    if (model == nil) { return nil; }
+    return [self initWithMLModel:model];
+}
+
+
+/**
+    Initialize CoremlEncoder instance from the model URL.
+
+    @param modelURL URL to the .mlmodelc directory for CoremlEncoder.
+    @param configuration The model configuration object
+    @param error If an error occurs, upon return contains an NSError object that describes the problem. If you are not interested in possible errors, pass in NULL.
+*/
+- (nullable instancetype)initWithContentsOfURL:(NSURL *)modelURL configuration:(MLModelConfiguration *)configuration error:(NSError * _Nullable __autoreleasing * _Nullable)error {
+    MLModel *model = [MLModel modelWithContentsOfURL:modelURL configuration:configuration error:error];
+    if (model == nil) { return nil; }
+    return [self initWithMLModel:model];
+}
+
+
+/**
+    Construct CoremlEncoder instance asynchronously with configuration.
+    Model loading may take time when the model content is not immediately available (e.g. encrypted model). Use this factory method especially when the caller is on the main thread.
+
+    @param configuration The model configuration
+    @param handler When the model load completes successfully or unsuccessfully, the completion handler is invoked with a valid CoremlEncoder instance or NSError object.
+*/
++ (void)loadWithConfiguration:(MLModelConfiguration *)configuration completionHandler:(void (^)(CoremlEncoder * _Nullable model, NSError * _Nullable error))handler {
+    [self loadContentsOfURL:(NSURL * _Nonnull)[self URLOfModelInThisBundle]
+              configuration:configuration
+          completionHandler:handler];
+}
+
+
+/**
+    Construct CoremlEncoder instance asynchronously with URL of .mlmodelc directory and optional configuration.
+
+    Model loading may take time when the model content is not immediately available (e.g. encrypted model). Use this factory method especially when the caller is on the main thread.
+
+    @param modelURL The model URL.
+    @param configuration The model configuration
+    @param handler When the model load completes successfully or unsuccessfully, the completion handler is invoked with a valid CoremlEncoder instance or NSError object.
+*/
++ (void)loadContentsOfURL:(NSURL *)modelURL configuration:(MLModelConfiguration *)configuration completionHandler:(void (^)(CoremlEncoder * _Nullable model, NSError * _Nullable error))handler {
+    [MLModel loadContentsOfURL:modelURL
+                 configuration:configuration
+             completionHandler:^(MLModel *model, NSError *error) {
+        if (model != nil) {
+            CoremlEncoder *typedModel = [[CoremlEncoder alloc] initWithMLModel:model];
+            handler(typedModel, nil);
+        } else {
+            handler(nil, error);
+        }
+    }];
+}
+
+- (nullable CoremlEncoderOutput *)predictionFromFeatures:(CoremlEncoderInput *)input error:(NSError * _Nullable __autoreleasing * _Nullable)error {
+    return [self predictionFromFeatures:input options:[[MLPredictionOptions alloc] init] error:error];
+}
+
+- (nullable CoremlEncoderOutput *)predictionFromFeatures:(CoremlEncoderInput *)input options:(MLPredictionOptions *)options error:(NSError * _Nullable __autoreleasing * _Nullable)error {
+    id<MLFeatureProvider> outFeatures = [self.model predictionFromFeatures:input options:options error:error];
+    if (!outFeatures) { return nil; }
+    return [[CoremlEncoderOutput alloc] initWithOutput:(MLMultiArray *)[outFeatures featureValueForName:@"output"].multiArrayValue];
+}
+
+- (nullable CoremlEncoderOutput *)predictionFromMelSegment:(MLMultiArray *)melSegment error:(NSError * _Nullable __autoreleasing * _Nullable)error {
+    CoremlEncoderInput *input_ = [[CoremlEncoderInput alloc] initWithMelSegment:melSegment];
+    return [self predictionFromFeatures:input_ error:error];
+}
+
+- (nullable NSArray<CoremlEncoderOutput *> *)predictionsFromInputs:(NSArray<CoremlEncoderInput*> *)inputArray options:(MLPredictionOptions *)options error:(NSError * _Nullable __autoreleasing * _Nullable)error {
+    id<MLBatchProvider> inBatch = [[MLArrayBatchProvider alloc] initWithFeatureProviderArray:inputArray];
+    id<MLBatchProvider> outBatch = [self.model predictionsFromBatch:inBatch options:options error:error];
+    if (!outBatch) { return nil; }
+    NSMutableArray<CoremlEncoderOutput*> *results = [NSMutableArray arrayWithCapacity:(NSUInteger)outBatch.count];
+    for (NSInteger i = 0; i < outBatch.count; i++) {
+        id<MLFeatureProvider> resultProvider = [outBatch featuresAtIndex:i];
+        CoremlEncoderOutput * result = [[CoremlEncoderOutput alloc] initWithOutput:(MLMultiArray *)[resultProvider featureValueForName:@"output"].multiArrayValue];
+        [results addObject:result];
+    }
+    return results;
+}
+
+@end
diff --git a/coreml/whisper-encoder.h b/coreml/whisper-encoder.h
new file mode 100644
index 0000000..84bbe41
--- /dev/null
+++ b/coreml/whisper-encoder.h
@@ -0,0 +1,22 @@
+// Wrapper of the Core ML Whisper Encoder model
+//
+// Code is derived from the work of Github user @wangchou
+// ref: https://github.com/wangchou/callCoreMLFromCpp
+
+#if __cplusplus
+extern "C" {
+#endif
+
+struct whisper_coreml_context;
+
+struct whisper_coreml_context * whisper_coreml_init(const char * path_model);
+void whisper_coreml_free(struct whisper_coreml_context * ctx);
+
+void whisper_coreml_encode(
+        const whisper_coreml_context * ctx,
+                               float * mel,
+                               float * out);
+
+#if __cplusplus
+}
+#endif
diff --git a/coreml/whisper-encoder.mm b/coreml/whisper-encoder.mm
new file mode 100644
index 0000000..09091c2
--- /dev/null
+++ b/coreml/whisper-encoder.mm
@@ -0,0 +1,61 @@
+#import "coreml/whisper-encoder.h"
+#import "coreml/whisper-encoder-impl.h"
+
+#import <CoreML/CoreML.h>
+
+#include <stdlib.h>
+
+#if __cplusplus
+extern "C" {
+#endif
+
+struct whisper_coreml_context {
+    const void * data;
+};
+
+struct whisper_coreml_context * whisper_coreml_init(const char * path_model) {
+    NSString * path_model_str = [[NSString alloc] initWithUTF8String:path_model];
+
+    NSURL * url_model = [NSURL fileURLWithPath: path_model_str];
+
+    const void * data = CFBridgingRetain([[CoremlEncoder alloc] initWithContentsOfURL:url_model error:nil]);
+
+    if (data == NULL) {
+        return NULL;
+    }
+
+    whisper_coreml_context * ctx = new whisper_coreml_context;
+
+    ctx->data = data;
+
+    return ctx;
+}
+
+void whisper_coreml_free(struct whisper_coreml_context * ctx) {
+    CFRelease(ctx->data);
+    delete ctx;
+}
+
+void whisper_coreml_encode(
+        const whisper_coreml_context * ctx,
+                               float * mel,
+                               float * out) {
+    MLMultiArray * inMultiArray = [
+        [MLMultiArray alloc] initWithDataPointer: mel
+                                           shape: @[@1, @80, @3000]
+                                        dataType: MLMultiArrayDataTypeFloat32
+                                         strides: @[@(240000), @(3000), @1]
+                                     deallocator: nil
+                                           error: nil
+    ];
+
+    CoremlEncoderOutput * outCoreML = [(__bridge id) ctx->data predictionFromMelSegment:inMultiArray error:nil];
+
+    MLMultiArray * outMA = outCoreML.output;
+
+    memcpy(out, outMA.dataPointer, outMA.count * sizeof(float));
+}
+
+#if __cplusplus
+}
+#endif
diff --git a/models/download-coreml-model.sh b/models/download-coreml-model.sh
new file mode 100755
index 0000000..d46789d
--- /dev/null
+++ b/models/download-coreml-model.sh
@@ -0,0 +1,82 @@
+#!/bin/bash
+
+# This script downloads Whisper model files that have already been converted to Core ML format.
+# This way you don't have to convert them yourself.
+
+src="https://huggingface.co/datasets/ggerganov/whisper.cpp-coreml"
+pfx="resolve/main/ggml"
+
+# get the path of this script
+function get_script_path() {
+    if [ -x "$(command -v realpath)" ]; then
+        echo "$(dirname $(realpath $0))"
+    else
+        local ret="$(cd -- "$(dirname "$0")" >/dev/null 2>&1 ; pwd -P)"
+        echo "$ret"
+    fi
+}
+
+models_path="$(get_script_path)"
+
+# Whisper models
+models=( "tiny.en" "tiny" "base.en" "base" "small.en" "small" "medium.en" "medium" "large-v1" "large" )
+
+# list available models
+function list_models {
+    printf "\n"
+    printf "  Available models:"
+    for model in "${models[@]}"; do
+        printf " $model"
+    done
+    printf "\n\n"
+}
+
+if [ "$#" -ne 1 ]; then
+    printf "Usage: $0 <model>\n"
+    list_models
+
+    exit 1
+fi
+
+model=$1
+
+if [[ ! " ${models[@]} " =~ " ${model} " ]]; then
+    printf "Invalid model: $model\n"
+    list_models
+
+    exit 1
+fi
+
+# download Core ML model
+
+printf "Downloading Core ML model $model from '$src' ...\n"
+
+cd $models_path
+
+if [ -f "ggml-$model.mlmodel" ]; then
+    printf "Model $model already exists. Skipping download.\n"
+    exit 0
+fi
+
+if [ -x "$(command -v wget)" ]; then
+    wget --quiet --show-progress -O ggml-$model.mlmodel $src/$pfx-$model.mlmodel
+elif [ -x "$(command -v curl)" ]; then
+    curl -L --output ggml-$model.mlmodel $src/$pfx-$model.mlmodel
+else
+    printf "Either wget or curl is required to download models.\n"
+    exit 1
+fi
+
+
+if [ $? -ne 0 ]; then
+    printf "Failed to download Core ML model $model \n"
+    printf "Please try again later or download the original Whisper model files and convert them yourself.\n"
+    exit 1
+fi
+
+printf "Done! Model '$model' saved in 'models/ggml-$model.mlmodel'\n"
+printf "Run the following command to compile it:\n\n"
+printf "  $ xcrun coremlc compile ./models/ggml-$model.mlmodel ./models\n\n"
+printf "You can now use it like this:\n\n"
+printf "  $ ./main -m models/ggml-$model.bin -f samples/jfk.wav\n"
+printf "\n"
diff --git a/whisper.cpp b/whisper.cpp
index 3a21581..ab242d7 100644
--- a/whisper.cpp
+++ b/whisper.cpp
@@ -1,5 +1,8 @@
 #define WHISPER_BUILD
 #include "whisper.h"
+#if WHISPER_USE_COREML
+#include "coreml/whisper-encoder.h"
+#endif
 
 #include "ggml.h"
 
@@ -594,6 +597,11 @@ struct whisper_context {
 
     int lang_id = 0; // english by default
 
+    std::string path_model; // populated by whisper_init_from_file()
+#ifdef WHISPER_USE_COREML
+    whisper_coreml_context * ctx_coreml;
+#endif
+
     // [EXPERIMENTAL] token-level timestamps data
     int64_t t_beg = 0;
     int64_t t_last = 0;
@@ -1696,6 +1704,9 @@ static bool whisper_encode(
     wctx.use_buf(ctx0, -1);
 
     // run the computation
+#ifdef WHISPER_USE_COREML
+    whisper_coreml_encode(wctx.ctx_coreml, (float *) mel->data, (float *) cur->data);
+#else
     {
         struct ggml_cgraph gf = {};
         gf.n_threads = n_threads;
@@ -1705,6 +1716,7 @@ static bool whisper_encode(
 
         //ggml_graph_print(&gf);
     }
+#endif
 
     // cur
     //{
@@ -2507,6 +2519,20 @@ static std::vector<whisper_vocab::id> tokenize(const whisper_vocab & vocab, cons
 // interface implementation
 //
 
+#ifdef WHISPER_USE_COREML
+// replace .bin with .mlmodelc
+static std::string whisper_get_coreml_path(std::string path_bin) {
+    auto pos = path_bin.rfind('.');
+    if (pos != std::string::npos) {
+        path_bin = path_bin.substr(0, pos);
+    }
+
+    path_bin += ".mlmodelc";
+
+    return path_bin;
+}
+#endif
+
 struct whisper_context * whisper_init_from_file(const char * path_model) {
     whisper_model_loader loader = {};
 
@@ -2519,6 +2545,7 @@ struct whisper_context * whisper_init_from_file(const char * path_model) {
     }
 
     loader.context = &fin;
+
     loader.read = [](void * ctx, void * output, size_t read_size) {
         std::ifstream * fin = (std::ifstream*)ctx;
         fin->read((char *)output, read_size);
@@ -2535,7 +2562,23 @@ struct whisper_context * whisper_init_from_file(const char * path_model) {
         fin->close();
     };
 
-    return whisper_init(&loader);
+    auto ctx = whisper_init(&loader);
+
+    if (ctx) {
+        ctx->path_model = path_model;
+#ifdef WHISPER_USE_COREML
+        const auto path_coreml = whisper_get_coreml_path(ctx->path_model);
+        fprintf(stderr, "%s: loading Core ML model from '%s'\n", __func__, path_coreml.c_str());
+
+        ctx->ctx_coreml = whisper_coreml_init(path_coreml.c_str());
+        if (!ctx->ctx_coreml) {
+            fprintf(stderr, "%s: failed to load Core ML model from '%s'\n", __func__, path_coreml.c_str());
+            return nullptr;
+        }
+#endif
+    }
+
+    return ctx;
 }
 
 struct whisper_context * whisper_init_from_buffer(void * buffer, size_t buffer_size) {
@@ -2607,6 +2650,10 @@ void whisper_free(struct whisper_context * ctx) {
                 ggml_free(ctx->decoders[i].kv_self.ctx);
             }
         }
+#ifdef WHISPER_USE_COREML
+        whisper_coreml_free(ctx->ctx_coreml);
+        ctx->ctx_coreml = nullptr;
+#endif
         delete ctx;
     }
 }