coreml : use Core ML encoder inference

2 years ago · b0ac915265
parent 72af0f5697
commit b0ac915265
9 changed files with 643 additions and 24 deletions
--- a/.gitignore
+++ b/.gitignore
@ -1,5 +1,7 @@
 *.o
 *.a
 *.mlmodel
 *.mlmodelc
 .cache/
 .vs/
 .vscode/
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@ -54,6 +54,8 @@ if (APPLE)
    option(WHISPER_NO_AVX              "whisper: disable AVX" OFF)
    option(WHISPER_NO_AVX2             "whisper: disable AVX2" OFF)
    option(WHISPER_NO_FMA              "whisper: disable FMA" OFF)
    option(WHISPER_COREML              "whisper: enable Core ML framework" OFF)
 else()
    option(WHISPER_SUPPORT_OPENBLAS    "whisper: support for OpenBLAS" OFF)
 endif()
@ -86,9 +88,12 @@ endif()
 find_package(Threads REQUIRED)
-# on APPLE - include Accelerate framework
+# on APPLE
-if (APPLE AND NOT WHISPER_NO_ACCELERATE)
+if (APPLE)
    # include Accelerate framework
    if (NOT WHISPER_NO_ACCELERATE)
        find_library(ACCELERATE_FRAMEWORK Accelerate)
        if (ACCELERATE_FRAMEWORK)
            message(STATUS "Accelerate framework found")
@ -97,6 +102,20 @@ if (APPLE AND NOT WHISPER_NO_ACCELERATE)
        else()
            message(WARNING "Accelerate framework not found")
        endif()
    endif()
    if (WHISPER_COREML)
        # Core ML support links against both Foundation and CoreML, so both
        # frameworks have to be located before the feature is switched on.
        find_library(FOUNDATION_FRAMEWORK Foundation)
        find_library(COREML_FRAMEWORK CoreML)
        if (FOUNDATION_FRAMEWORK AND COREML_FRAMEWORK)
            message(STATUS "CoreML framework found")
            set(WHISPER_EXTRA_FLAGS ${WHISPER_EXTRA_FLAGS} -DWHISPER_USE_COREML)
        else()
            message(WARNING "CoreML framework not found - Core ML support disabled")
            # Shadow the cache option for the rest of this configure run so the
            # later `if (WHISPER_COREML)` sections do not create or link a
            # target that references a *-NOTFOUND framework path.
            set(WHISPER_COREML OFF)
        endif()
    endif()
 endif()
 if (WHISPER_SUPPORT_OPENBLAS)
@ -181,6 +200,33 @@ if (WHISPER_PERF)
    set(WHISPER_EXTRA_FLAGS ${WHISPER_EXTRA_FLAGS} -DGGML_PERF)
 endif()
 #
 # whisper.coreml - Core ML support
 #
 if (WHISPER_COREML)
    # Static helper library wrapping the Core ML encoder model.
    set(TARGET whisper.coreml)
    add_library(${TARGET}
        coreml/whisper-encoder.h
        coreml/whisper-encoder.mm
        coreml/whisper-encoder-impl.h
        coreml/whisper-encoder-impl.m
        )
    include(DefaultTargetOptions)
    # The sources include their headers as "coreml/...", relative to the
    # project root.
    target_include_directories(${TARGET} PUBLIC
        .
        )
    target_link_libraries(${TARGET} PRIVATE ${FOUNDATION_FRAMEWORK} ${COREML_FRAMEWORK})
    # whisper-encoder-impl.m refuses to compile without ARC. Use
    # target_compile_options instead of the deprecated COMPILE_FLAGS property.
    target_compile_options(${TARGET} PRIVATE -fobjc-arc)
 endif()
 #
 # whisper - this is the main library of the project
 #
@ -200,6 +246,10 @@ target_include_directories(${TARGET} PUBLIC
    .
    )
 if (WHISPER_COREML)
    # link the Core ML encoder wrapper library into the current target
    target_link_libraries(${TARGET} PRIVATE whisper.coreml)
 endif()
 if (MSVC)
    target_link_libraries(${TARGET} PRIVATE ${WHISPER_EXTRA_LIBS} ${CMAKE_THREAD_LIBS_INIT})
--- a/44
+++ b/44
@ -132,6 +132,10 @@ ifndef WHISPER_NO_ACCELERATE
 		LDFLAGS += -framework Accelerate
 	endif
 endif
 # Core ML: define WHISPER_USE_COREML for the C++ sources and link the
 # Foundation and CoreML frameworks
 ifdef WHISPER_COREML
 	CXXFLAGS += -DWHISPER_USE_COREML
 	LDFLAGS  += -framework Foundation -framework CoreML
 endif
 ifdef WHISPER_OPENBLAS
 	CFLAGS  += -DGGML_USE_OPENBLAS -I/usr/local/include/openblas
 	LDFLAGS += -lopenblas
@ -184,11 +188,23 @@ ggml.o: ggml.c ggml.h
 whisper.o: whisper.cpp whisper.h
 	$(CXX) $(CXXFLAGS) -c whisper.cpp -o whisper.o
-libwhisper.a: ggml.o whisper.o
+ifndef WHISPER_COREML
-	$(AR) rcs libwhisper.a ggml.o whisper.o
+WHISPER_OBJ = whisper.o
 else
 # Both Core ML objects are built with ARC (-fobjc-arc), matching the CMake
 # build. whisper-encoder.mm hands its encoder object over with
 # CFBridgingRetain; without ARC the alloc/init reference is never balanced
 # and the object leaks.
 whisper-encoder.o: coreml/whisper-encoder.mm coreml/whisper-encoder.h
 	$(CXX) -O3 -I . -fobjc-arc -c coreml/whisper-encoder.mm -o whisper-encoder.o
 whisper-encoder-impl.o: coreml/whisper-encoder-impl.m coreml/whisper-encoder-impl.h
 	$(CXX) -O3 -I . -fobjc-arc -c coreml/whisper-encoder-impl.m -o whisper-encoder-impl.o
 WHISPER_OBJ = whisper.o whisper-encoder.o whisper-encoder-impl.o
 endif
 libwhisper.a: ggml.o $(WHISPER_OBJ)
 	$(AR) rcs libwhisper.a ggml.o $(WHISPER_OBJ)
-libwhisper.so: ggml.o whisper.o
+libwhisper.so: ggml.o $(WHISPER_OBJ)
-	$(CXX) $(CXXFLAGS) -shared -o libwhisper.so ggml.o whisper.o $(LDFLAGS)
+	$(CXX) $(CXXFLAGS) -shared -o libwhisper.so ggml.o $(WHISPER_OBJ) $(LDFLAGS)
 clean:
 	rm -f *.o main stream command talk bench libwhisper.a libwhisper.so
@ -202,21 +218,21 @@ CC_SDL=`sdl2-config --cflags --libs`
 SRC_COMMON = examples/common.cpp
 SRC_COMMON_SDL = examples/common-sdl.cpp
-main: examples/main/main.cpp $(SRC_COMMON) ggml.o whisper.o
+main: examples/main/main.cpp $(SRC_COMMON) ggml.o $(WHISPER_OBJ)
-	$(CXX) $(CXXFLAGS) examples/main/main.cpp $(SRC_COMMON) ggml.o whisper.o -o main $(LDFLAGS)
+	$(CXX) $(CXXFLAGS) examples/main/main.cpp $(SRC_COMMON) ggml.o $(WHISPER_OBJ) -o main $(LDFLAGS)
 	./main -h
-stream: examples/stream/stream.cpp $(SRC_COMMON) $(SRC_COMMON_SDL) ggml.o whisper.o
+stream: examples/stream/stream.cpp $(SRC_COMMON) $(SRC_COMMON_SDL) ggml.o $(WHISPER_OBJ)
-	$(CXX) $(CXXFLAGS) examples/stream/stream.cpp $(SRC_COMMON) $(SRC_COMMON_SDL) ggml.o whisper.o -o stream $(CC_SDL) $(LDFLAGS)
+	$(CXX) $(CXXFLAGS) examples/stream/stream.cpp $(SRC_COMMON) $(SRC_COMMON_SDL) ggml.o $(WHISPER_OBJ) -o stream $(CC_SDL) $(LDFLAGS)
-command: examples/command/command.cpp $(SRC_COMMON) $(SRC_COMMON_SDL) ggml.o whisper.o
+command: examples/command/command.cpp $(SRC_COMMON) $(SRC_COMMON_SDL) ggml.o $(WHISPER_OBJ)
-	$(CXX) $(CXXFLAGS) examples/command/command.cpp $(SRC_COMMON) $(SRC_COMMON_SDL) ggml.o whisper.o -o command $(CC_SDL) $(LDFLAGS)
+	$(CXX) $(CXXFLAGS) examples/command/command.cpp $(SRC_COMMON) $(SRC_COMMON_SDL) ggml.o $(WHISPER_OBJ) -o command $(CC_SDL) $(LDFLAGS)
-talk: examples/talk/talk.cpp examples/talk/gpt-2.cpp $(SRC_COMMON) $(SRC_COMMON_SDL) ggml.o whisper.o
+talk: examples/talk/talk.cpp examples/talk/gpt-2.cpp $(SRC_COMMON) $(SRC_COMMON_SDL) ggml.o $(WHISPER_OBJ)
-	$(CXX) $(CXXFLAGS) examples/talk/talk.cpp examples/talk/gpt-2.cpp $(SRC_COMMON) $(SRC_COMMON_SDL) ggml.o whisper.o -o talk $(CC_SDL) $(LDFLAGS)
+	$(CXX) $(CXXFLAGS) examples/talk/talk.cpp examples/talk/gpt-2.cpp $(SRC_COMMON) $(SRC_COMMON_SDL) ggml.o $(WHISPER_OBJ) -o talk $(CC_SDL) $(LDFLAGS)
-bench: examples/bench/bench.cpp ggml.o whisper.o
+bench: examples/bench/bench.cpp ggml.o $(WHISPER_OBJ)
-	$(CXX) $(CXXFLAGS) examples/bench/bench.cpp ggml.o whisper.o -o bench $(LDFLAGS)
+	$(CXX) $(CXXFLAGS) examples/bench/bench.cpp ggml.o $(WHISPER_OBJ) -o bench $(LDFLAGS)
 #
 # Audio samples
--- a/coreml/whisper-encoder-impl.h
+++ b/coreml/whisper-encoder-impl.h
@ -0,0 +1,142 @@
 //
 // CoremlEncoder.h
 //
 // This file was automatically generated and should not be edited.
 //
 #import <Foundation/Foundation.h>
 #import <CoreML/CoreML.h>
 #include <stdint.h>
 #include <os/log.h>
 NS_ASSUME_NONNULL_BEGIN
 /// Model Prediction Input Type
 API_AVAILABLE(macos(10.15), ios(13.0), watchos(6.0), tvos(13.0)) __attribute__((visibility("hidden")))
@interface CoremlEncoderInput : NSObject<MLFeatureProvider>
 /// melSegment as 1 × 80 × 3000 3-dimensional array of floats
@property (readwrite, nonatomic, strong) MLMultiArray * melSegment;
 - (instancetype)init NS_UNAVAILABLE;
 - (instancetype)initWithMelSegment:(MLMultiArray *)melSegment NS_DESIGNATED_INITIALIZER;
@end
 /// Model Prediction Output Type
 API_AVAILABLE(macos(10.15), ios(13.0), watchos(6.0), tvos(13.0)) __attribute__((visibility("hidden")))
@interface CoremlEncoderOutput : NSObject<MLFeatureProvider>
 /// output as multidimensional array of floats
@property (readwrite, nonatomic, strong) MLMultiArray * output;
 - (instancetype)init NS_UNAVAILABLE;
 - (instancetype)initWithOutput:(MLMultiArray *)output NS_DESIGNATED_INITIALIZER;
@end
 /// Class for model loading and prediction
 API_AVAILABLE(macos(10.15), ios(13.0), watchos(6.0), tvos(13.0)) __attribute__((visibility("hidden")))
@interface CoremlEncoder : NSObject
@property (readonly, nonatomic, nullable) MLModel * model;
 /**
    URL of the underlying .mlmodelc directory.
 */
 + (nullable NSURL *)URLOfModelInThisBundle;
 /**
    Initialize CoremlEncoder instance from an existing MLModel object.
    Usually the application does not use this initializer unless it makes a subclass of CoremlEncoder.
    Such application may want to use `-[MLModel initWithContentsOfURL:configuration:error:]` and `+URLOfModelInThisBundle` to create a MLModel object to pass-in.
 */
 - (instancetype)initWithMLModel:(MLModel *)model NS_DESIGNATED_INITIALIZER;
 /**
    Initialize CoremlEncoder instance with the model in this bundle.
 */
 - (nullable instancetype)init;
 /**
    Initialize CoremlEncoder instance with the model in this bundle.
    @param configuration The model configuration object
    @param error If an error occurs, upon return contains an NSError object that describes the problem. If you are not interested in possible errors, pass in NULL.
 */
 - (nullable instancetype)initWithConfiguration:(MLModelConfiguration *)configuration error:(NSError * _Nullable __autoreleasing * _Nullable)error;
 /**
    Initialize CoremlEncoder instance from the model URL.
    @param modelURL URL to the .mlmodelc directory for CoremlEncoder.
    @param error If an error occurs, upon return contains an NSError object that describes the problem. If you are not interested in possible errors, pass in NULL.
 */
 - (nullable instancetype)initWithContentsOfURL:(NSURL *)modelURL error:(NSError * _Nullable __autoreleasing * _Nullable)error;
 /**
    Initialize CoremlEncoder instance from the model URL.
    @param modelURL URL to the .mlmodelc directory for CoremlEncoder.
    @param configuration The model configuration object
    @param error If an error occurs, upon return contains an NSError object that describes the problem. If you are not interested in possible errors, pass in NULL.
 */
 - (nullable instancetype)initWithContentsOfURL:(NSURL *)modelURL configuration:(MLModelConfiguration *)configuration error:(NSError * _Nullable __autoreleasing * _Nullable)error;
 /**
    Construct CoremlEncoder instance asynchronously with configuration.
    Model loading may take time when the model content is not immediately available (e.g. encrypted model). Use this factory method especially when the caller is on the main thread.
    @param configuration The model configuration
    @param handler When the model load completes successfully or unsuccessfully, the completion handler is invoked with a valid CoremlEncoder instance or NSError object.
 */
 + (void)loadWithConfiguration:(MLModelConfiguration *)configuration completionHandler:(void (^)(CoremlEncoder * _Nullable model, NSError * _Nullable error))handler API_AVAILABLE(macos(11.0), ios(14.0), watchos(7.0), tvos(14.0)) __attribute__((visibility("hidden")));
 /**
    Construct CoremlEncoder instance asynchronously with URL of .mlmodelc directory and optional configuration.
    Model loading may take time when the model content is not immediately available (e.g. encrypted model). Use this factory method especially when the caller is on the main thread.
    @param modelURL The model URL.
    @param configuration The model configuration
    @param handler When the model load completes successfully or unsuccessfully, the completion handler is invoked with a valid CoremlEncoder instance or NSError object.
 */
 + (void)loadContentsOfURL:(NSURL *)modelURL configuration:(MLModelConfiguration *)configuration completionHandler:(void (^)(CoremlEncoder * _Nullable model, NSError * _Nullable error))handler API_AVAILABLE(macos(11.0), ios(14.0), watchos(7.0), tvos(14.0)) __attribute__((visibility("hidden")));
 /**
    Make a prediction using the standard interface
    @param input an instance of CoremlEncoderInput to predict from
    @param error If an error occurs, upon return contains an NSError object that describes the problem. If you are not interested in possible errors, pass in NULL.
    @return the prediction as CoremlEncoderOutput
 */
 - (nullable CoremlEncoderOutput *)predictionFromFeatures:(CoremlEncoderInput *)input error:(NSError * _Nullable __autoreleasing * _Nullable)error;
 /**
    Make a prediction using the standard interface
    @param input an instance of CoremlEncoderInput to predict from
    @param options prediction options
    @param error If an error occurs, upon return contains an NSError object that describes the problem. If you are not interested in possible errors, pass in NULL.
    @return the prediction as CoremlEncoderOutput
 */
 - (nullable CoremlEncoderOutput *)predictionFromFeatures:(CoremlEncoderInput *)input options:(MLPredictionOptions *)options error:(NSError * _Nullable __autoreleasing * _Nullable)error;
 /**
    Make a prediction using the convenience interface
    @param melSegment as 1 × 80 × 3000 3-dimensional array of floats:
    @param error If an error occurs, upon return contains an NSError object that describes the problem. If you are not interested in possible errors, pass in NULL.
    @return the prediction as CoremlEncoderOutput
 */
 - (nullable CoremlEncoderOutput *)predictionFromMelSegment:(MLMultiArray *)melSegment error:(NSError * _Nullable __autoreleasing * _Nullable)error;
 /**
    Batch prediction
    @param inputArray array of CoremlEncoderInput instances to obtain predictions from
    @param options prediction options
    @param error If an error occurs, upon return contains an NSError object that describes the problem. If you are not interested in possible errors, pass in NULL.
    @return the predictions as NSArray<CoremlEncoderOutput *>
 */
 - (nullable NSArray<CoremlEncoderOutput *> *)predictionsFromInputs:(NSArray<CoremlEncoderInput*> *)inputArray options:(MLPredictionOptions *)options error:(NSError * _Nullable __autoreleasing * _Nullable)error;
@end
 NS_ASSUME_NONNULL_END
--- a/coreml/whisper-encoder-impl.m
+++ b/coreml/whisper-encoder-impl.m
@ -0,0 +1,197 @@
 //
 // CoremlEncoder.m
 //
 // This file was automatically generated and should not be edited.
 //
 #if !__has_feature(objc_arc)
 #error This file must be compiled with automatic reference counting enabled (-fobjc-arc)
 #endif
 #import "whisper-encoder-impl.h"
@implementation CoremlEncoderInput
 - (instancetype)initWithMelSegment:(MLMultiArray *)melSegment {
    self = [super init];
    if (self) {
        _melSegment = melSegment;
    }
    return self;
 }
 - (NSSet<NSString *> *)featureNames {
    return [NSSet setWithArray:@[@"melSegment"]];
 }
 - (nullable MLFeatureValue *)featureValueForName:(NSString *)featureName {
    if ([featureName isEqualToString:@"melSegment"]) {
        return [MLFeatureValue featureValueWithMultiArray:self.melSegment];
    }
    return nil;
 }
@end
@implementation CoremlEncoderOutput
 - (instancetype)initWithOutput:(MLMultiArray *)output {
    self = [super init];
    if (self) {
        _output = output;
    }
    return self;
 }
 - (NSSet<NSString *> *)featureNames {
    return [NSSet setWithArray:@[@"output"]];
 }
 - (nullable MLFeatureValue *)featureValueForName:(NSString *)featureName {
    if ([featureName isEqualToString:@"output"]) {
        return [MLFeatureValue featureValueWithMultiArray:self.output];
    }
    return nil;
 }
@end
@implementation CoremlEncoder
 /**
    URL of the underlying .mlmodelc directory.
 */
 + (nullable NSURL *)URLOfModelInThisBundle {
    NSString *assetPath = [[NSBundle bundleForClass:[self class]] pathForResource:@"CoremlEncoder" ofType:@"mlmodelc"];
    if (nil == assetPath) { os_log_error(OS_LOG_DEFAULT, "Could not load CoremlEncoder.mlmodelc in the bundle resource"); return nil; }
    return [NSURL fileURLWithPath:assetPath];
 }
 /**
    Initialize CoremlEncoder instance from an existing MLModel object.
    Usually the application does not use this initializer unless it makes a subclass of CoremlEncoder.
    Such application may want to use `-[MLModel initWithContentsOfURL:configuration:error:]` and `+URLOfModelInThisBundle` to create a MLModel object to pass-in.
 */
 - (instancetype)initWithMLModel:(MLModel *)model {
    self = [super init];
    if (!self) { return nil; }
    _model = model;
    if (_model == nil) { return nil; }
    return self;
 }
 /**
    Initialize CoremlEncoder instance with the model in this bundle.
 */
 - (nullable instancetype)init {
    return [self initWithContentsOfURL:(NSURL * _Nonnull)self.class.URLOfModelInThisBundle error:nil];
 }
 /**
    Initialize CoremlEncoder instance with the model in this bundle.
    @param configuration The model configuration object
    @param error If an error occurs, upon return contains an NSError object that describes the problem. If you are not interested in possible errors, pass in NULL.
 */
 - (nullable instancetype)initWithConfiguration:(MLModelConfiguration *)configuration error:(NSError * _Nullable __autoreleasing * _Nullable)error {
    return [self initWithContentsOfURL:(NSURL * _Nonnull)self.class.URLOfModelInThisBundle configuration:configuration error:error];
 }
 /**
    Initialize CoremlEncoder instance from the model URL.
    @param modelURL URL to the .mlmodelc directory for CoremlEncoder.
    @param error If an error occurs, upon return contains an NSError object that describes the problem. If you are not interested in possible errors, pass in NULL.
 */
 - (nullable instancetype)initWithContentsOfURL:(NSURL *)modelURL error:(NSError * _Nullable __autoreleasing * _Nullable)error {
    MLModel *model = [MLModel modelWithContentsOfURL:modelURL error:error];
    if (model == nil) { return nil; }
    return [self initWithMLModel:model];
 }
 /**
    Initialize CoremlEncoder instance from the model URL.
    @param modelURL URL to the .mlmodelc directory for CoremlEncoder.
    @param configuration The model configuration object
    @param error If an error occurs, upon return contains an NSError object that describes the problem. If you are not interested in possible errors, pass in NULL.
 */
 - (nullable instancetype)initWithContentsOfURL:(NSURL *)modelURL configuration:(MLModelConfiguration *)configuration error:(NSError * _Nullable __autoreleasing * _Nullable)error {
    MLModel *model = [MLModel modelWithContentsOfURL:modelURL configuration:configuration error:error];
    if (model == nil) { return nil; }
    return [self initWithMLModel:model];
 }
 /**
    Construct CoremlEncoder instance asynchronously with configuration.
    Model loading may take time when the model content is not immediately available (e.g. encrypted model). Use this factory method especially when the caller is on the main thread.
    @param configuration The model configuration
    @param handler When the model load completes successfully or unsuccessfully, the completion handler is invoked with a valid CoremlEncoder instance or NSError object.
 */
 + (void)loadWithConfiguration:(MLModelConfiguration *)configuration completionHandler:(void (^)(CoremlEncoder * _Nullable model, NSError * _Nullable error))handler {
    [self loadContentsOfURL:(NSURL * _Nonnull)[self URLOfModelInThisBundle]
              configuration:configuration
          completionHandler:handler];
 }
 /**
    Construct CoremlEncoder instance asynchronously with URL of .mlmodelc directory and optional configuration.
    Model loading may take time when the model content is not immediately available (e.g. encrypted model). Use this factory method especially when the caller is on the main thread.
    @param modelURL The model URL.
    @param configuration The model configuration
    @param handler When the model load completes successfully or unsuccessfully, the completion handler is invoked with a valid CoremlEncoder instance or NSError object.
 */
 + (void)loadContentsOfURL:(NSURL *)modelURL configuration:(MLModelConfiguration *)configuration completionHandler:(void (^)(CoremlEncoder * _Nullable model, NSError * _Nullable error))handler {
    [MLModel loadContentsOfURL:modelURL
                 configuration:configuration
             completionHandler:^(MLModel *model, NSError *error) {
        if (model != nil) {
            CoremlEncoder *typedModel = [[CoremlEncoder alloc] initWithMLModel:model];
            handler(typedModel, nil);
        } else {
            handler(nil, error);
        }
    }];
 }
 - (nullable CoremlEncoderOutput *)predictionFromFeatures:(CoremlEncoderInput *)input error:(NSError * _Nullable __autoreleasing * _Nullable)error {
    return [self predictionFromFeatures:input options:[[MLPredictionOptions alloc] init] error:error];
 }
 - (nullable CoremlEncoderOutput *)predictionFromFeatures:(CoremlEncoderInput *)input options:(MLPredictionOptions *)options error:(NSError * _Nullable __autoreleasing * _Nullable)error {
    id<MLFeatureProvider> outFeatures = [self.model predictionFromFeatures:input options:options error:error];
    if (!outFeatures) { return nil; }
    return [[CoremlEncoderOutput alloc] initWithOutput:(MLMultiArray *)[outFeatures featureValueForName:@"output"].multiArrayValue];
 }
 - (nullable CoremlEncoderOutput *)predictionFromMelSegment:(MLMultiArray *)melSegment error:(NSError * _Nullable __autoreleasing * _Nullable)error {
    CoremlEncoderInput *input_ = [[CoremlEncoderInput alloc] initWithMelSegment:melSegment];
    return [self predictionFromFeatures:input_ error:error];
 }
 - (nullable NSArray<CoremlEncoderOutput *> *)predictionsFromInputs:(NSArray<CoremlEncoderInput*> *)inputArray options:(MLPredictionOptions *)options error:(NSError * _Nullable __autoreleasing * _Nullable)error {
    id<MLBatchProvider> inBatch = [[MLArrayBatchProvider alloc] initWithFeatureProviderArray:inputArray];
    id<MLBatchProvider> outBatch = [self.model predictionsFromBatch:inBatch options:options error:error];
    if (!outBatch) { return nil; }
    NSMutableArray<CoremlEncoderOutput*> *results = [NSMutableArray arrayWithCapacity:(NSUInteger)outBatch.count];
    for (NSInteger i = 0; i < outBatch.count; i++) {
        id<MLFeatureProvider> resultProvider = [outBatch featuresAtIndex:i];
        CoremlEncoderOutput * result = [[CoremlEncoderOutput alloc] initWithOutput:(MLMultiArray *)[resultProvider featureValueForName:@"output"].multiArrayValue];
        [results addObject:result];
    }
    return results;
 }
@end
--- a/coreml/whisper-encoder.h
+++ b/coreml/whisper-encoder.h
@ -0,0 +1,22 @@
 // Wrapper of the Core ML Whisper Encoder model
 //
 // Code is derived from the work of Github user @wangchou
 // ref: https://github.com/wangchou/callCoreMLFromCpp
 #ifdef __cplusplus
 extern "C" {
 #endif
 // Opaque handle to a loaded Core ML encoder model.
 struct whisper_coreml_context;
 // Load the compiled Core ML model at path_model.
 // Returns NULL when the model cannot be loaded.
 struct whisper_coreml_context * whisper_coreml_init(const char * path_model);
 // Release a context obtained from whisper_coreml_init().
 void whisper_coreml_free(struct whisper_coreml_context * ctx);
 // Run the encoder on `mel` and copy the model output into `out`.
 // The `struct` keyword is spelled out so the declaration is also valid C.
 void whisper_coreml_encode(
        const struct whisper_coreml_context * ctx,
                                      float * mel,
                                      float * out);
 #ifdef __cplusplus
 }
 #endif
--- a/coreml/whisper-encoder.mm
+++ b/coreml/whisper-encoder.mm
@ -0,0 +1,61 @@
 #import "coreml/whisper-encoder.h"
 #import "coreml/whisper-encoder-impl.h"
 #import <CoreML/CoreML.h>
 #include <stdlib.h>
 #if __cplusplus
 extern "C" {
 #endif
 // Context behind the opaque handle exposed by whisper-encoder.h.
 struct whisper_coreml_context {
    // CoremlEncoder instance retained with CFBridgingRetain() in
    // whisper_coreml_init() and released with CFRelease() in whisper_coreml_free()
    const void * data;
 };
 // Load the Core ML encoder model located at path_model.
 // Returns a newly allocated context, or NULL when the path is missing, is not
 // valid UTF-8, or the model cannot be loaded.
 struct whisper_coreml_context * whisper_coreml_init(const char * path_model) {
    // -initWithUTF8String: raises on a NULL argument, so reject it up front
    if (path_model == NULL) {
        return NULL;
    }
    NSString * path_model_str = [[NSString alloc] initWithUTF8String:path_model];
    // nil here means the bytes were not valid UTF-8; +fileURLWithPath: would raise
    if (path_model_str == nil) {
        return NULL;
    }
    NSURL * url_model = [NSURL fileURLWithPath: path_model_str];
    // CFBridgingRetain(nil) yields NULL, which doubles as the load-failure signal
    const void * data = CFBridgingRetain([[CoremlEncoder alloc] initWithContentsOfURL:url_model error:nil]);
    if (data == NULL) {
        return NULL;
    }
    whisper_coreml_context * ctx = new whisper_coreml_context;
    ctx->data = data;
    return ctx;
 }
 // Release the encoder object held by ctx and the context itself.
 // Passing NULL is a no-op.
 void whisper_coreml_free(struct whisper_coreml_context * ctx) {
    if (ctx == NULL) {
        return;
    }
    // CFRelease() traps on NULL, so only balance a retain that actually happened
    if (ctx->data != NULL) {
        CFRelease(ctx->data);
    }
    delete ctx;
 }
 // Run the Core ML encoder on `mel` and copy the resulting floats into `out`.
 // `mel` is wrapped (not copied) as a 1 x 80 x 3000 float32 array.
 // On any failure the error is logged and `out` is left untouched; the function
 // returns void, so the caller gets no other failure signal.
 void whisper_coreml_encode(
        const whisper_coreml_context * ctx,
                               float * mel,
                               float * out) {
    if (ctx == NULL || ctx->data == NULL || mel == NULL || out == NULL) {
        NSLog(@"%s: invalid arguments", __func__);
        return;
    }
    NSError * error = nil;
    MLMultiArray * inMultiArray = [
        [MLMultiArray alloc] initWithDataPointer: mel
                                           shape: @[@1, @80, @3000]
                                        dataType: MLMultiArrayDataTypeFloat32
                                         strides: @[@(240000), @(3000), @1]
                                     deallocator: nil
                                           error: &error
    ];
    if (inMultiArray == nil) {
        NSLog(@"%s: failed to wrap the mel buffer: %@", __func__, error);
        return;
    }
    CoremlEncoder * encoder = (__bridge CoremlEncoder *) ctx->data;
    CoremlEncoderOutput * outCoreML = [encoder predictionFromMelSegment:inMultiArray error:&error];
    MLMultiArray * outMA = outCoreML.output;
    // messaging nil yields a nil output, so this one check covers a failed prediction too
    if (outMA == nil) {
        NSLog(@"%s: Core ML prediction failed: %@", __func__, error);
        return;
    }
    memcpy(out, outMA.dataPointer, outMA.count * sizeof(float));
 }
 #if __cplusplus
 }
 #endif
--- a/models/download-coreml-model.sh
+++ b/models/download-coreml-model.sh
@ -0,0 +1,82 @@
 #!/bin/bash
 # This script downloads Whisper model files that have already been converted to Core ML format.
 # This way you don't have to convert them yourself.
 src="https://huggingface.co/datasets/ggerganov/whisper.cpp-coreml"
 pfx="resolve/main/ggml"
 # get the path of this script
 # Print the absolute directory containing this script.
 # Uses realpath when available, otherwise falls back to cd + pwd -P.
 # Every expansion is quoted so paths containing spaces survive.
 function get_script_path() {
    if [ -x "$(command -v realpath)" ]; then
        dirname "$(realpath "$0")"
    else
        local ret="$(cd -- "$(dirname "$0")" >/dev/null 2>&1 ; pwd -P)"
        echo "$ret"
    fi
 }
 models_path="$(get_script_path)"
 # Whisper models
 models=( "tiny.en" "tiny" "base.en" "base" "small.en" "small" "medium.en" "medium" "large-v1" "large" )
 # list available models
 # Print the names in the global `models` array on one line.
 function list_models {
    printf "\n"
    printf "  Available models:"
    for model in "${models[@]}"; do
        # pass the name as an argument, never as part of the format string
        printf " %s" "$model"
    done
    printf "\n\n"
 }
 # exactly one argument is expected: the model name
 if [ "$#" -ne 1 ]; then
    printf "Usage: %s <model>\n" "$0"
    list_models
    exit 1
 fi
 model=$1
 # Require an exact match against the known names. A substring test on the
 # joined list would also accept "" or "tiny base".
 model_is_valid=0
 for known_model in "${models[@]}"; do
    if [ "$known_model" = "$model" ]; then
        model_is_valid=1
        break
    fi
 done
 if [ "$model_is_valid" -ne 1 ]; then
    # user input goes in as an argument so '%' sequences are not interpreted
    printf "Invalid model: %s\n" "$model"
    list_models
    exit 1
 fi
 # download Core ML model
 printf "Downloading Core ML model %s from '%s' ...\n" "$model" "$src"
 # abort instead of downloading into whatever the current directory happens to be
 cd "$models_path" || exit 1
 if [ -f "ggml-$model.mlmodel" ]; then
    printf "Model %s already exists. Skipping download.\n" "$model"
    exit 0
 fi
 if [ -x "$(command -v wget)" ]; then
    wget --quiet --show-progress -O "ggml-$model.mlmodel" "$src/$pfx-$model.mlmodel"
    download_status=$?
 elif [ -x "$(command -v curl)" ]; then
    # --fail: an HTTP error must not be saved to disk as if it were the model
    curl -L --fail --output "ggml-$model.mlmodel" "$src/$pfx-$model.mlmodel"
    download_status=$?
 else
    printf "Either wget or curl is required to download models.\n"
    exit 1
 fi
 if [ "$download_status" -ne 0 ]; then
    # drop any partial file, otherwise the next run would skip the download
    rm -f "ggml-$model.mlmodel"
    printf "Failed to download Core ML model %s \n" "$model"
    printf "Please try again later or download the original Whisper model files and convert them yourself.\n"
    exit 1
 fi
 # Tell the user where the model was saved and how to compile and use it.
 # The model name is passed as an argument rather than embedded in the format.
 printf "Done! Model '%s' saved in 'models/ggml-%s.mlmodel'\n" "$model" "$model"
 printf "Run the following command to compile it:\n\n"
 printf "  $ xcrun coremlc compile ./models/ggml-%s.mlmodel ./models\n\n" "$model"
 printf "You can now use it like this:\n\n"
 printf "  $ ./main -m models/ggml-%s.bin -f samples/jfk.wav\n" "$model"
 printf "\n"
--- a/whisper.cpp
+++ b/whisper.cpp
@ -1,5 +1,8 @@
 #define WHISPER_BUILD
 #include "whisper.h"
 // Use #ifdef like every other WHISPER_USE_COREML check in this file, so the
 // header is included whenever the Core ML code paths are compiled.
 #ifdef WHISPER_USE_COREML
 #include "coreml/whisper-encoder.h"
 #endif
 #include "ggml.h"
@ -594,6 +597,11 @@ struct whisper_context {
    int lang_id = 0; // english by default
    std::string path_model; // populated by whisper_init_from_file()
 #ifdef WHISPER_USE_COREML
    // Core ML encoder context. Only whisper_init_from_file() assigns it, so it
    // defaults to nullptr and never holds an indeterminate pointer.
    whisper_coreml_context * ctx_coreml = nullptr;
 #endif
    // [EXPERIMENTAL] token-level timestamps data
    int64_t t_beg = 0;
    int64_t t_last = 0;
@ -1696,6 +1704,9 @@ static bool whisper_encode(
    wctx.use_buf(ctx0, -1);
    // run the computation
 #ifdef WHISPER_USE_COREML
    whisper_coreml_encode(wctx.ctx_coreml, (float *) mel->data, (float *) cur->data);
 #else
    {
        struct ggml_cgraph gf = {};
        gf.n_threads = n_threads;
@ -1705,6 +1716,7 @@ static bool whisper_encode(
        //ggml_graph_print(&gf);
    }
 #endif
    // cur
    //{
@ -2507,6 +2519,20 @@ static std::vector<whisper_vocab::id> tokenize(const whisper_vocab & vocab, cons
 // interface implementation
 //
 #ifdef WHISPER_USE_COREML
 // replace .bin with .mlmodelc
 static std::string whisper_get_coreml_path(std::string path_bin) {
    // Only a dot inside the final path component is an extension separator.
    // A dot in a directory name ("./models/ggml-base", "/opt/whisper.cpp/ggml")
    // must not truncate the path.
    const auto sep = path_bin.find_last_of("/\\");
    const auto pos = path_bin.rfind('.');
    if (pos != std::string::npos && (sep == std::string::npos || pos > sep)) {
        path_bin = path_bin.substr(0, pos);
    }
    path_bin += ".mlmodelc";
    return path_bin;
 }
 #endif
 struct whisper_context * whisper_init_from_file(const char * path_model) {
    whisper_model_loader loader = {};
@ -2519,6 +2545,7 @@ struct whisper_context * whisper_init_from_file(const char * path_model) {
    }
    loader.context = &fin;
    loader.read = [](void * ctx, void * output, size_t read_size) {
        std::ifstream * fin = (std::ifstream*)ctx;
        fin->read((char *)output, read_size);
@ -2535,7 +2562,23 @@ struct whisper_context * whisper_init_from_file(const char * path_model) {
        fin->close();
    };
-    return whisper_init(&loader);
+    auto ctx = whisper_init(&loader);
    if (ctx) {
        // remember the path the model was loaded from
        ctx->path_model = path_model;
 #ifdef WHISPER_USE_COREML
        // the Core ML model path is derived from the ggml model path
        const auto path_coreml = whisper_get_coreml_path(ctx->path_model);
        fprintf(stderr, "%s: loading Core ML model from '%s'\n", __func__, path_coreml.c_str());
        ctx->ctx_coreml = whisper_coreml_init(path_coreml.c_str());
        if (!ctx->ctx_coreml) {
            fprintf(stderr, "%s: failed to load Core ML model from '%s'\n", __func__, path_coreml.c_str());
            // NOTE(review): `ctx` is not released on this path, so the context
            // created by whisper_init() appears to leak - confirm and free it here
            return nullptr;
        }
 #endif
    }
    return ctx;
 }
 struct whisper_context * whisper_init_from_buffer(void * buffer, size_t buffer_size) {
@ -2607,6 +2650,10 @@ void whisper_free(struct whisper_context * ctx) {
                ggml_free(ctx->decoders[i].kv_self.ctx);
            }
        }
 #ifdef WHISPER_USE_COREML
        // a context may exist without a loaded Core ML model
        // (only whisper_init_from_file() creates one), so skip the release then
        if (ctx->ctx_coreml != nullptr) {
            whisper_coreml_free(ctx->ctx_coreml);
            ctx->ctx_coreml = nullptr;
        }
 #endif
        delete ctx;
    }
 }