diff --git a/bindings/go/whisper.go b/bindings/go/whisper.go index 9381879..8d12fed 100644 --- a/bindings/go/whisper.go +++ b/bindings/go/whisper.go @@ -91,7 +91,7 @@ var ( func Whisper_init(path string) *Context { cPath := C.CString(path) defer C.free(unsafe.Pointer(cPath)) - if ctx := C.whisper_init(cPath); ctx != nil { + if ctx := C.whisper_init_from_file(cPath); ctx != nil { return (*Context)(ctx) } else { return nil diff --git a/bindings/javascript/emscripten.cpp b/bindings/javascript/emscripten.cpp index bda9630..789ad8b 100644 --- a/bindings/javascript/emscripten.cpp +++ b/bindings/javascript/emscripten.cpp @@ -20,7 +20,7 @@ struct whisper_context * g_context; EMSCRIPTEN_BINDINGS(whisper) { emscripten::function("init", emscripten::optional_override([](const std::string & path_model) { if (g_context == nullptr) { - g_context = whisper_init(path_model.c_str()); + g_context = whisper_init_from_file(path_model.c_str()); if (g_context != nullptr) { return true; } else { diff --git a/examples/bench.wasm/emscripten.cpp b/examples/bench.wasm/emscripten.cpp index 2e63315..9594147 100644 --- a/examples/bench.wasm/emscripten.cpp +++ b/examples/bench.wasm/emscripten.cpp @@ -52,7 +52,7 @@ EMSCRIPTEN_BINDINGS(bench) { emscripten::function("init", emscripten::optional_override([](const std::string & path_model) { for (size_t i = 0; i < g_contexts.size(); ++i) { if (g_contexts[i] == nullptr) { - g_contexts[i] = whisper_init(path_model.c_str()); + g_contexts[i] = whisper_init_from_file(path_model.c_str()); if (g_contexts[i] != nullptr) { if (g_worker.joinable()) { g_worker.join(); diff --git a/examples/bench/bench.cpp b/examples/bench/bench.cpp index 3ab5077..2fd2423 100644 --- a/examples/bench/bench.cpp +++ b/examples/bench/bench.cpp @@ -53,7 +53,7 @@ int main(int argc, char ** argv) { // whisper init - struct whisper_context * ctx = whisper_init(params.model.c_str()); + struct whisper_context * ctx = whisper_init_from_file(params.model.c_str()); { fprintf(stderr, "\n"); 
diff --git a/examples/command.wasm/emscripten.cpp b/examples/command.wasm/emscripten.cpp index d4bbb21..f2ba81e 100644 --- a/examples/command.wasm/emscripten.cpp +++ b/examples/command.wasm/emscripten.cpp @@ -324,7 +324,7 @@ EMSCRIPTEN_BINDINGS(command) { emscripten::function("init", emscripten::optional_override([](const std::string & path_model) { for (size_t i = 0; i < g_contexts.size(); ++i) { if (g_contexts[i] == nullptr) { - g_contexts[i] = whisper_init(path_model.c_str()); + g_contexts[i] = whisper_init_from_file(path_model.c_str()); if (g_contexts[i] != nullptr) { g_running = true; if (g_worker.joinable()) { diff --git a/examples/command/command.cpp b/examples/command/command.cpp index 4558a67..3dae3a5 100644 --- a/examples/command/command.cpp +++ b/examples/command/command.cpp @@ -931,7 +931,7 @@ int main(int argc, char ** argv) { // whisper init - struct whisper_context * ctx = whisper_init(params.model.c_str()); + struct whisper_context * ctx = whisper_init_from_file(params.model.c_str()); // print some info about the processing { diff --git a/examples/main/main.cpp b/examples/main/main.cpp index d387a77..48e0292 100644 --- a/examples/main/main.cpp +++ b/examples/main/main.cpp @@ -478,7 +478,7 @@ int main(int argc, char ** argv) { // whisper init - struct whisper_context * ctx = whisper_init(params.model.c_str()); + struct whisper_context * ctx = whisper_init_from_file(params.model.c_str()); if (ctx == nullptr) { fprintf(stderr, "error: failed to initialize whisper context\n"); diff --git a/examples/stream.wasm/emscripten.cpp b/examples/stream.wasm/emscripten.cpp index b75eee3..e4cdf63 100644 --- a/examples/stream.wasm/emscripten.cpp +++ b/examples/stream.wasm/emscripten.cpp @@ -129,7 +129,7 @@ EMSCRIPTEN_BINDINGS(stream) { emscripten::function("init", emscripten::optional_override([](const std::string & path_model) { for (size_t i = 0; i < g_contexts.size(); ++i) { if (g_contexts[i] == nullptr) { - g_contexts[i] = whisper_init(path_model.c_str()); + 
g_contexts[i] = whisper_init_from_file(path_model.c_str()); if (g_contexts[i] != nullptr) { g_running = true; if (g_worker.joinable()) { diff --git a/examples/stream/stream.cpp b/examples/stream/stream.cpp index 9caa614..c7aa871 100644 --- a/examples/stream/stream.cpp +++ b/examples/stream/stream.cpp @@ -456,7 +456,7 @@ int main(int argc, char ** argv) { exit(0); } - struct whisper_context * ctx = whisper_init(params.model.c_str()); + struct whisper_context * ctx = whisper_init_from_file(params.model.c_str()); std::vector pcmf32 (n_samples_30s, 0.0f); std::vector pcmf32_old(n_samples_30s, 0.0f); diff --git a/examples/talk.wasm/emscripten.cpp b/examples/talk.wasm/emscripten.cpp index c82f469..1ea9702 100644 --- a/examples/talk.wasm/emscripten.cpp +++ b/examples/talk.wasm/emscripten.cpp @@ -271,7 +271,7 @@ EMSCRIPTEN_BINDINGS(talk) { emscripten::function("init", emscripten::optional_override([](const std::string & path_model) { for (size_t i = 0; i < g_contexts.size(); ++i) { if (g_contexts[i] == nullptr) { - g_contexts[i] = whisper_init(path_model.c_str()); + g_contexts[i] = whisper_init_from_file(path_model.c_str()); if (g_contexts[i] != nullptr) { g_running = true; if (g_worker.joinable()) { diff --git a/examples/talk/talk.cpp b/examples/talk/talk.cpp index ec57a95..55cd46a 100644 --- a/examples/talk/talk.cpp +++ b/examples/talk/talk.cpp @@ -498,7 +498,7 @@ int main(int argc, char ** argv) { // whisper init - struct whisper_context * ctx_wsp = whisper_init(params.model_wsp.c_str()); + struct whisper_context * ctx_wsp = whisper_init_from_file(params.model_wsp.c_str()); // gpt init diff --git a/examples/whisper.android/app/src/main/java/com/whispercppdemo/ui/main/MainScreenViewModel.kt b/examples/whisper.android/app/src/main/java/com/whispercppdemo/ui/main/MainScreenViewModel.kt index 8664440..d741748 100644 --- a/examples/whisper.android/app/src/main/java/com/whispercppdemo/ui/main/MainScreenViewModel.kt +++ 
b/examples/whisper.android/app/src/main/java/com/whispercppdemo/ui/main/MainScreenViewModel.kt @@ -64,16 +64,22 @@ class MainScreenViewModel(private val application: Application) : ViewModel() { private suspend fun copyAssets() = withContext(Dispatchers.IO) { modelsPath.mkdirs() samplesPath.mkdirs() - application.copyData("models", modelsPath, ::printMessage) + //application.copyData("models", modelsPath, ::printMessage) application.copyData("samples", samplesPath, ::printMessage) printMessage("All data copied to working directory.\n") } private suspend fun loadBaseModel() = withContext(Dispatchers.IO) { printMessage("Loading model...\n") - val firstModel = modelsPath.listFiles()!!.first() - whisperContext = WhisperContext.createContext(firstModel.absolutePath) - printMessage("Loaded model ${firstModel.name}.\n") + val models = application.assets.list("models/") + if (models != null) { + val inputstream = application.assets.open("models/" + models[0]) + whisperContext = WhisperContext.createContextFromInputStream(inputstream) + printMessage("Loaded model ${models[0]}.\n") + } + + //val firstModel = modelsPath.listFiles()!!.first() + //whisperContext = WhisperContext.createContextFromFile(firstModel.absolutePath) } fun transcribeSample() = viewModelScope.launch { diff --git a/examples/whisper.android/app/src/main/java/com/whispercppdemo/whisper/LibWhisper.kt b/examples/whisper.android/app/src/main/java/com/whispercppdemo/whisper/LibWhisper.kt index a6dfdcc..edd041a 100644 --- a/examples/whisper.android/app/src/main/java/com/whispercppdemo/whisper/LibWhisper.kt +++ b/examples/whisper.android/app/src/main/java/com/whispercppdemo/whisper/LibWhisper.kt @@ -4,6 +4,7 @@ import android.os.Build import android.util.Log import kotlinx.coroutines.* import java.io.File +import java.io.InputStream import java.util.concurrent.Executors private const val LOG_TAG = "LibWhisper" @@ -39,13 +40,22 @@ class WhisperContext private constructor(private var ptr: Long) { } companion object 
{ - fun createContext(filePath: String): WhisperContext { + fun createContextFromFile(filePath: String): WhisperContext { val ptr = WhisperLib.initContext(filePath) if (ptr == 0L) { throw java.lang.RuntimeException("Couldn't create context with path $filePath") } return WhisperContext(ptr) } + + fun createContextFromInputStream(stream: InputStream): WhisperContext { + val ptr = WhisperLib.initContextFromInputStream(stream) + + if (ptr == 0L) { + throw java.lang.RuntimeException("Couldn't create context from input stream") + } + return WhisperContext(ptr) + } } } @@ -76,6 +86,7 @@ private class WhisperLib { } // JNI methods + external fun initContextFromInputStream(inputStream: InputStream): Long external fun initContext(modelPath: String): Long external fun freeContext(contextPtr: Long) external fun fullTranscribe(contextPtr: Long, audioData: FloatArray) diff --git a/examples/whisper.android/app/src/main/jni/whisper/jni.c b/examples/whisper.android/app/src/main/jni/whisper/jni.c index e3fe695..0fd2897 100644 --- a/examples/whisper.android/app/src/main/jni/whisper/jni.c +++ b/examples/whisper.android/app/src/main/jni/whisper/jni.c @@ -2,6 +2,7 @@ #include #include #include +#include #include "whisper.h" #define UNUSED(x) (void)(x) @@ -17,13 +18,86 @@ static inline int max(int a, int b) { return (a > b) ? a : b; } +struct input_stream_context { + size_t offset; + JNIEnv * env; + jobject thiz; + jobject input_stream; + + jmethodID mid_available; + jmethodID mid_read; +}; + +size_t inputStreamRead(void * ctx, void * output, size_t read_size) { + struct input_stream_context* is = (struct input_stream_context*)ctx; + + jint avail_size = (*is->env)->CallIntMethod(is->env, is->input_stream, is->mid_available); + jint size_to_copy = read_size < avail_size ? 
(jint)read_size : avail_size; + + jbyteArray byte_array = (*is->env)->NewByteArray(is->env, size_to_copy); + + jint n_read = (*is->env)->CallIntMethod(is->env, is->input_stream, is->mid_read, byte_array, 0, size_to_copy); + + if (size_to_copy != read_size || size_to_copy != n_read) { + LOGI("Insufficient Read: Req=%zu, ToCopy=%d, Available=%d", read_size, size_to_copy, n_read); + } + + jbyte* byte_array_elements = (*is->env)->GetByteArrayElements(is->env, byte_array, NULL); + memcpy(output, byte_array_elements, size_to_copy); + (*is->env)->ReleaseByteArrayElements(is->env, byte_array, byte_array_elements, JNI_ABORT); + + (*is->env)->DeleteLocalRef(is->env, byte_array); + + is->offset += size_to_copy; + + return size_to_copy; +} +bool inputStreamEof(void * ctx) { + struct input_stream_context* is = (struct input_stream_context*)ctx; + + jint result = (*is->env)->CallIntMethod(is->env, is->input_stream, is->mid_available); + return result <= 0; +} +void inputStreamClose(void * ctx) { + +} + +JNIEXPORT jlong JNICALL +Java_com_whispercppdemo_whisper_WhisperLib_00024Companion_initContextFromInputStream( + JNIEnv *env, jobject thiz, jobject input_stream) { + UNUSED(thiz); + + struct whisper_context *context = NULL; + struct whisper_model_loader loader = {}; + struct input_stream_context inp_ctx = {}; + + inp_ctx.offset = 0; + inp_ctx.env = env; + inp_ctx.thiz = thiz; + inp_ctx.input_stream = input_stream; + + jclass cls = (*env)->GetObjectClass(env, input_stream); + inp_ctx.mid_available = (*env)->GetMethodID(env, cls, "available", "()I"); + inp_ctx.mid_read = (*env)->GetMethodID(env, cls, "read", "([BII)I"); + + loader.context = &inp_ctx; + loader.read = inputStreamRead; + loader.eof = inputStreamEof; + loader.close = inputStreamClose; + + loader.eof(loader.context); + + context = whisper_init(&loader); + return (jlong) context; +} + JNIEXPORT jlong JNICALL Java_com_whispercppdemo_whisper_WhisperLib_00024Companion_initContext( JNIEnv *env, jobject thiz, jstring 
model_path_str) { UNUSED(thiz); struct whisper_context *context = NULL; const char *model_path_chars = (*env)->GetStringUTFChars(env, model_path_str, NULL); - context = whisper_init(model_path_chars); + context = whisper_init_from_file(model_path_chars); (*env)->ReleaseStringUTFChars(env, model_path_str, model_path_chars); return (jlong) context; } diff --git a/examples/whisper.android/local.properties b/examples/whisper.android/local.properties deleted file mode 100644 index cd5e215..0000000 --- a/examples/whisper.android/local.properties +++ /dev/null @@ -1,10 +0,0 @@ -## This file is automatically generated by Android Studio. -# Do not modify this file -- YOUR CHANGES WILL BE ERASED! -# -# This file should *NOT* be checked into Version Control Systems, -# as it contains information specific to your local configuration. -# -# Location of the SDK. This is only used by Gradle. -# For customization when using a Version Control System, please read the -# header note. -sdk.dir=/Users/kevin/Library/Android/sdk \ No newline at end of file diff --git a/examples/whisper.objc/whisper.objc/ViewController.m b/examples/whisper.objc/whisper.objc/ViewController.m index d6aef36..8a1e876 100644 --- a/examples/whisper.objc/whisper.objc/ViewController.m +++ b/examples/whisper.objc/whisper.objc/ViewController.m @@ -61,7 +61,7 @@ void AudioInputCallback(void * inUserData, NSLog(@"Loading model from %@", modelPath); // create ggml context - stateInp.ctx = whisper_init([modelPath UTF8String]); + stateInp.ctx = whisper_init_from_file([modelPath UTF8String]); // check if the model was loaded successfully if (stateInp.ctx == NULL) { diff --git a/examples/whisper.swiftui/whisper.cpp.swift/LibWhisper.swift b/examples/whisper.swiftui/whisper.cpp.swift/LibWhisper.swift index 9adfb42..e9645b3 100644 --- a/examples/whisper.swiftui/whisper.cpp.swift/LibWhisper.swift +++ b/examples/whisper.swiftui/whisper.cpp.swift/LibWhisper.swift @@ -55,7 +55,7 @@ actor WhisperContext { } static func 
createContext(path: String) throws -> WhisperContext { - let context = whisper_init(path) + let context = whisper_init_from_file(path) if let context { return WhisperContext(context: context) } else { diff --git a/examples/whisper.wasm/emscripten.cpp b/examples/whisper.wasm/emscripten.cpp index 33ae0a1..f92d814 100644 --- a/examples/whisper.wasm/emscripten.cpp +++ b/examples/whisper.wasm/emscripten.cpp @@ -18,7 +18,7 @@ EMSCRIPTEN_BINDINGS(whisper) { for (size_t i = 0; i < g_contexts.size(); ++i) { if (g_contexts[i] == nullptr) { - g_contexts[i] = whisper_init(path_model.c_str()); + g_contexts[i] = whisper_init_from_file(path_model.c_str()); if (g_contexts[i] != nullptr) { return i + 1; } else { diff --git a/whisper.cpp b/whisper.cpp index e8d9f0c..433b735 100644 --- a/whisper.cpp +++ b/whisper.cpp @@ -437,8 +437,8 @@ struct whisper_context { }; template -static void read_safe(std::ifstream& fin, T& dest) { - fin.read((char*)& dest, sizeof(T)); +static void read_safe(whisper_model_loader * loader, T & dest) { + loader->read(loader->context, &dest, sizeof(T)); } // load the model from a ggml file @@ -452,24 +452,18 @@ static void read_safe(std::ifstream& fin, T& dest) { // // see the convert-pt-to-ggml.py script for details // -static bool whisper_model_load(const std::string & fname, whisper_context & wctx) { - fprintf(stderr, "%s: loading model from '%s'\n", __func__, fname.c_str()); +static bool whisper_model_load(struct whisper_model_loader * loader, whisper_context & wctx) { + fprintf(stderr, "%s: loading model\n", __func__); auto & model = wctx.model; auto & vocab = wctx.vocab; - auto fin = std::ifstream(fname, std::ios::binary); - if (!fin) { - fprintf(stderr, "%s: failed to open '%s'\n", __func__, fname.c_str()); - return false; - } - // verify magic { uint32_t magic; - read_safe(fin, magic); + read_safe(loader, magic); if (magic != 0x67676d6c) { - fprintf(stderr, "%s: invalid model file '%s' (bad magic)\n", __func__, fname.c_str()); + fprintf(stderr, "%s: 
invalid model data (bad magic)\n", __func__); return false; } } @@ -478,17 +472,17 @@ static bool whisper_model_load(const std::string & fname, whisper_context & wctx { auto & hparams = model.hparams; - read_safe(fin, hparams.n_vocab); - read_safe(fin, hparams.n_audio_ctx); - read_safe(fin, hparams.n_audio_state); - read_safe(fin, hparams.n_audio_head); - read_safe(fin, hparams.n_audio_layer); - read_safe(fin, hparams.n_text_ctx); - read_safe(fin, hparams.n_text_state); - read_safe(fin, hparams.n_text_head); - read_safe(fin, hparams.n_text_layer); - read_safe(fin, hparams.n_mels); - read_safe(fin, hparams.f16); + read_safe(loader, hparams.n_vocab); + read_safe(loader, hparams.n_audio_ctx); + read_safe(loader, hparams.n_audio_state); + read_safe(loader, hparams.n_audio_head); + read_safe(loader, hparams.n_audio_layer); + read_safe(loader, hparams.n_text_ctx); + read_safe(loader, hparams.n_text_state); + read_safe(loader, hparams.n_text_head); + read_safe(loader, hparams.n_text_layer); + read_safe(loader, hparams.n_mels); + read_safe(loader, hparams.f16); assert(hparams.n_text_state == hparams.n_audio_state); @@ -536,17 +530,17 @@ static bool whisper_model_load(const std::string & fname, whisper_context & wctx { auto & filters = wctx.model.filters; - read_safe(fin, filters.n_mel); - read_safe(fin, filters.n_fft); + read_safe(loader, filters.n_mel); + read_safe(loader, filters.n_fft); filters.data.resize(filters.n_mel * filters.n_fft); - fin.read((char *) filters.data.data(), filters.data.size() * sizeof(float)); + loader->read(loader->context, filters.data.data(), filters.data.size() * sizeof(float)); } // load vocab { int32_t n_vocab = 0; - read_safe(fin, n_vocab); + read_safe(loader, n_vocab); //if (n_vocab != model.hparams.n_vocab) { // fprintf(stderr, "%s: invalid model file '%s' (bad vocab size %d != %d)\n", @@ -561,11 +555,11 @@ static bool whisper_model_load(const std::string & fname, whisper_context & wctx for (int i = 0; i < n_vocab; i++) { uint32_t len; - 
read_safe(fin, len); + read_safe(loader, len); if (len > 0) { tmp.resize(len); - fin.read(&tmp[0], tmp.size()); // read to buffer + loader->read(loader->context, &tmp[0], tmp.size()); // read to buffer word.assign(&tmp[0], tmp.size()); } else { // seems like we have an empty-string token in multi-language models (i = 50256) @@ -1017,24 +1011,24 @@ static bool whisper_model_load(const std::string & fname, whisper_context & wctx int32_t length; int32_t ftype; - read_safe(fin, n_dims); - read_safe(fin, length); - read_safe(fin, ftype); + read_safe(loader, n_dims); + read_safe(loader, length); + read_safe(loader, ftype); - if (fin.eof()) { + if (loader->eof(loader->context)) { break; } int32_t nelements = 1; int32_t ne[3] = { 1, 1, 1 }; for (int i = 0; i < n_dims; ++i) { - read_safe(fin, ne[i]); + read_safe(loader, ne[i]); nelements *= ne[i]; } std::string name; std::vector tmp(length); // create a buffer - fin.read(&tmp[0], tmp.size()); // read to buffer + loader->read(loader->context, &tmp[0], tmp.size()); // read to buffer name.assign(&tmp[0], tmp.size()); if (model.tensors.find(name) == model.tensors.end()) { @@ -1062,7 +1056,7 @@ static bool whisper_model_load(const std::string & fname, whisper_context & wctx return false; } - fin.read(reinterpret_cast(tensor->data), ggml_nbytes(tensor)); + loader->read(loader->context, tensor->data, ggml_nbytes(tensor)); //printf("%48s - [%5d, %5d, %5d], type = %6s, %6.2f MB\n", name.data(), ne[0], ne[1], ne[2], ftype == 0 ? 
"float" : "f16", ggml_nbytes(tensor)/1024.0/1024.0); total_size += ggml_nbytes(tensor); @@ -1079,8 +1073,6 @@ static bool whisper_model_load(const std::string & fname, whisper_context & wctx } } - fin.close(); - return true; } @@ -2240,7 +2232,74 @@ static std::vector tokenize(const whisper_vocab & vocab, cons // interface implementation // -struct whisper_context * whisper_init(const char * path_model) { +struct whisper_context * whisper_init_from_file(const char * path_model) { + whisper_model_loader loader = {}; + + fprintf(stderr, "%s: loading model from '%s'\n", __func__, path_model); + + auto fin = std::ifstream(path_model, std::ios::binary); + if (!fin) { + fprintf(stderr, "%s: failed to open '%s'\n", __func__, path_model); + return nullptr; + } + + loader.context = &fin; + loader.read = [](void * ctx, void * output, size_t read_size) { + std::ifstream * fin = (std::ifstream*)ctx; + fin->read((char *)output, read_size); + return read_size; + }; + + loader.eof = [](void * ctx) { + std::ifstream * fin = (std::ifstream*)ctx; + return fin->eof(); + }; + + loader.close = [](void * ctx) { + std::ifstream * fin = (std::ifstream*)ctx; + fin->close(); + }; + + return whisper_init(&loader); +} + +struct whisper_context * whisper_init_from_buffer(void * buffer, size_t buffer_size) { + struct buf_context { + uint8_t* buffer; + size_t size; + size_t current_offset; + }; + + buf_context ctx = { reinterpret_cast(buffer), buffer_size, 0 }; + whisper_model_loader loader = {}; + + fprintf(stderr, "%s: loading model from buffer\n", __func__); + + loader.context = &ctx; + + loader.read = [](void * ctx, void * output, size_t read_size) { + buf_context * buf = reinterpret_cast(ctx); + + size_t size_to_copy = buf->current_offset + read_size < buf->size ? 
read_size : buf->size - buf->current_offset; + + memcpy(output, buf->buffer + buf->current_offset, size_to_copy); + buf->current_offset += size_to_copy; + + return size_to_copy; + }; + + loader.eof = [](void * ctx) { + buf_context * buf = reinterpret_cast(ctx); + + return buf->current_offset >= buf->size; + }; + + loader.close = [](void * /*ctx*/) { }; + + return whisper_init(&loader); +} + +struct whisper_context * whisper_init(struct whisper_model_loader * loader) { ggml_time_init(); whisper_context * ctx = new whisper_context; @@ -2249,14 +2308,17 @@ struct whisper_context * whisper_init(const char * path_model) { ctx->t_start_us = t_start_us; - if (!whisper_model_load(path_model, *ctx)) { - fprintf(stderr, "%s: failed to load model from '%s'\n", __func__, path_model); + if (!whisper_model_load(loader, *ctx)) { + loader->close(loader->context); + fprintf(stderr, "%s: failed to load model\n", __func__); delete ctx; return nullptr; } ctx->t_load_us = ggml_time_us() - t_start_us; + loader->close(loader->context); + return ctx; } diff --git a/whisper.h b/whisper.h index 8cb16ca..582138f 100644 --- a/whisper.h +++ b/whisper.h @@ -1,6 +1,7 @@ #ifndef WHISPER_H #define WHISPER_H +#include #include #include @@ -40,7 +41,7 @@ extern "C" { // // ... // - // struct whisper_context * ctx = whisper_init("/path/to/ggml-base.en.bin"); + // struct whisper_context * ctx = whisper_init_from_file("/path/to/ggml-base.en.bin"); // // if (whisper_full(ctx, wparams, pcmf32.data(), pcmf32.size()) != 0) { // fprintf(stderr, "failed to process audio\n"); @@ -84,9 +85,20 @@ extern "C" { float vlen; // voice length of the token } whisper_token_data; - // Allocates all memory needed for the model and loads the model from the given file. - // Returns NULL on failure. 
- WHISPER_API struct whisper_context * whisper_init(const char * path_model); + typedef struct whisper_model_loader { + void * context; + + size_t (*read)(void * ctx, void * output, size_t read_size); + bool (*eof)(void * ctx); + void (*close)(void * ctx); + } whisper_model_loader; + + // Various functions to load a ggml whisper model. + // Allocates (almost) all memory needed for the model. + // Returns NULL on failure + WHISPER_API struct whisper_context * whisper_init_from_file(const char * path_model); + WHISPER_API struct whisper_context * whisper_init_from_buffer(void * buffer, size_t buffer_size); + WHISPER_API struct whisper_context * whisper_init(struct whisper_model_loader * loader); // Frees all memory allocated by the model. WHISPER_API void whisper_free(struct whisper_context * ctx);