Node.js package (#260)
* npm : preparing infra for node package * npm : package infra ready * npm : initial version ready * npm : change name to whisper.cpp whisper.js is takenpull/271/head
parent
aa6adda26e
commit
f309f97df6
@ -1,3 +1,19 @@
|
|||||||
if (EMSCRIPTEN)
    add_subdirectory(javascript)

    # Publish the npm package that lives in ./javascript.
    #
    # publish.log is a stamp file: it is the OUTPUT of the custom command and is
    # touched after `npm publish`, so the command is tied to the three packaged
    # files listed under DEPENDS.
    add_custom_command(
        OUTPUT ${CMAKE_CURRENT_SOURCE_DIR}/javascript/publish.log
        DEPENDS
            ${CMAKE_CURRENT_SOURCE_DIR}/javascript/whisper.js
            ${CMAKE_CURRENT_SOURCE_DIR}/javascript/libwhisper.worker.js
            ${CMAKE_CURRENT_SOURCE_DIR}/javascript/package.json
        WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}/javascript
        COMMAND npm publish
        # Use CMake's built-in touch so the stamp step does not rely on a
        # `touch` executable being available on the host.
        COMMAND ${CMAKE_COMMAND} -E touch publish.log
        COMMENT "Publishing npm module v${PROJECT_VERSION}"
        VERBATIM
    )

    # `publish-npm` is the user-facing target; it depends on the stamp file
    # using the same absolute path as the OUTPUT above so the two always match.
    add_custom_target(publish-npm
        DEPENDS ${CMAKE_CURRENT_SOURCE_DIR}/javascript/publish.log
    )
endif()
|
||||||
|
@ -0,0 +1,5 @@
|
|||||||
|
# whisper.cpp
|
||||||
|
|
||||||
|
Node.js package for Whisper speech recognition
|
||||||
|
|
||||||
|
For sample usage check [tests/test-whisper.js](/tests/test-whisper.js)
|
@ -0,0 +1 @@
|
|||||||
|
// Worker-side bootstrap script for the threads of the whisper WASM module.
// NOTE(review): this looks like minified, tool-generated pthread glue (names such as
// "__emscripten_thread_init" suggest Emscripten) — presumably it should be regenerated
// by the build rather than edited by hand; confirm before changing it.
//
// Under Node.js it wires worker_threads' parentPort to onmessage/postMessage and installs
// browser-style globals (self, importScripts, Worker, performance, ...).
// self.onmessage dispatches on e.data.cmd:
//   "load"                 - store wasmModule/wasmMemory, import the main script, call whisper_factory(Module)
//   "run"                  - initialise the thread and invoke the entry point (start_routine, arg)
//   "cancel"               - exit the current thread with status -1
//   "processProxyingQueue" - run the queue now, or defer it until the JS side is initialised
// Any other message carrying a cmd is reported through err().
"use strict";var Module={};var ENVIRONMENT_IS_NODE=typeof process=="object"&&typeof process.versions=="object"&&typeof process.versions.node=="string";if(ENVIRONMENT_IS_NODE){var nodeWorkerThreads=require("worker_threads");var parentPort=nodeWorkerThreads.parentPort;parentPort.on("message",data=>onmessage({data:data}));var fs=require("fs");Object.assign(global,{self:global,require:require,Module:Module,location:{href:__filename},Worker:nodeWorkerThreads.Worker,importScripts:function(f){(0,eval)(fs.readFileSync(f,"utf8")+"//# sourceURL="+f)},postMessage:function(msg){parentPort.postMessage(msg)},performance:global.performance||{now:function(){return Date.now()}}})}var initializedJS=false;var pendingNotifiedProxyingQueues=[];function threadPrintErr(){var text=Array.prototype.slice.call(arguments).join(" ");if(ENVIRONMENT_IS_NODE){fs.writeSync(2,text+"\n");return}console.error(text)}function threadAlert(){var text=Array.prototype.slice.call(arguments).join(" ");postMessage({cmd:"alert",text:text,threadId:Module["_pthread_self"]()})}var err=threadPrintErr;self.alert=threadAlert;Module["instantiateWasm"]=(info,receiveInstance)=>{var instance=new WebAssembly.Instance(Module["wasmModule"],info);receiveInstance(instance);Module["wasmModule"]=null;return instance.exports};self.onunhandledrejection=e=>{throw e.reason??e};self.onmessage=e=>{try{if(e.data.cmd==="load"){Module["wasmModule"]=e.data.wasmModule;for(const handler of e.data.handlers){Module[handler]=function(){postMessage({cmd:"callHandler",handler:handler,args:[...arguments]})}}Module["wasmMemory"]=e.data.wasmMemory;Module["buffer"]=Module["wasmMemory"].buffer;Module["ENVIRONMENT_IS_PTHREAD"]=true;if(typeof e.data.urlOrBlob=="string"){importScripts(e.data.urlOrBlob)}else{var objectUrl=URL.createObjectURL(e.data.urlOrBlob);importScripts(objectUrl);URL.revokeObjectURL(objectUrl)}whisper_factory(Module).then(function(instance){Module=instance})}else 
if(e.data.cmd==="run"){Module["__performance_now_clock_drift"]=performance.now()-e.data.time;Module["__emscripten_thread_init"](e.data.pthread_ptr,0,0,1);Module["establishStackSpace"]();Module["PThread"].receiveObjectTransfer(e.data);Module["PThread"].threadInitTLS();if(!initializedJS){Module["__embind_initialize_bindings"]();pendingNotifiedProxyingQueues.forEach(queue=>{Module["executeNotifiedProxyingQueue"](queue)});pendingNotifiedProxyingQueues=[];initializedJS=true}try{Module["invokeEntryPoint"](e.data.start_routine,e.data.arg)}catch(ex){if(ex!="unwind"){if(ex instanceof Module["ExitStatus"]){if(Module["keepRuntimeAlive"]()){}else{Module["__emscripten_thread_exit"](ex.status)}}else{throw ex}}}}else if(e.data.cmd==="cancel"){if(Module["_pthread_self"]()){Module["__emscripten_thread_exit"](-1)}}else if(e.data.target==="setimmediate"){}else if(e.data.cmd==="processProxyingQueue"){if(initializedJS){Module["executeNotifiedProxyingQueue"](e.data.queue)}else{pendingNotifiedProxyingQueues.push(e.data.queue)}}else if(e.data.cmd){err("worker.js received unknown command "+e.data.cmd);err(e.data)}}catch(ex){if(Module["__emscripten_thread_crashed"]){Module["__emscripten_thread_crashed"]()}throw ex}};
|
@ -0,0 +1,26 @@
|
|||||||
|
{
|
||||||
|
"name": "whisper.cpp",
|
||||||
|
"version": "@PROJECT_VERSION@",
|
||||||
|
"description": "Whisper speech recognition",
|
||||||
|
"main": "whisper.js",
|
||||||
|
"scripts": {
|
||||||
|
"test": "echo \"todo: add tests\" && exit 0"
|
||||||
|
},
|
||||||
|
"repository": {
|
||||||
|
"type": "git",
|
||||||
|
"url": "git+https://github.com/ggerganov/whisper.cpp"
|
||||||
|
},
|
||||||
|
"keywords": [
|
||||||
|
"openai",
|
||||||
|
"whisper",
|
||||||
|
"speech-to-text",
|
||||||
|
"speech-recognition",
|
||||||
|
"transformer"
|
||||||
|
],
|
||||||
|
"author": "Georgi Gerganov",
|
||||||
|
"license": "MIT",
|
||||||
|
"bugs": {
|
||||||
|
"url": "https://github.com/ggerganov/whisper.cpp/issues"
|
||||||
|
},
|
||||||
|
"homepage": "https://github.com/ggerganov/whisper.cpp#readme"
|
||||||
|
}
|
@ -0,0 +1,26 @@
|
|||||||
|
{
|
||||||
|
"name": "whisper.cpp",
|
||||||
|
"version": "1.0.0",
|
||||||
|
"description": "Whisper speech recognition",
|
||||||
|
"main": "whisper.js",
|
||||||
|
"scripts": {
|
||||||
|
"test": "echo \"todo: add tests\" && exit 0"
|
||||||
|
},
|
||||||
|
"repository": {
|
||||||
|
"type": "git",
|
||||||
|
"url": "git+https://github.com/ggerganov/whisper.cpp"
|
||||||
|
},
|
||||||
|
"keywords": [
|
||||||
|
"openai",
|
||||||
|
"whisper",
|
||||||
|
"speech-to-text",
|
||||||
|
"speech-recognition",
|
||||||
|
"transformer"
|
||||||
|
],
|
||||||
|
"author": "Georgi Gerganov",
|
||||||
|
"license": "MIT",
|
||||||
|
"bugs": {
|
||||||
|
"url": "https://github.com/ggerganov/whisper.cpp/issues"
|
||||||
|
},
|
||||||
|
"homepage": "https://github.com/ggerganov/whisper.cpp#readme"
|
||||||
|
}
|
File diff suppressed because one or more lines are too long
@ -0,0 +1,108 @@
|
|||||||
|
#include "whisper.h"
|
||||||
|
|
||||||
|
#include <emscripten.h>
|
||||||
|
#include <emscripten/bind.h>
|
||||||
|
|
||||||
|
#include <vector>
|
||||||
|
#include <thread>
|
||||||
|
|
||||||
|
// Background thread that runs the transcription started by full_default().
// Every binding (init / free / full_default) joins it first, so at most one
// transcription is in flight and g_contexts is not modified while it runs.
std::thread g_worker;
|
||||||
|
|
||||||
|
// Fixed pool of 4 context slots, all initially empty (nullptr).
// JavaScript refers to slot i through the 1-based handle i + 1 returned by init().
std::vector<struct whisper_context *> g_contexts(4, nullptr);
|
||||||
|
|
||||||
|
// Embind entry points exposed to JavaScript: init, free and full_default.
//
// Contexts are addressed from JavaScript by a 1-based handle (slot index + 1).
// 0 is never a valid handle, which lets init() use it as its failure value.
// Each entry point first joins g_worker so that a previous transcription has
// finished before the context table is read or modified.
EMSCRIPTEN_BINDINGS(whisper) {
    // init(path_model) -> handle
    //
    // Loads the model at `path_model` into the first empty slot.
    // Returns the 1-based handle of that slot, or 0 when the model fails to
    // load or all slots are already in use.
    emscripten::function("init", emscripten::optional_override([](const std::string & path_model) {
        if (g_worker.joinable()) {
            g_worker.join();
        }

        for (size_t i = 0; i < g_contexts.size(); ++i) {
            if (g_contexts[i] != nullptr) {
                continue;
            }

            g_contexts[i] = whisper_init(path_model.c_str());

            // the slot stays nullptr on failure, so it can be reused later
            return g_contexts[i] != nullptr ? i + 1 : (size_t) 0;
        }

        return (size_t) 0;
    }));

    // free(handle)
    //
    // Releases the context behind `handle`. Invalid or already-freed handles
    // are ignored.
    emscripten::function("free", emscripten::optional_override([](size_t index) {
        if (g_worker.joinable()) {
            g_worker.join();
        }

        // handle -> slot index; handle 0 wraps to SIZE_MAX and is rejected by
        // the bounds check below
        --index;

        if (index < g_contexts.size() && g_contexts[index] != nullptr) {
            whisper_free(g_contexts[index]);
            g_contexts[index] = nullptr;
        }
    }));

    // full_default(handle, audio, lang, translate) -> status
    //
    // Copies the samples from the JS typed array `audio` into WASM memory and
    // starts a transcription with greedy sampling on the worker thread. The
    // call returns as soon as the worker is started; it does not wait for the
    // transcription to finish.
    //
    // Returns 0 when the worker was started, -1 when `handle` is out of range
    // and -2 when the slot holds no context.
    emscripten::function("full_default", emscripten::optional_override([](size_t index, const emscripten::val & audio, const std::string & lang, bool translate) {
        if (g_worker.joinable()) {
            g_worker.join();
        }

        --index;

        if (index >= g_contexts.size()) {
            return -1;
        }

        if (g_contexts[index] == nullptr) {
            return -2;
        }

        // Keep an owned copy of the language code. `lang` is destroyed when
        // this function returns, but the worker thread reads params.language
        // afterwards, so pointing at lang.c_str() would leave it dangling.
        std::string language = "en";
        if (whisper_is_multilingual(g_contexts[index])) {
            language = lang;
        }

        struct whisper_full_params params = whisper_full_default_params(whisper_sampling_strategy::WHISPER_SAMPLING_GREEDY);

        // hardware_concurrency() may return 0 when the value is not computable
        const int n_processors = (int) std::thread::hardware_concurrency();

        params.print_realtime   = true;
        params.print_progress   = false;
        params.print_timestamps = true;
        params.print_special    = false;
        params.translate        = translate;
        params.language         = language.c_str(); // valid for the logging below only
        params.n_threads        = std::max(1, std::min(8, n_processors));
        params.offset_ms        = 0;

        std::vector<float> pcmf32;
        const int n = audio["length"].as<int>();

        emscripten::val heap = emscripten::val::module_property("HEAPU8");
        emscripten::val memory = heap["buffer"];

        pcmf32.resize(n);

        // Create a typed-array view of the same type as `audio` directly over
        // pcmf32's storage in WASM memory and let JS copy the samples into it.
        emscripten::val memoryView = audio["constructor"].new_(memory, reinterpret_cast<uintptr_t>(pcmf32.data()), n);
        memoryView.call<void>("set", audio);

        // print system information
        {
            printf("system_info: n_threads = %d / %d | %s\n",
                    params.n_threads, n_processors, whisper_print_system_info());

            printf("%s: processing %d samples, %.1f sec, %d threads, %d processors, lang = %s, task = %s ...\n",
                    __func__, int(pcmf32.size()), float(pcmf32.size())/WHISPER_SAMPLE_RATE,
                    params.n_threads, 1,
                    params.language,
                    params.translate ? "translate" : "transcribe");

            printf("\n");
        }

        // run the worker
        {
            g_worker = std::thread([index, params, language = std::move(language), pcmf32 = std::move(pcmf32)]() mutable {
                // re-point at the string owned by this closure; the pointer
                // copied inside `params` refers to the caller's local
                params.language = language.c_str();

                whisper_reset_timings(g_contexts[index]);
                whisper_full(g_contexts[index], params, pcmf32.data(), pcmf32.size());
                whisper_print_timings(g_contexts[index]);
            });
        }

        return 0;
    }));
}
|
@ -0,0 +1,58 @@
|
|||||||
|
// Smoke test for the Node.js package: loads a model into the module's virtual
// filesystem, starts a transcription of a raw PCM sample and frees the context.
const factory = require('../bindings/javascript/whisper.js');

factory().then(function (whisper) {
    const fs = require('fs');
    const path = require('path');

    // to avoid reading WAV files and depending on some 3rd-party package, we read
    // 32-bit float PCM directly. to generate it:
    //
    //   $ ffmpeg -i samples/jfk.wav -f f32le -acodec pcm_f32le samples/jfk.pcmf32
    //
    // Paths are resolved from this file's directory so the test does not depend
    // on the current working directory.
    const fname_wav = path.join(__dirname, '..', 'samples', 'jfk.pcmf32');
    const fname_model = path.join(__dirname, '..', 'models', 'ggml-base.en.bin');

    // handle returned by whisper.init(); identifies the context in later calls
    let ctx;

    // init whisper
    {
        // read binary data from file (readFileSync throws on failure)
        let model_data;
        try {
            model_data = fs.readFileSync(fname_model);
        } catch (err) {
            console.log(`whisper: failed to read model file: ${err.message}`);
            process.exit(1);
        }

        // write binary data to WASM memory
        whisper.FS_createDataFile("/", "whisper.bin", model_data, true, true);

        // init the model; a falsy result (0) means no context was created
        ctx = whisper.init("whisper.bin");
        if (!ctx) {
            console.log('whisper: failed to init');
            process.exit(1);
        }
    }

    // transcribe wav file
    {
        // read raw binary data
        let pcm_data;
        try {
            pcm_data = fs.readFileSync(fname_wav);
        } catch (err) {
            console.log(`whisper: failed to read wav file: ${err.message}`);
            process.exit(1);
        }

        // convert to 32-bit float array; honour the Buffer's offset and length
        // because a Buffer may be a window into a larger ArrayBuffer
        const pcm = new Float32Array(
            pcm_data.buffer,
            pcm_data.byteOffset,
            Math.floor(pcm_data.byteLength / Float32Array.BYTES_PER_ELEMENT)
        );

        // transcribe; the binding takes the context handle as its first argument
        const ret = whisper.full_default(ctx, pcm, "en", false);
        if (ret !== 0) {
            console.log("whisper: failed to transcribe");
            process.exit(1);
        }
    }

    // free memory
    {
        whisper.free(ctx);
    }
}).catch(function (err) {
    // module instantiation or any step above threw
    console.error(err);
    process.exit(1);
});
|
Loading…
Reference in new issue