<!doctype html>
<html lang="en-us">
<head>
<title>whisper.cpp : WASM example</title>
<style>
    /* Output log: a full-width, black-on-white-text monospace area that
       scrolls horizontally instead of wrapping long lines. */
    #output {
        width: 100%;
        height: 100%;
        margin: 0 auto;
        margin-top: 10px;
        border-left: 0px;
        border-right: 0px;
        padding-left: 0px;
        padding-right: 0px;
        display: block;
        background-color: black;
        color: white;
        font-size: 10px;
        font-family: 'Lucida Console', Monaco, monospace;
        outline: none;
        white-space: pre;
        overflow-wrap: normal;
        overflow-x: scroll;
    }
</style>
</head>
<body>
<div id="main-container">
<b>Minimal <a href="https://github.com/ggerganov/whisper.cpp">whisper.cpp</a> example running fully in the browser</b>
<br><br>
Usage instructions:<br>
<ul>
<li>Load a ggml model file (you can obtain one from <a href="https://ggml.ggerganov.com/">here</a>, recommended: <b>tiny</b> or <b>base</b>)</li>
<li>Select an audio file to transcribe or record audio from the microphone (sample: <a href="https://whisper.ggerganov.com/jfk.wav">jfk.wav</a>)</li>
<li>Click on the "Transcribe" button to start the transcription</li>
</ul>
Note that the computation is quite heavy and may take a few seconds to complete.<br>
The transcription results will be displayed in the text area below.<br><br>
<b>Important: your browser must support WASM SIMD instructions for this to work.</b>
<br><br><hr>
<!-- Model selection: download one of the hosted models or pick a local ggml file.
     The file input has its own id because ids must be unique in the document. -->
<div id="model">
    Model:
    <button type="button" id="fetch-whisper-tiny-en" onclick="loadWhisper('tiny.en')">tiny.en (75 MB)</button>
    <button type="button" id="fetch-whisper-tiny" onclick="loadWhisper('tiny')">tiny (75 MB)</button>
    <button type="button" id="fetch-whisper-base-en" onclick="loadWhisper('base.en')">base.en (142 MB)</button>
    <button type="button" id="fetch-whisper-base" onclick="loadWhisper('base')">base (142 MB)</button>
    <span id="fetch-whisper-progress"></span>
    <input type="file" id="model-file" name="file" onchange="loadFile(event, 'whisper.bin')">
</div>
<br>
<!-- radio button to select between file upload or microphone -->
<div id="input">
    Input:
    <label><input type="radio" id="radio-file" name="input" value="file" checked onchange="changeInput('file')"> File</label>
    <label><input type="radio" id="mic" name="input" value="mic" onchange="changeInput('mic')"> Microphone</label>
</div>
<br>
<!-- audio file picker; shown while the "File" input mode is selected -->
<div id="input_file">
    <label for="audio-file">Audio file:</label>
    <input type="file" id="audio-file" name="file" onchange="loadAudio(event)">
</div>
<!-- microphone controls; hidden until the "Microphone" input mode is selected -->
<div id="input_mic" style="display: none;">
    Microphone:
    <button type="button" id="start" onclick="startRecording()">Start</button>
    <button type="button" id="stop" onclick="stopRecording()" disabled>Stop</button>
    <!-- progress bar to show recording progress -->
    <br><br>
    <div id="progress" style="display: none;">
        <div id="progress-bar" style="width: 0%; height: 10px; background-color: #4CAF50;"></div>
        <div id="progress-text">0%</div>
    </div>
</div>
<!-- hidden preview player; <source> must precede the fallback text, and the
     tag name inside the fallback text is escaped so it is not parsed as markup -->
<audio controls id="audio" loop hidden>
    <source id="source" src="" type="audio/wav">
    Your browser does not support the &lt;audio&gt; tag.
</audio>
<hr><br>
<!-- language selection plus the Transcribe / Translate actions -->
<table>
    <tr>
        <td>
            <label for="language">Language:</label>
            <select id="language" name="language">
                <!-- English is listed first so it is the default selection -->
                <option value="en">English</option>
                <option value="ar">Arabic</option>
                <option value="hy">Armenian</option>
                <option value="az">Azerbaijani</option>
                <option value="eu">Basque</option>
                <option value="be">Belarusian</option>
                <option value="bn">Bengali</option>
                <option value="bg">Bulgarian</option>
                <option value="ca">Catalan</option>
                <option value="zh">Chinese</option>
                <option value="hr">Croatian</option>
                <option value="cs">Czech</option>
                <option value="da">Danish</option>
                <option value="nl">Dutch</option>
                <option value="en">English</option>
                <option value="et">Estonian</option>
                <option value="tl">Filipino</option>
                <option value="fi">Finnish</option>
                <option value="fr">French</option>
                <option value="gl">Galician</option>
                <option value="ka">Georgian</option>
                <option value="de">German</option>
                <option value="el">Greek</option>
                <option value="gu">Gujarati</option>
                <option value="iw">Hebrew</option>
                <option value="hi">Hindi</option>
                <option value="hu">Hungarian</option>
                <option value="is">Icelandic</option>
                <option value="id">Indonesian</option>
                <option value="ga">Irish</option>
                <option value="it">Italian</option>
                <option value="ja">Japanese</option>
                <option value="kn">Kannada</option>
                <option value="ko">Korean</option>
                <option value="la">Latin</option>
                <option value="lv">Latvian</option>
                <option value="lt">Lithuanian</option>
                <option value="mk">Macedonian</option>
                <option value="ms">Malay</option>
                <option value="mt">Maltese</option>
                <option value="no">Norwegian</option>
                <option value="fa">Persian</option>
                <option value="pl">Polish</option>
                <option value="pt">Portuguese</option>
                <option value="ro">Romanian</option>
                <option value="ru">Russian</option>
                <option value="sr">Serbian</option>
                <option value="sk">Slovak</option>
                <option value="sl">Slovenian</option>
                <option value="es">Spanish</option>
                <option value="sw">Swahili</option>
                <option value="sv">Swedish</option>
                <option value="ta">Tamil</option>
                <option value="te">Telugu</option>
                <option value="th">Thai</option>
                <option value="tr">Turkish</option>
                <option value="uk">Ukrainian</option>
                <option value="ur">Urdu</option>
                <option value="vi">Vietnamese</option>
                <option value="cy">Welsh</option>
                <option value="yi">Yiddish</option>
            </select>
        </td>
        <td>
            <button type="button" onclick="onProcess(false);">Transcribe</button>
        </td>
        <td>
            <button type="button" onclick="onProcess(true);">Translate</button>
        </td>
    </tr>
</table>
<br>
<!-- textarea with height filling the rest of the page; receives the log and transcription output -->
<textarea id="output" rows="20" aria-label="Output log"></textarea>
<br><br>
<!-- build information; the @...@ placeholders are left exactly as they appear in the template -->
<div class="cell-version">
    <span>
        |
        Build time: <span class="nav-link">@GIT_DATE@</span> |
        Commit hash: <a class="nav-link" href="https://github.com/ggerganov/whisper.cpp/commit/@GIT_SHA1@">@GIT_SHA1@</a> |
        Commit subject: <span class="nav-link">@GIT_COMMIT_SUBJECT@</span> |
        <a class="nav-link" href="https://github.com/ggerganov/whisper.cpp/tree/master/examples/whisper.wasm">Source Code</a> |
    </span>
</div>
</div>
<script type="text/javascript">
// Hook invoked with the decoded samples after audio is loaded or recorded, and
// with null when decoding fails. It is currently a no-op: the preview-player
// code below is disabled until the samples can be converted to WAV.
// TODO: convert audio buffer to WAV
function setAudio(audio) {
//if (audio) {
// // convert to 16-bit PCM
// var blob = new Blob([audio], { type: 'audio/wav' });
// var url = URL.createObjectURL(blob);
// document.getElementById('source').src = url;
// document.getElementById('audio').hidden = false;
// document.getElementById('audio').loop = false;
// document.getElementById('audio').load();
//} else {
// document.getElementById('audio').hidden = true;
//}
}
// Switch the visible input panel.
// input: 'file' shows the file picker; any other value shows the microphone
// controls together with the recording progress display.
function changeInput(input) {
    var useFile = (input == 'file');

    document.getElementById('input_file').style.display = useFile ? 'block' : 'none';
    document.getElementById('input_mic').style.display = useFile ? 'none' : 'block';
    document.getElementById('progress').style.display = useFile ? 'none' : 'block';
}
// Logging helper for the page. The returned function joins multiple arguments
// with single spaces, echoes the line to the browser console and appends it
// (plus a newline) to the #output textarea, keeping the newest line in view.
// If the textarea is not found, only the console receives the text.
var printTextarea = (function() {
    var element = document.getElementById('output');
    // start from an empty log (the browser may have restored old contents)
    if (element) element.value = ''; // clear browser cache
    return function(text) {
        if (arguments.length > 1) text = Array.prototype.slice.call(arguments).join(' ');
        console.log(text);
        if (element) {
            element.value += text + "\n";
            element.scrollTop = element.scrollHeight; // focus on bottom
        }
    };
})();
// Global Module object: its print, printErr and setStatus hooks all route text
// into the on-page log through printTextarea.
// NOTE(review): presumably this is the Emscripten module configuration picked
// up by whisper.js, which is loaded after this script - confirm.
var Module = {
print: printTextarea,
printErr: printTextarea,
// status messages get the same 'js: ' prefix as the page's own log lines
setStatus: function(text) {
printTextarea('js: ' + text);
},
// no-op: the number of remaining dependencies is not reported
monitorRunDependencies: function(left) {
}
};
// maximum length, in seconds, of the audio kept for processing; also the
// microphone recording limit
const kMaxAudio_s = 120;
// samples per second assumed when converting kMaxAudio_s into a sample count
const kSampleRate = 16000;
// use the webkit-prefixed constructors when the standard names are missing
window.AudioContext = window.AudioContext || window.webkitAudioContext;
window.OfflineAudioContext = window.OfflineAudioContext || window.webkitOfflineAudioContext;
// web audio context (created on first use by loadAudio / startRecording)
var context = null;
// audio data (first-channel samples of the last loaded or recorded audio, or null)
var audio = null;
// the whisper instance (value returned by Module.init, set in onProcess)
var instance = null;
// name of the model file most recently written by storeFS
var model_fname = '';
// helper function
// Copy the bytes of the typed array `src` into a freshly allocated buffer and
// return that buffer viewed as a `type` typed array (for example
// Uint8Array -> Int8Array). The bytes are reinterpreted, not numerically
// converted, and the result does not share memory with `src`.
// Throws a RangeError (from the `type` constructor) when src.byteLength is
// not a multiple of the target element size.
function convertTypedArray(src, type) {
    var buffer = new ArrayBuffer(src.byteLength);
    // copy through a view of src's own element type; set() returns undefined,
    // so there is no result to keep
    new src.constructor(buffer).set(src);
    return new type(buffer);
}
//
// load model
//
// version number passed to indexedDB.open() by loadRemote
let dbVersion = 1
// name of the IndexedDB database that caches downloaded models
let dbName = 'whisper.ggerganov.com';
// IndexedDB entry point, falling back to the vendor-prefixed variants
let indexedDB = window.indexedDB || window.mozIndexedDB || window.webkitIndexedDB || window.msIndexedDB
// Write `buf` into the WASM filesystem root under the name `fname`, replacing
// any previous file of that name, and remember the name in model_fname.
function storeFS(fname, buf) {
    // drop an earlier copy first; a failing unlink is deliberately ignored
    try {
        Module.FS_unlink(fname);
    } catch (unlinkError) {
        // ignore
    }

    // write to WASM file using FS_createDataFile
    Module.FS_createDataFile("/", fname, buf, true, true);
    model_fname = fname;

    var message = 'js: stored model: ' + fname + ' size: ' + buf.length;
    printTextarea(message);
}
// Read the model file chosen in a file <input> and hand its bytes to storeFS
// under the name `fname`. Does nothing when no file was selected.
// event: change event of the file input; fname: destination file name.
function loadFile(event, fname) {
    var selected = event.target.files[0] || null;
    if (selected == null) {
        return;
    }

    printTextarea("js: loading model: " + selected.name + ", size: " + selected.size + " bytes");
    printTextarea('js: please wait ...');

    var fileReader = new FileReader();
    fileReader.onload = function() {
        storeFS(fname, new Uint8Array(fileReader.result));
    };
    fileReader.readAsArrayBuffer(selected);

    // a local model was picked, so hide the download buttons
    [
        'fetch-whisper-tiny-en',
        'fetch-whisper-base-en',
        'fetch-whisper-tiny',
        'fetch-whisper-base'
    ].forEach(function(id) {
        document.getElementById(id).style.display = 'none';
    });
}
// fetch a remote file from remote URL using the Fetch API
//
// url        - address to download
// elProgress - element whose innerHTML is set to the download percentage
//              (only updated when the response carries a content-length header)
//
// Resolves to a Uint8Array holding the whole response body, or to undefined
// when the download fails. Failures are logged rather than thrown, so a caller
// only needs to test the resolved value: this covers HTTP error statuses as
// well as network errors and streams that break part-way through.
async function fetchRemote(url, elProgress) {
    printTextarea('js: downloading with fetch()...');

    try {
        const response = await fetch(
            url,
            {
                method: 'GET',
                headers: {
                    'Content-Type': 'application/octet-stream',
                },
            }
        );

        if (!response.ok) {
            printTextarea('js: failed to fetch ' + url);
            return;
        }

        const contentLength = response.headers.get('content-length');
        const total = parseInt(contentLength, 10);
        const reader = response.body.getReader();

        var chunks = [];
        var receivedLength = 0;
        var progressLast = -1;

        while (true) {
            const { done, value } = await reader.read();

            if (done) {
                break;
            }

            chunks.push(value);
            receivedLength += value.length;

            // without a content-length header there is no total to compare against
            if (contentLength) {
                // update progress bar element with the new percentage
                elProgress.innerHTML = Math.round((receivedLength / total) * 100) + '%';

                // log only when the progress crosses into a new 10% step
                var progressCur = Math.round((receivedLength / total) * 10);
                if (progressCur != progressLast) {
                    printTextarea('js: fetching ' + 10*progressCur + '% ...');
                    progressLast = progressCur;
                }
            }
        }

        // concatenate the received chunks into one contiguous array
        var chunksAll = new Uint8Array(receivedLength);
        var position = 0;
        for (var chunk of chunks) {
            chunksAll.set(chunk, position);
            position += chunk.length;
        }

        return chunksAll;
    } catch (err) {
        // fetch() and reader.read() reject on network errors
        printTextarea('js: failed to fetch ' + url + ': ' + err);
        return;
    }
}
// load remote data
// - check if the data is already in the IndexedDB
// - if not, fetch it from the remote URL and store it in the IndexedDB
// - store it in WASM memory
//
// url        - remote address of the data; also used as the IndexedDB key
// dst        - file name handed to storeFS
// elProgress - element passed on to fetchRemote for progress display
// size_mb    - download size shown in the confirmation dialog
//
// When the user declines the download, or the download / caching fails, the
// model buttons are shown again so another attempt is possible.
function loadRemote(url, dst, elProgress, size_mb) {
    // make the model download buttons visible again
    function restoreFetchButtons() {
        [
            'fetch-whisper-tiny-en',
            'fetch-whisper-tiny',
            'fetch-whisper-base-en',
            'fetch-whisper-base'
        ].forEach(function (id) {
            var el = document.getElementById(id);
            if (el) el.style.display = 'inline-block';
        });
    }

    // query the storage quota and print it
    // (the StorageManager API is not available everywhere, e.g. outside
    // secure contexts, so its absence must not abort the model load)
    if (navigator.storage && navigator.storage.estimate) {
        navigator.storage.estimate().then(function (estimate) {
            printTextarea('js: storage quota: ' + estimate.quota + ' bytes');
            printTextarea('js: storage usage: ' + estimate.usage + ' bytes');
        });
    } else {
        printTextarea('js: navigator.storage.estimate() is not supported');
    }

    // check if the data is already in the IndexedDB
    var openRequest = indexedDB.open(dbName, dbVersion);

    openRequest.onupgradeneeded = function (event) {
        var db = event.target.result;
        if (db.version == 1) {
            db.createObjectStore('models', { autoIncrement: false });
            printTextarea('js: created IndexedDB ' + db.name + ' version ' + db.version);
        } else {
            // clear the database
            var objectStore = event.currentTarget.transaction.objectStore('models');
            objectStore.clear();
            printTextarea('js: cleared IndexedDB ' + db.name + ' version ' + db.version);
        }
    };

    openRequest.onsuccess = function (event) {
        var db = event.target.result;
        var getRequest = db.transaction(['models'], 'readonly').objectStore('models').get(url);

        getRequest.onsuccess = function () {
            if (getRequest.result) {
                printTextarea('js: "' + url + '" is already in the IndexedDB');
                storeFS(dst, getRequest.result);
                return;
            }

            // data is not in the IndexedDB
            printTextarea('js: "' + url + '" is not in the IndexedDB');

            // alert and ask the user to confirm
            if (!confirm('You are about to download ' + size_mb + ' MB of data.\nThe model data will be cached in the browser for future use.\n\nPress OK to continue.')) {
                restoreFetchButtons();
                return;
            }

            fetchRemote(url, elProgress).then(function (data) {
                if (!data) {
                    // download failed (already logged by fetchRemote)
                    restoreFetchButtons();
                    return;
                }

                // store the data in the IndexedDB
                var storeOpenRequest = indexedDB.open(dbName, dbVersion);

                storeOpenRequest.onsuccess = function (event) {
                    var putRequest = event.target.result
                        .transaction(['models'], 'readwrite')
                        .objectStore('models')
                        .put(data, url);

                    putRequest.onsuccess = function () {
                        printTextarea('js: "' + url + '" stored in the IndexedDB');
                        storeFS(dst, data);
                    };

                    putRequest.onerror = function () {
                        printTextarea('js: failed to store "' + url + '" in the IndexedDB');
                        restoreFetchButtons();
                    };
                };

                storeOpenRequest.onerror = function () {
                    printTextarea('js: failed to open IndexedDB');
                    restoreFetchButtons();
                };
            }).catch(function (err) {
                // fetchRemote rejected (for example a network error)
                printTextarea('js: failed to fetch ' + url + ': ' + err);
                restoreFetchButtons();
            });
        };

        getRequest.onerror = function () {
            printTextarea('js: failed to get data from the IndexedDB');
            restoreFetchButtons();
        };
    };

    openRequest.onerror = function () {
        printTextarea('js: failed to open IndexedDB');
        restoreFetchButtons();
    };

    openRequest.onblocked = function () {
        printTextarea('js: failed to open IndexedDB: blocked');
    };

    openRequest.onabort = function () {
        printTextarea('js: failed to open IndexedDB: abort');
    };
}
// Start loading one of the hosted whisper models into the WASM filesystem as
// 'whisper.bin' (from the IndexedDB cache when present, otherwise by download).
// model: one of 'tiny.en', 'tiny', 'base.en', 'base'. Any other value is
// reported in the log and ignored.
function loadWhisper(model) {
    let urls = {
        'tiny.en': 'https://whisper.ggerganov.com/ggml-model-whisper-tiny.en.bin',
        'tiny':    'https://whisper.ggerganov.com/ggml-model-whisper-tiny.bin',
        'base.en': 'https://whisper.ggerganov.com/ggml-model-whisper-base.en.bin',
        'base':    'https://whisper.ggerganov.com/ggml-model-whisper-base.bin',
    };

    // download sizes in MB, shown in the confirmation dialog
    let sizes = {
        'tiny.en': 75,
        'tiny':    75,
        'base.en': 142,
        'base':    142,
    };

    // reject unknown names up front: an undefined URL would otherwise be used
    // as the IndexedDB lookup key
    if (!Object.prototype.hasOwnProperty.call(urls, model)) {
        printTextarea('js: unknown model: ' + model);
        return;
    }

    let url     = urls[model];
    let dst     = 'whisper.bin';
    let el      = document.getElementById('fetch-whisper-progress');
    let size_mb = sizes[model];

    // kept as a global for backward compatibility, but assigned explicitly
    // instead of through an undeclared identifier
    window.model_whisper = model;

    // hide the model buttons while loading
    document.getElementById('fetch-whisper-tiny-en').style.display = 'none';
    document.getElementById('fetch-whisper-base-en').style.display = 'none';
    document.getElementById('fetch-whisper-tiny').style.display = 'none';
    document.getElementById('fetch-whisper-base').style.display = 'none';

    loadRemote(url, dst, el, size_mb);
}
//
// audio file
//
// Decode the audio file chosen in a file <input> and keep the samples of its
// first channel in the global `audio`, truncated to kMaxAudio_s seconds.
// On a decoding or rendering error `audio` is reset to null. In both cases
// setAudio is called with the new value.
// event: change event of the file input.
function loadAudio(event) {
    if (!context) {
        context = new AudioContext({sampleRate: kSampleRate});
    }

    var file = event.target.files[0] || null;
    if (file == null) {
        return;
    }

    printTextarea('js: loading audio: ' + file.name + ', size: ' + file.size + ' bytes');
    printTextarea('js: please wait ...');

    // shared failure path for decoding and rendering
    function onDecodeError(e) {
        printTextarea('js: error decoding audio: ' + e);
        audio = null;
        setAudio(audio);
    }

    var reader = new FileReader();
    reader.onload = function() {
        context.decodeAudioData(reader.result, function(audioBuffer) {
            var offlineContext = new OfflineAudioContext(audioBuffer.numberOfChannels, audioBuffer.length, audioBuffer.sampleRate);
            var source = offlineContext.createBufferSource();
            source.buffer = audioBuffer;
            source.connect(offlineContext.destination);
            source.start(0);

            offlineContext.startRendering().then(function(renderedBuffer) {
                // only the first channel is used
                audio = renderedBuffer.getChannelData(0);
                printTextarea('js: audio loaded, size: ' + audio.length);

                // truncate to first kMaxAudio_s seconds
                if (audio.length > kMaxAudio_s*kSampleRate) {
                    audio = audio.slice(0, kMaxAudio_s*kSampleRate);
                    printTextarea('js: truncated audio to first ' + kMaxAudio_s + ' seconds');
                }

                setAudio(audio);
            }).catch(onDecodeError);
        }, onDecodeError);
    };
    reader.readAsArrayBuffer(file);
}
//
// microphone
//
// MediaRecorder created by startRecording for the current recording
var mediaRecorder = null;
// true while recording should continue; polled by the timer in startRecording
var doRecording = false;
// Date.now() value captured when the current recording was started
var startTime = 0;
// Ask the running recording to stop. This only clears the flag; the recorder
// itself is stopped when the periodic check in startRecording next sees it.
function stopRecording() {
doRecording = false;
}
// record up to kMaxAudio_s seconds of audio from the microphone
// check if doRecording is false every 1000 ms and stop recording if so
// update progress information
//
// When the recording ends, the captured data is decoded and the samples of its
// first channel are stored in the global `audio` (truncated to kMaxAudio_s
// seconds). If the microphone cannot be opened, the error is logged and the
// Start/Stop buttons return to their idle state.
function startRecording() {
    if (!navigator.mediaDevices || !navigator.mediaDevices.getUserMedia) {
        printTextarea('js: error getting audio stream: getUserMedia is not supported');
        return;
    }

    if (!context) {
        context = new AudioContext({sampleRate: kSampleRate});
    }

    var elStart = document.getElementById('start');
    var elStop  = document.getElementById('stop');

    elStart.disabled = true;
    elStop.disabled = false;

    document.getElementById('progress-bar').style.width = '0%';
    document.getElementById('progress-text').innerHTML = '0%';

    doRecording = true;
    startTime = Date.now();

    var chunks = [];
    var stream = null;
    var recorder = null;   // recorder of THIS recording (the global may hold an older one)
    var interval = null;
    var timeout = null;

    // put the Start/Stop buttons back into their idle state
    function resetButtons() {
        elStart.disabled = false;
        elStop.disabled = true;
    }

    // release the microphone, if it was acquired
    function releaseStream() {
        if (stream) {
            stream.getTracks().forEach(function(track) {
                track.stop();
            });
        }
    }

    // cancel the poll and the max-duration timer of this recording, so that a
    // stale timer can never stop a later recording
    function clearTimers() {
        clearInterval(interval);
        clearTimeout(timeout);
    }

    // shared failure path for decoding and rendering
    function onDecodeError(e) {
        printTextarea('js: error decoding audio: ' + e);
        audio = null;
        setAudio(audio);
    }

    navigator.mediaDevices.getUserMedia({audio: true, video: false})
        .then(function(s) {
            stream = s;

            if (!doRecording) {
                // Stop was pressed before access to the microphone was granted
                clearTimers();
                releaseStream();
                resetButtons();
                return;
            }

            recorder = new MediaRecorder(stream);
            mediaRecorder = recorder;

            recorder.ondataavailable = function(e) {
                chunks.push(e.data);
            };

            recorder.onstop = function(e) {
                var blob = new Blob(chunks, { 'type' : 'audio/ogg; codecs=opus' });
                chunks = [];

                resetButtons();

                var reader = new FileReader();
                reader.onload = function() {
                    context.decodeAudioData(reader.result, function(audioBuffer) {
                        var offlineContext = new OfflineAudioContext(audioBuffer.numberOfChannels, audioBuffer.length, audioBuffer.sampleRate);
                        var source = offlineContext.createBufferSource();
                        source.buffer = audioBuffer;
                        source.connect(offlineContext.destination);
                        source.start(0);

                        offlineContext.startRendering().then(function(renderedBuffer) {
                            // only the first channel is used
                            audio = renderedBuffer.getChannelData(0);
                            printTextarea('js: audio recorded, size: ' + audio.length);

                            // truncate to first kMaxAudio_s seconds
                            if (audio.length > kMaxAudio_s*kSampleRate) {
                                audio = audio.slice(0, kMaxAudio_s*kSampleRate);
                                printTextarea('js: truncated audio to first ' + kMaxAudio_s + ' seconds');
                            }

                            setAudio(audio);
                        }).catch(onDecodeError);
                    }, onDecodeError);
                };
                reader.readAsArrayBuffer(blob);
            };

            recorder.start();
        })
        .catch(function(err) {
            printTextarea('js: error getting audio stream: ' + err);

            // nothing is being recorded: undo the "recording" state
            doRecording = false;
            clearTimers();
            releaseStream();
            resetButtons();
        });

    interval = setInterval(function() {
        if (!doRecording) {
            clearTimers();

            // the recorder does not exist yet if the permission is still pending
            if (recorder && recorder.state !== 'inactive') {
                recorder.stop();
            }
            releaseStream();
        }

        var percent = Math.min(100, 100*(Date.now() - startTime)/1000/kMaxAudio_s);
        document.getElementById('progress-bar').style.width = percent + '%';
        document.getElementById('progress-text').innerHTML = percent.toFixed(0) + '%';
    }, 1000);

    printTextarea('js: recording ...');

    timeout = setTimeout(function() {
        if (doRecording) {
            printTextarea('js: recording stopped after ' + kMaxAudio_s + ' seconds');
            stopRecording();
        }
    }, kMaxAudio_s*1000);
}
//
// transcribe
//
// Run whisper over the loaded audio using the language chosen in the
// #language select. `translate` is passed through to Module.full_default
// (the Translate button passes true, the Transcribe button false).
// The whisper instance is created from 'whisper.bin' on first use.
function onProcess(translate) {
    if (!instance) {
        instance = Module.init('whisper.bin');

        if (!instance) {
            printTextarea("js: failed to initialize whisper");
            return;
        }

        printTextarea("js: whisper initialized, instance: " + instance);
        document.getElementById('model').innerHTML = 'Model loaded: ' + model_fname;
    }

    if (!audio) {
        printTextarea("js: no audio data");
        return;
    }

    printTextarea('');
    printTextarea('js: processing - this might take a while ...');
    printTextarea('js: the page will be unresponsive until the processing is completed');
    printTextarea('');

    // defer the blocking call so the messages above can be displayed first
    setTimeout(function() {
        var language = document.getElementById('language').value;
        var ret = Module.full_default(instance, audio, language, translate);
        console.log('js: full_default returned: ' + ret);
        if (ret) {
            printTextarea("js: whisper returned: " + ret);
        }
    }, 100);
}
</script>
<script type="text/javascript" src="whisper.js"></script>
</body>
</html>