@@ -1,7 +1,7 @@
<!doctype html>
<html lang="en-us">
<head>
-<title>talk : GPT-2 meets Whisper in WebAssembly</title>
+<title>Talk - GPT-2 meets Whisper in WebAssembly</title>
<style>
#output {
@@ -27,29 +27,50 @@
</head>
<body>
<div id="main-container">
-<b>talk : GPT-2 meets Whisper in WebAssembly</b>
+<b>Talk - GPT-2 meets Whisper in WebAssembly</b>
<br><br>
-WIP IN PROGRESS
+On this page you can talk with an AI entity. It uses:
<br><br><hr>
+<ul>
+<li><a href="https://github.com/ggerganov/whisper.cpp">OpenAI's Whisper</a> model to listen to you as you speak into the microphone</li>
+<li><a href="https://github.com/ggerganov/ggml/tree/master/examples/gpt-2">OpenAI's GPT-2</a> model to generate a text response</li>
+<li><a href="https://developer.mozilla.org/en-US/docs/Web/API/Web_Speech_API">Web Speech API</a> to speak the response to you through the speakers</li>
+</ul>
+All of this runs <b>locally in your browser</b> using WebAssembly.<br>
+You can find out more about this project on <a href="https://github.com/ggerganov/whisper.cpp/tree/master/examples/talk.wasm">GitHub</a>.
+<br><br>
+The page does some heavy computations, so make sure:
+<ul>
+<li>To use a modern web browser (e.g. Chrome, Firefox)</li>
+<li>To use a fast desktop or laptop computer (i.e. not a mobile phone)</li>
+<li>That your browser supports WASM <a href="https://webassembly.org/roadmap/">Fixed-width SIMD</a></li>
+</ul>
<hr>
<br>
< div id = "model-whisper" >
Whisper Model:
< span id = "model-whisper-status" > Whisper model:< / span >
< button id = "fetch-whisper-tiny-en" onclick = "loadWhisper('tiny.en')" > tiny.en (75 MB)< / button >
< button id = "fetch-whisper-base-en" onclick = "loadWhisper('base.en')" > base.en (142 MB)< / button >
< span id = "fetch-whisper-progress" > < / span >
<!--
< input type = "file" id = "file" name = "file" onchange = "loadFile(event, 'whisper.bin')" / >
< input type = "file" id = "file" name = "file" onchange = "loadFile(event, 'whisper.bin')" / >
-->
</div>
<br>
<div id="model-gpt-2">
-GPT-2 Model:
+<span id="model-gpt-2-status">GPT-2 model:</span>
<button id="fetch-gpt-2-small" onclick="loadGPT2('small')">small 117M (240 MB)</button>
<!-- <button id="fetch-gpt-2-medium" onclick="loadGPT2('medium')">medium 345M (720 MB)</button> -->
<span id="fetch-gpt-2-progress"></span>
@@ -64,14 +85,14 @@
< div id = "input_mic" >
< button id = "start" onclick = "onStart()" > Start< / button >
< button id = "stop" onclick = "onStop()" disabled > Stop< / button >
< button id = "speak" onclick = "speakTest()" > Speak< / button >
< select id = "voice" onchange = "onVoiceChange()" >
< option value = "0" > Default< / option >
< / select >
< button id = "speak" onclick = "onSpeak('Hello')" > Say Hello< / button >
< button id = "speak" onclick = "clearCache()" > Clear Cache< / button >
< / div >
< audio controls = "controls" id = "audio" loop hidden >
Your browser does not support the < audio> tag.
< source id = "source" src = "" type = "audio/wav" / >
< / audio >
< br >
< hr > < br >
@@ -94,21 +115,6 @@
</div>
<script type='text/javascript'>
-// TODO: convert audio buffer to WAV
-function setAudio(audio) {
-//if (audio) {
-//    // convert to 16-bit PCM
-//    var blob = new Blob([audio], { type: 'audio/wav' });
-//    var url = URL.createObjectURL(blob);
-//    document.getElementById('source').src = url;
-//    document.getElementById('audio').hidden = false;
-//    document.getElementById('audio').loop = false;
-//    document.getElementById('audio').load();
-//} else {
-//    document.getElementById('audio').hidden = true;
-//}
-}
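// Editor's sketch for the TODO above (not part of the diff): one way to build
// a playable WAV Blob from a mono Float32Array; the function name and the
// fixed 16-bit mono format are illustrative assumptions.
function audioToWavBlob(audio, sampleRate) {
    // clamp and convert float samples in [-1, 1] to 16-bit PCM
    var pcm = new Int16Array(audio.length);
    for (var i = 0; i < audio.length; i++) {
        var s = Math.max(-1, Math.min(1, audio[i]));
        pcm[i] = s < 0 ? s * 0x8000 : s * 0x7FFF;
    }
    // 44-byte canonical WAV header
    var header = new ArrayBuffer(44);
    var v = new DataView(header);
    v.setUint32(0, 0x52494646, false);          // "RIFF"
    v.setUint32(4, 36 + pcm.byteLength, true);  // chunk size
    v.setUint32(8, 0x57415645, false);          // "WAVE"
    v.setUint32(12, 0x666d7420, false);         // "fmt "
    v.setUint32(16, 16, true);                  // fmt chunk size
    v.setUint16(20, 1, true);                   // format: PCM
    v.setUint16(22, 1, true);                   // channels: mono
    v.setUint32(24, sampleRate, true);          // sample rate
    v.setUint32(28, sampleRate * 2, true);      // byte rate
    v.setUint16(32, 2, true);                   // block align
    v.setUint16(34, 16, true);                  // bits per sample
    v.setUint32(36, 0x64617461, false);         // "data"
    v.setUint32(40, pcm.byteLength, true);      // data size
    return new Blob([header, pcm.buffer], { type: 'audio/wav' });
}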
function changeInput(input) {
if (input == 'file') {
document.getElementById('input_file').style.display = 'block';
@@ -132,16 +138,6 @@
};
})();
-var Module = {
-print: printTextarea,
-printErr: printTextarea,
-setStatus: function(text) {
-printTextarea('js: ' + text);
-},
-monitorRunDependencies: function(left) {
-}
-};
const kMaxAudio_s = 10;
const kRestartRecording_s = 15;
const kSampleRate = 16000;
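// Note: Whisper models consume 16 kHz mono PCM, which is why kSampleRate is
// 16000; the recording path is expected to resample the microphone input to
// this rate.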
@@ -159,8 +155,50 @@
// the talk instance
var instance = null;
// model names
var model_whisper = null;
var model_gpt_2 = null;
+// speech synthesis
+const synth = window.speechSynthesis;
+var voice = null;
+var Module = {
+print: printTextarea,
+printErr: printTextarea,
+setStatus: function(text) {
+printTextarea('js: ' + text);
+},
+monitorRunDependencies: function(left) {
+},
+preRun: function() {
+printTextarea('js: preparing ...');
+},
+postRun: function() {
+printTextarea('js: loaded successfully!');
+// populate the voice list
+var voices = synth.getVoices();
+var el = document.getElementById('voice');
+var n = 0;
+voices.forEach(function(voice, i) {
+if (!voice.lang.startsWith('en')) return;
+var option = document.createElement('option');
+option.value = i;
+option.innerHTML = voice.name + ' (' + voice.lang + ')';
+el.appendChild(option);
+n++;
+});
+// select random voice
+if (n > 0) {
+var i = Math.floor(Math.random() * n);
+el.selectedIndex = i;
+voice = voices[document.getElementById('voice').options[i].value];
+}
+}
+};
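// Editor's note (not part of the diff): getVoices() can legitimately return
// an empty list until the browser has loaded its voices — Chrome signals
// readiness via the 'voiceschanged' event. A more robust variant of the
// population step above could defer until the list is ready, e.g.:
function whenVoicesReady(populate) {
    if (synth.getVoices().length > 0) {
        populate();
    } else {
        synth.addEventListener('voiceschanged', populate, { once: true });
    }
}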
// helper function
function convertTypedArray(src, type) {
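// (body elided by the hunk boundary; a typical implementation copies the
// source array's values and reinterprets the underlying bytes as the
// destination type, roughly:)
//     var buffer = new ArrayBuffer(src.byteLength);
//     new src.constructor(buffer).set(src);
//     return new type(buffer);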
@@ -185,6 +223,12 @@
Module.FS_createDataFile("/", fname, buf, true, true);
printTextarea('js: stored model: ' + fname + ' size: ' + buf.length);
+if (fname == 'whisper.bin') {
+document.getElementById('model-whisper').innerHTML = 'Whisper model: loaded "' + model_whisper + '"!';
+} else if (fname == 'gpt-2.bin') {
+document.getElementById('model-gpt-2').innerHTML = 'GPT-2 model: loaded "' + model_gpt_2 + '"!';
+}
}
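// FS_createDataFile(parent, name, data, canRead, canWrite) above is the
// Emscripten API that places the downloaded model bytes on the virtual
// filesystem at "/whisper.bin" or "/gpt-2.bin", where the native code can
// open them with a plain fopen().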
let dbVersion = 1
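// Sketch of the IndexedDB lookup pattern that loadRemote below relies on
// (dbName is used elsewhere in this file alongside dbVersion; the 'models'
// object-store name here is an illustrative assumption):
function dbGet(key) {
    return new Promise(function (resolve, reject) {
        var rq = indexedDB.open(dbName, dbVersion);
        rq.onupgradeneeded = function () {
            rq.result.createObjectStore('models');
        };
        rq.onsuccess = function () {
            var get = rq.result.transaction(['models'], 'readonly')
                               .objectStore('models').get(key);
            get.onsuccess = function () { resolve(get.result); };
            get.onerror   = function () { reject(get.error);   };
        };
        rq.onerror = function () { reject(rq.error); };
    });
}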
@@ -230,9 +274,10 @@
if (contentLength) {
// update progress bar element with the new percentage
-elProgress.innerHTML = Math.round((receivedLength / total) * 100) + '%';
+var progressCur = Math.round((receivedLength / total) * 10);
+if (progressCur != progressLast) {
+elProgress.innerHTML = 10*progressCur + '%';
+printTextarea('js: fetching ' + 10*progressCur + '% ...');
+progressLast = progressCur;
+}
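// Editor's sketch (not part of the diff): the streaming-download pattern the
// progress code above sits inside — read the response body in chunks while
// reporting coarse progress; fetchWithProgress is an illustrative name.
async function fetchWithProgress(url, onProgress) {
    const response = await fetch(url);
    const total = Number(response.headers.get('Content-Length')); // may be 0
    const reader = response.body.getReader();
    const chunks = [];
    let received = 0;
    for (;;) {
        const { done, value } = await reader.read();
        if (done) break;
        chunks.push(value);
        received += value.length;
        if (total) onProgress(Math.round((received / total) * 100));
    }
    // concatenate the chunks into a single Uint8Array
    const data = new Uint8Array(received);
    let offset = 0;
    for (const chunk of chunks) {
        data.set(chunk, offset);
        offset += chunk.length;
    }
    return data;
}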
@@ -253,7 +298,7 @@
// - check if the data is already in the IndexedDB
// - if not, fetch it from the remote URL and store it in the IndexedDB
// - store it in WASM memory
-function loadRemote(url, dst, elProgress) {
+function loadRemote(url, dst, elProgress, size_mb) {
// query the storage quota and print it
navigator.storage.estimate().then(function (estimate) {
printTextarea('js: storage quota: ' + estimate.quota + ' bytes');
@@ -290,6 +335,14 @@
// data is not in the IndexedDB
printTextarea('js: "' + url + '" is not in the IndexedDB');
+// alert and ask the user to confirm
+if (!confirm('You are about to download ' + size_mb + ' MB of data.\nThe model data will be cached in the browser for future use.\n\nPress OK to continue.')) {
+document.getElementById('fetch-whisper-tiny-en').style.display = 'inline-block';
+document.getElementById('fetch-whisper-base-en').style.display = 'inline-block';
+document.getElementById('fetch-gpt-2-small').style.display = 'inline-block';
+return;
+}
fetchRemote(url, elProgress).then(function (data) {
if (data) {
// store the data in the IndexedDB
@@ -338,11 +391,23 @@
'base.en': 'https://talk.ggerganov.com/ggml-model-whisper-base.en.bin',
};
-let url = urls[model];
-let dst = 'whisper.bin';
-let el = document.getElementById('fetch-whisper-progress');
+let sizes = {
+'tiny.en': 75,
+'base.en': 142,
+};
+let url = urls[model];
+let dst = 'whisper.bin';
+let el = document.getElementById('fetch-whisper-progress');
+let size_mb = sizes[model];
model_whisper = model;
document.getElementById('fetch-whisper-tiny-en').style.display = 'none';
document.getElementById('fetch-whisper-base-en').style.display = 'none';
+document.getElementById('model-whisper-status').innerHTML = 'Whisper model: loading "' + model + '" ... ';
-loadRemote(url, dst, el);
+loadRemote(url, dst, el, size_mb);
}
function loadGPT2(model) {
@@ -351,11 +416,22 @@
'medium': 'https://talk.ggerganov.com/ggml-model-gpt-2-345M.bin',
};
-let url = urls[model];
-let dst = 'gpt-2.bin';
-let el = document.getElementById('fetch-gpt-2-progress');
+let sizes = {
+'small': 240,
+'medium': 712,
+};
+let url = urls[model];
+let dst = 'gpt-2.bin';
+let el = document.getElementById('fetch-gpt-2-progress');
+let size_mb = sizes[model];
model_gpt_2 = model;
-loadRemote(url, dst, el);
+document.getElementById('fetch-gpt-2-small').style.display = 'none';
+document.getElementById('model-gpt-2-status').innerHTML = 'GPT-2 model: loading "' + model + '" ... ';
+loadRemote(url, dst, el, size_mb);
}
//
@@ -420,13 +496,9 @@
if (instance) {
Module.set_audio(instance, audioAll);
}
-setAudio(audio);
});
}, function(e) {
printTextarea('js: error decoding audio: ' + e);
audio = null;
-setAudio(audio);
});
}
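// Editor's sketch (assumed from the surrounding context): the recorded audio
// is decoded with the Web Audio API before being handed to the WASM side; a
// minimal version of that step could look like this. decodeToPCM is an
// illustrative name, and the { sampleRate } AudioContext option needs a
// reasonably recent browser.
function decodeToPCM(arrayBuffer, onPCM) {
    var ctx = new AudioContext({ sampleRate: kSampleRate });
    ctx.decodeAudioData(arrayBuffer, function (buf) {
        onPCM(buf.getChannelData(0)); // mono Float32Array at kSampleRate
    }, function (e) {
        printTextarea('js: error decoding audio: ' + e);
    });
}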
@@ -482,14 +554,11 @@
// speak
//
-var voice = null;
function onSpeak(text) {
var voices = synth.getVoices();
var msg = new SpeechSynthesisUtterance(text);
if (voice == null) {
//voice = voices[Math.floor(Math.random() * 2)];
voice = voices[0];
}
@@ -511,8 +580,10 @@
async function clearCache() {
if (confirm('Are you sure you want to clear the cache?\nAll the models will be downloaded again.')) {
-const dbs = await window.indexedDB.databases();
-dbs.forEach(db => { window.indexedDB.deleteDatabase(db.name) });
+//const dbs = await indexedDB.databases();
+//dbs.forEach(db => { indexedDB.deleteDatabase(db.name) });
+indexedDB.deleteDatabase(dbName);
}
}
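// Note: indexedDB.databases() is not implemented in all browsers (Firefox in
// particular), which is presumably why the enumeration above was commented
// out in favour of deleting the single known database by name.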
@@ -558,6 +629,11 @@
}
}
+function onVoiceChange() {
+printTextarea('js: voice changed to: ' + document.getElementById('voice').value);
+voice = synth.getVoices()[document.getElementById('voice').value];
+}
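// Usage sketch (not part of the diff): speaking a line with the currently
// selected voice, using the synth/voice globals declared above.
function sayWithSelectedVoice(text) {
    var msg = new SpeechSynthesisUtterance(text);
    if (voice) {
        msg.voice = voice;
    }
    synth.speak(msg);
}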
</script>
<script type="text/javascript" src="talk.js"></script>
</body>