commit
e4bd5d8ccc
@ -0,0 +1,47 @@
|
|||||||
|
#
|
||||||
|
# libcommand
|
||||||
|
#
|
||||||
|
|
||||||
|
# target name for the Emscripten build of the voice-command example
set(TARGET libcommand)

add_executable(${TARGET}
    emscripten.cpp
    )

target_link_libraries(${TARGET} PRIVATE
    whisper
    )

unset(EXTRA_FLAGS)

if (WHISPER_WASM_SINGLE_FILE)
    # embed the .wasm binary directly inside the generated .js file
    set(EXTRA_FLAGS "-s SINGLE_FILE=1")
    message(STATUS "Embedding WASM inside command.js")

    # copy the single-file build output into the command.wasm page directory
    add_custom_command(
        TARGET ${TARGET} POST_BUILD
        COMMAND ${CMAKE_COMMAND} -E copy
        ${CMAKE_BINARY_DIR}/bin/libcommand.js
        ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/command.wasm/command.js
        )
endif()

# Emscripten link flags: embind bindings, a pthread pool sized for whisper's
# worker threads, a fixed 1 GB heap, and filesystem support so the model
# file can be loaded from the virtual FS
set_target_properties(${TARGET} PROPERTIES LINK_FLAGS " \
    --bind \
    -s USE_PTHREADS=1 \
    -s PTHREAD_POOL_SIZE=8 \
    -s INITIAL_MEMORY=1024MB \
    -s TOTAL_MEMORY=1024MB \
    -s FORCE_FILESYSTEM=1 \
    -s EXPORTED_RUNTIME_METHODS=\"['print', 'printErr', 'ccall', 'cwrap']\" \
    ${EXTRA_FLAGS} \
    ")

#
# command.wasm
#

set(TARGET command.wasm)

configure_file(${CMAKE_CURRENT_SOURCE_DIR}/index-tmpl.html ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/${TARGET}/index.html @ONLY)
configure_file(${CMAKE_CURRENT_SOURCE_DIR}/../helpers.js   ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/${TARGET}/helpers.js @ONLY)
|
@ -0,0 +1,23 @@
|
|||||||
|
# command.wasm
|
||||||
|
|
||||||
|
This is a basic Voice Assistant example that accepts voice commands from the microphone.
|
||||||
|
It runs fully in the browser via WebAssembly.
|
||||||
|
|
||||||
|
Online demo: https://whisper.ggerganov.com/command/
|
||||||
|
|
||||||
|
Terminal version: [examples/command](/examples/command)
|
||||||
|
|
||||||
|
## Build instructions
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# build using Emscripten (v3.1.2)
|
||||||
|
git clone https://github.com/ggerganov/whisper.cpp
|
||||||
|
cd whisper.cpp
|
||||||
|
mkdir build-em && cd build-em
|
||||||
|
emcmake cmake ..
|
||||||
|
make -j
|
||||||
|
|
||||||
|
# copy the produced page to your HTTP path
|
||||||
|
cp bin/command.wasm/* /path/to/html/
|
||||||
|
cp bin/libcommand.worker.js /path/to/html/
|
||||||
|
```
|
@ -0,0 +1,408 @@
|
|||||||
|
#include "ggml.h"
|
||||||
|
#include "whisper.h"
|
||||||
|
|
||||||
|
#include <emscripten.h>
|
||||||
|
#include <emscripten/bind.h>
|
||||||
|
|
||||||
|
#include <atomic>
|
||||||
|
#include <cmath>
|
||||||
|
#include <mutex>
|
||||||
|
#include <string>
|
||||||
|
#include <thread>
|
||||||
|
#include <vector>
|
||||||
|
#include <regex>
|
||||||
|
|
||||||
|
// maximum number of threads used for whisper inference
constexpr int N_THREAD = 8;

// pool of whisper contexts; the "instance" id handed to JS is an index here + 1
std::vector<struct whisper_context *> g_contexts(4, nullptr);

// g_mutex guards the status/transcription strings and writes to g_pcmf32
// (see the "set_audio" binding)
std::mutex  g_mutex;
std::thread g_worker;

// set by the "init" binding, cleared by "free"; polled by the worker loop
std::atomic<bool> g_running(false);

// g_status is set by the worker; g_status_forced (set from JS) overrides it
std::string g_status        = "";
std::string g_status_forced = "";
// last recognized voice command, consumed by the "get_transcribed" binding
std::string g_transcribed   = "";

// most recent microphone audio pushed from JS (16 kHz mono float samples)
std::vector<float> g_pcmf32;
|
||||||
|
|
||||||
|
// return a copy of s with leading and trailing whitespace removed
static std::string trim(const std::string & s) {
    static const char * kWhitespace = " \t\n\r\f\v";

    const auto first = s.find_first_not_of(kWhitespace);
    if (first == std::string::npos) {
        // string is empty or all whitespace
        return "";
    }

    const auto last = s.find_last_not_of(kWhitespace);
    return s.substr(first, last - first + 1);
}
|
||||||
|
|
||||||
|
// Simple one-pole high-pass filter applied to data in place.
// cutoff is in Hz, sample_rate in samples/second. data[0] is left unchanged.
// FIX: the original read data[0] on an empty vector (undefined behavior);
// an empty input is now a no-op.
static void high_pass_filter(std::vector<float> & data, float cutoff, float sample_rate) {
    if (data.empty()) {
        return; // nothing to filter - also avoids reading data[0] below
    }

    const float rc    = 1.0f / (2.0f * M_PI * cutoff);
    const float dt    = 1.0f / sample_rate;
    const float alpha = dt / (rc + dt);

    // y holds the previous filtered sample
    float y = data[0];

    for (size_t i = 1; i < data.size(); i++) {
        y = alpha * (y + data[i] - data[i - 1]);
        data[i] = y;
    }
}
|
||||||
|
|
||||||
|
// compute similarity between two strings using Levenshtein distance
|
||||||
|
// compute similarity between two strings using Levenshtein distance
// returns a value in [0, 1]: 1 means identical, 0 means completely different
// FIXES: the original outer loop started at i = 0 and read s0[i - 1]
// (i.e. s0[-1], undefined behavior); it also divided by zero when both
// strings were empty.
static float similarity(const std::string & s0, const std::string & s1) {
    const size_t len0 = s0.size() + 1;
    const size_t len1 = s1.size() + 1;

    // two empty strings are trivially identical (also avoids 0/0 below)
    if (s0.empty() && s1.empty()) {
        return 1.0f;
    }

    // single-row dynamic programming: prevCol is row i-1, col is row i
    std::vector<int> col    (len1, 0);
    std::vector<int> prevCol(len1, 0);

    for (size_t i = 0; i < len1; i++) {
        prevCol[i] = i;
    }

    // start at i = 1: the i = 0 row is exactly the initialization above
    for (size_t i = 1; i < len0; i++) {
        col[0] = i;
        for (size_t j = 1; j < len1; j++) {
            col[j] = std::min(std::min(1 + col[j - 1], 1 + prevCol[j]), prevCol[j - 1] + (s0[i - 1] == s1[j - 1] ? 0 : 1));
        }
        col.swap(prevCol);
    }

    const float dist = prevCol[len1 - 1];

    return 1.0f - (dist / std::max(s0.size(), s1.size()));
}
|
||||||
|
|
||||||
|
// update the status string shown to the UI
// (guarded by g_mutex; read back by the "get_status" binding)
void command_set_status(const std::string & status) {
    std::lock_guard<std::mutex> lock(g_mutex);
    g_status = status;
}
|
||||||
|
|
||||||
|
bool command_vad_simple(std::vector<float> & pcmf32, int sample_rate, int last_ms, float vad_thold, float freq_thold, bool verbose) {
|
||||||
|
const int n_samples = pcmf32.size();
|
||||||
|
const int n_samples_last = (sample_rate * last_ms) / 1000;
|
||||||
|
|
||||||
|
if (n_samples_last >= n_samples) {
|
||||||
|
// not enough samples - assume no speech
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (freq_thold > 0.0f) {
|
||||||
|
high_pass_filter(pcmf32, freq_thold, sample_rate);
|
||||||
|
}
|
||||||
|
|
||||||
|
float energy_all = 0.0f;
|
||||||
|
float energy_last = 0.0f;
|
||||||
|
|
||||||
|
for (size_t i = 0; i < n_samples; i++) {
|
||||||
|
energy_all += fabsf(pcmf32[i]);
|
||||||
|
|
||||||
|
if (i >= n_samples - n_samples_last) {
|
||||||
|
energy_last += fabsf(pcmf32[i]);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
energy_all /= n_samples;
|
||||||
|
energy_last /= n_samples_last;
|
||||||
|
|
||||||
|
if (verbose) {
|
||||||
|
fprintf(stderr, "%s: energy_all: %f, energy_last: %f, vad_thold: %f, freq_thold: %f\n", __func__, energy_all, energy_last, vad_thold, freq_thold);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (energy_last > vad_thold*energy_all) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Run full whisper inference on pcmf32 and return the concatenated text of
// all segments. prob is set to the average token probability across segments
// (left 0 if there are no tokens); t_ms is set to the wall-clock inference
// time in milliseconds. Returns "" if whisper_full fails.
std::string command_transcribe(whisper_context * ctx, const whisper_full_params & wparams, const std::vector<float> & pcmf32, float & prob, int64_t & t_ms) {
    const auto t_start = std::chrono::high_resolution_clock::now();

    prob = 0.0f;
    t_ms = 0;

    if (whisper_full(ctx, wparams, pcmf32.data(), pcmf32.size()) != 0) {
        return "";
    }

    int prob_n = 0;
    std::string result;

    const int n_segments = whisper_full_n_segments(ctx);
    for (int i = 0; i < n_segments; ++i) {
        const char * text = whisper_full_get_segment_text(ctx, i);

        result += text;

        // accumulate per-token probabilities to report an average confidence
        const int n_tokens = whisper_full_n_tokens(ctx, i);
        for (int j = 0; j < n_tokens; ++j) {
            const auto token = whisper_full_get_token_data(ctx, i, j);

            prob += token.p;
            ++prob_n;
        }
    }

    if (prob_n > 0) {
        prob /= prob_n;
    }

    const auto t_end = std::chrono::high_resolution_clock::now();
    t_ms = std::chrono::duration_cast<std::chrono::milliseconds>(t_end - t_start).count();

    return result;
}
|
||||||
|
|
||||||
|
void command_get_audio(int ms, int sample_rate, std::vector<float> & audio) {
|
||||||
|
const int64_t n_samples = (ms * sample_rate) / 1000;
|
||||||
|
|
||||||
|
int64_t n_take = 0;
|
||||||
|
if (g_pcmf32.size() < n_samples) {
|
||||||
|
n_take = g_pcmf32.size();
|
||||||
|
} else {
|
||||||
|
n_take = n_samples;
|
||||||
|
}
|
||||||
|
|
||||||
|
audio.resize(n_take);
|
||||||
|
std::copy(g_pcmf32.end() - n_take, g_pcmf32.end(), audio.begin());
|
||||||
|
}
|
||||||
|
|
||||||
|
// Worker-thread entry point (spawned by the "init" binding). Runs until
// g_running is cleared by the "free" binding. Flow:
//   1) repeatedly prompt the user to say the activation phrase k_prompt
//   2) once the phrase is recognized (fuzzy-matched via similarity()),
//      keep transcribing short audio windows and publish each recognized
//      command into g_transcribed for the "get_transcribed" binding
// index selects the whisper context in g_contexts; the context is freed
// here when the loop exits.
void command_main(size_t index) {
    command_set_status("loading data ...");

    struct whisper_full_params wparams = whisper_full_default_params(whisper_sampling_strategy::WHISPER_SAMPLING_GREEDY);

    wparams.n_threads        = std::min(N_THREAD, (int) std::thread::hardware_concurrency());
    wparams.offset_ms        = 0;
    wparams.translate        = false;
    wparams.no_context       = true;
    wparams.single_segment   = true;
    wparams.print_realtime   = false;
    wparams.print_progress   = false;
    wparams.print_timestamps = true;
    wparams.print_special    = false;

    wparams.max_tokens       = 32;
    wparams.audio_ctx        = 768; // partial encoder context for better performance

    wparams.language         = "en";

    printf("command: using %d threads\n", wparams.n_threads);

    bool is_running   = true;
    bool have_prompt  = false;
    bool ask_prompt   = true;
    bool print_energy = false;

    // prob0 is the confidence measured on the activation phrase; command
    // confidences are later reported relative to it
    float prob0 = 0.0f;
    float prob  = 0.0f;

    std::vector<float> pcmf32_cur;
    std::vector<float> pcmf32_prompt;

    // activation phrase the user must say before commands are accepted
    const std::string k_prompt = "Ok Whisper, start listening for commands.";

    // whisper context
    auto & ctx = g_contexts[index];

    // audio window lengths (ms) for voice-activity detection, the activation
    // phrase, and individual commands
    const int32_t vad_ms     = 2000;
    const int32_t prompt_ms  = 5000;
    const int32_t command_ms = 4000;

    const float vad_thold  = 0.1f;
    const float freq_thold = -1.0f; // <= 0 disables the VAD high-pass filter

    while (g_running) {
        // delay
        std::this_thread::sleep_for(std::chrono::milliseconds(100));

        if (ask_prompt) {
            fprintf(stdout, "\n");
            fprintf(stdout, "%s: Say the following phrase: '%s%s%s'\n", __func__, "\033[1m", k_prompt.c_str(), "\033[0m");
            fprintf(stdout, "\n");

            {
                char txt[1024];
                snprintf(txt, sizeof(txt), "Say the following phrase: '%s'", k_prompt.c_str());
                command_set_status(txt);
            }

            ask_prompt = false;
        }

        int64_t t_ms = 0;

        {
            command_get_audio(vad_ms, WHISPER_SAMPLE_RATE, pcmf32_cur);

            if (command_vad_simple(pcmf32_cur, WHISPER_SAMPLE_RATE, 1000, vad_thold, freq_thold, print_energy)) {
                fprintf(stdout, "%s: Speech detected! Processing ...\n", __func__);
                command_set_status("Speech detected! Processing ...");

                if (!have_prompt) {
                    // still waiting for the activation phrase: grab a longer window
                    command_get_audio(prompt_ms, WHISPER_SAMPLE_RATE, pcmf32_cur);

                    const auto txt = ::trim(::command_transcribe(ctx, wparams, pcmf32_cur, prob0, t_ms));

                    fprintf(stdout, "%s: Heard '%s%s%s', (t = %d ms)\n", __func__, "\033[1m", txt.c_str(), "\033[0m", (int) t_ms);

                    const float sim = similarity(txt, k_prompt);

                    // accept only if both length and fuzzy similarity are close
                    // to the expected phrase
                    if (txt.length() < 0.8*k_prompt.length() || txt.length() > 1.2*k_prompt.length() || sim < 0.8f) {
                        fprintf(stdout, "%s: WARNING: prompt not recognized, try again\n", __func__);
                        ask_prompt = true;
                    } else {
                        fprintf(stdout, "\n");
                        fprintf(stdout, "%s: The prompt has been recognized!\n", __func__);
                        fprintf(stdout, "%s: Waiting for voice commands ...\n", __func__);
                        fprintf(stdout, "\n");

                        {
                            char txt[1024];
                            snprintf(txt, sizeof(txt), "Success! Waiting for voice commands ...");
                            command_set_status(txt);
                        }

                        // save the audio for the prompt
                        pcmf32_prompt = pcmf32_cur;
                        have_prompt = true;
                    }
                } else {
                    command_get_audio(command_ms, WHISPER_SAMPLE_RATE, pcmf32_cur);

                    // prepend the prompt audio
                    pcmf32_cur.insert(pcmf32_cur.begin(), pcmf32_prompt.begin(), pcmf32_prompt.end());

                    const auto txt = ::trim(::command_transcribe(ctx, wparams, pcmf32_cur, prob, t_ms));

                    prob = 100.0f*(prob - prob0);

                    fprintf(stdout, "%s: heard '%s'\n", __func__, txt.c_str());

                    // find the prompt in the text: try cut points around the
                    // prompt length and keep the one with highest similarity
                    float best_sim = 0.0f;
                    size_t best_len = 0;
                    for (int n = 0.8*k_prompt.size(); n <= 1.2*k_prompt.size(); ++n) {
                        const auto prompt = txt.substr(0, n);

                        const float sim = similarity(prompt, k_prompt);

                        //fprintf(stderr, "%s: prompt = '%s', sim = %f\n", __func__, prompt.c_str(), sim);

                        if (sim > best_sim) {
                            best_sim = sim;
                            best_len = n;
                        }
                    }

                    // everything after the best cut point is the actual command
                    const std::string command = ::trim(txt.substr(best_len));

                    fprintf(stdout, "%s: Command '%s%s%s', (t = %d ms)\n", __func__, "\033[1m", command.c_str(), "\033[0m", (int) t_ms);
                    fprintf(stdout, "\n");

                    {
                        char txt[1024];
                        snprintf(txt, sizeof(txt), "Command '%s', (t = %d ms)", command.c_str(), (int) t_ms);
                        command_set_status(txt);
                    }
                    {
                        // publish for the "get_transcribed" binding
                        std::lock_guard<std::mutex> lock(g_mutex);
                        g_transcribed = command;
                    }
                }

                // discard the processed audio so it is not transcribed again
                g_pcmf32.clear();
            }
        }
    }

    // worker owns the context: release it on shutdown
    if (index < g_contexts.size()) {
        whisper_free(g_contexts[index]);
        g_contexts[index] = nullptr;
    }
}
|
||||||
|
|
||||||
|
EMSCRIPTEN_BINDINGS(command) {
    // init(path_model): load the model into the first free context slot and
    // start the worker thread. Returns the 1-based instance id, or 0 on failure.
    emscripten::function("init", emscripten::optional_override([](const std::string & path_model) {
        for (size_t i = 0; i < g_contexts.size(); ++i) {
            if (g_contexts[i] == nullptr) {
                g_contexts[i] = whisper_init(path_model.c_str());
                if (g_contexts[i] != nullptr) {
                    g_running = true;
                    // join a previous worker (if any) before starting a new one
                    if (g_worker.joinable()) {
                        g_worker.join();
                    }
                    g_worker = std::thread([i]() {
                        command_main(i);
                    });

                    return i + 1;
                } else {
                    return (size_t) 0;
                }
            }
        }

        return (size_t) 0;
    }));

    // free(index): signal the worker loop to stop; the whisper context itself
    // is released at the end of command_main.
    // NOTE(review): the index argument is currently unused - confirm whether
    // per-instance shutdown was intended
    emscripten::function("free", emscripten::optional_override([](size_t index) {
        if (g_running) {
            g_running = false;
        }
    }));

    // set_audio(index, audio): replace the shared microphone buffer g_pcmf32
    // with the contents of the given JS Float32Array (index is 1-based).
    // Returns 0 on success, -1 for a bad index, -2 if the slot has no context.
    emscripten::function("set_audio", emscripten::optional_override([](size_t index, const emscripten::val & audio) {
        --index;

        if (index >= g_contexts.size()) {
            return -1;
        }

        if (g_contexts[index] == nullptr) {
            return -2;
        }

        {
            std::lock_guard<std::mutex> lock(g_mutex);
            const int n = audio["length"].as<int>();

            emscripten::val heap = emscripten::val::module_property("HEAPU8");
            emscripten::val memory = heap["buffer"];

            g_pcmf32.resize(n);

            // build a typed-array view over the WASM heap at g_pcmf32's
            // storage and bulk-copy the JS audio data into it
            emscripten::val memoryView = audio["constructor"].new_(memory, reinterpret_cast<uintptr_t>(g_pcmf32.data()), n);
            memoryView.call<void>("set", audio);
        }

        return 0;
    }));

    // get_transcribed(): fetch-and-clear the last recognized command
    emscripten::function("get_transcribed", emscripten::optional_override([]() {
        std::string transcribed;

        {
            std::lock_guard<std::mutex> lock(g_mutex);
            transcribed = std::move(g_transcribed);
        }

        return transcribed;
    }));

    // get_status(): current status string; a forced status (see set_status)
    // takes precedence over the worker's own status
    emscripten::function("get_status", emscripten::optional_override([]() {
        std::string status;

        {
            std::lock_guard<std::mutex> lock(g_mutex);
            status = g_status_forced.empty() ? g_status : g_status_forced;
        }

        return status;
    }));

    // set_status(status): override the status returned by get_status;
    // pass "" to clear the override
    emscripten::function("set_status", emscripten::optional_override([](const std::string & status) {
        {
            std::lock_guard<std::mutex> lock(g_mutex);
            g_status_forced = status;
        }
    }));
}
|
@ -0,0 +1,386 @@
|
|||||||
|
<!doctype html>
|
||||||
|
<html lang="en-us">
|
||||||
|
<head>
|
||||||
|
<title>command : Voice assistant example using Whisper + WebAssembly</title>
|
||||||
|
|
||||||
|
<style>
|
||||||
|
#output {
|
||||||
|
width: 100%;
|
||||||
|
height: 100%;
|
||||||
|
margin: 0 auto;
|
||||||
|
margin-top: 10px;
|
||||||
|
border-left: 0px;
|
||||||
|
border-right: 0px;
|
||||||
|
padding-left: 0px;
|
||||||
|
padding-right: 0px;
|
||||||
|
display: block;
|
||||||
|
background-color: black;
|
||||||
|
color: white;
|
||||||
|
font-size: 10px;
|
||||||
|
font-family: 'Lucida Console', Monaco, monospace;
|
||||||
|
outline: none;
|
||||||
|
white-space: pre;
|
||||||
|
overflow-wrap: normal;
|
||||||
|
overflow-x: scroll;
|
||||||
|
}
|
||||||
|
</style>
|
||||||
|
</head>
|
||||||
|
<body>
|
||||||
|
<div id="main-container">
|
||||||
|
<b>command : Voice assistant example using Whisper + WebAssembly</b>
|
||||||
|
|
||||||
|
<br><br>
|
||||||
|
|
||||||
|
You can find more about this project on <a href="https://github.com/ggerganov/whisper.cpp/tree/master/examples/command.wasm">GitHub</a>.
|
||||||
|
|
||||||
|
<br><br>
|
||||||
|
|
||||||
|
<hr>
|
||||||
|
|
||||||
|
Select the model you would like to use, click the "Start" button and follow the instructions.
|
||||||
|
|
||||||
|
<br><br>
|
||||||
|
|
||||||
|
<div id="model-whisper">
|
||||||
|
Whisper model: <span id="model-whisper-status"></span>
|
||||||
|
<button id="fetch-whisper-tiny-en" onclick="loadWhisper('tiny.en')">tiny.en (75 MB)</button>
|
||||||
|
<button id="fetch-whisper-base-en" onclick="loadWhisper('base.en')">base.en (142 MB)</button>
|
||||||
|
<span id="fetch-whisper-progress"></span>
|
||||||
|
|
||||||
|
<!--
|
||||||
|
<input type="file" id="file" name="file" onchange="loadFile(event, 'whisper.bin')" />
|
||||||
|
-->
|
||||||
|
</div>
|
||||||
|
|
||||||
|
<br>
|
||||||
|
|
||||||
|
<div id="input">
|
||||||
|
<button id="start" onclick="onStart()" disabled>Start</button>
|
||||||
|
<button id="stop" onclick="onStop()" disabled>Stop</button>
|
||||||
|
<button id="clear" onclick="clearCache()">Clear Cache</button>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
<br>
|
||||||
|
|
||||||
|
<div id="state">
|
||||||
|
Status: <b><span id="state-status">not started</span></b>
|
||||||
|
|
||||||
|
<pre id="state-transcribed">[The recognized voice commands will be displayed here]</pre>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
<hr>
|
||||||
|
|
||||||
|
Debug output:
|
||||||
|
<textarea id="output" rows="20"></textarea>
|
||||||
|
|
||||||
|
<br>
|
||||||
|
|
||||||
|
<b>Troubleshooting</b>
|
||||||
|
|
||||||
|
<br><br>
|
||||||
|
|
||||||
|
The page does some heavy computations, so make sure:
|
||||||
|
|
||||||
|
<ul>
|
||||||
|
<li>To use a modern web browser (e.g. Chrome, Firefox)</li>
|
||||||
|
<li>To use a fast desktop or laptop computer (i.e. not a mobile phone)</li>
|
||||||
|
<li>Your browser supports WASM <a href="https://webassembly.org/roadmap/">Fixed-width SIMD</a></li>
|
||||||
|
</ul>
|
||||||
|
|
||||||
|
<div class="cell-version">
|
||||||
|
<span>
|
||||||
|
|
|
||||||
|
Build time: <span class="nav-link">@GIT_DATE@</span> |
|
||||||
|
Commit hash: <a class="nav-link" href="https://github.com/ggerganov/whisper.cpp/commit/@GIT_SHA1@">@GIT_SHA1@</a> |
|
||||||
|
Commit subject: <span class="nav-link">@GIT_COMMIT_SUBJECT@</span> |
|
||||||
|
<a class="nav-link" href="https://github.com/ggerganov/whisper.cpp/tree/master/examples/command.wasm">Source Code</a> |
|
||||||
|
</span>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
<script type="text/javascript" src="helpers.js"></script>
|
||||||
|
<script type='text/javascript'>
|
||||||
|
// web audio context
|
||||||
|
var context = null;
|
||||||
|
|
||||||
|
// audio data
|
||||||
|
var audio = null;
|
||||||
|
var audio0 = null;
|
||||||
|
|
||||||
|
// the command instance
|
||||||
|
var instance = null;
|
||||||
|
|
||||||
|
// model name
|
||||||
|
var model_whisper = null;
|
||||||
|
|
||||||
|
var Module = {
|
||||||
|
print: printTextarea,
|
||||||
|
printErr: printTextarea,
|
||||||
|
setStatus: function(text) {
|
||||||
|
printTextarea('js: ' + text);
|
||||||
|
},
|
||||||
|
monitorRunDependencies: function(left) {
|
||||||
|
},
|
||||||
|
preRun: function() {
|
||||||
|
printTextarea('js: Preparing ...');
|
||||||
|
},
|
||||||
|
postRun: function() {
|
||||||
|
printTextarea('js: Initialized successfully!');
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
//
|
||||||
|
// fetch models
|
||||||
|
//
|
||||||
|
|
||||||
|
let dbVersion = 1
|
||||||
|
let dbName = 'whisper.ggerganov.com';
|
||||||
|
let indexedDB = window.indexedDB || window.mozIndexedDB || window.webkitIndexedDB || window.msIndexedDB
|
||||||
|
|
||||||
|
// Store a fetched model file into the Emscripten virtual filesystem so the
// C++ side can open it by name; then flip the UI into the "ready" state.
function storeFS(fname, buf) {
    // write to WASM file using FS_createDataFile
    // if the file exists, delete it
    try {
        Module.FS_unlink(fname);
    } catch (e) {
        // ignore
    }

    Module.FS_createDataFile("/", fname, buf, true, true);

    printTextarea('storeFS: stored model: ' + fname + ' size: ' + buf.length);

    document.getElementById('model-whisper-status').innerHTML = 'loaded "' + model_whisper + '"!';

    // enable Start only once a model has actually been selected
    if (model_whisper != null) {
        document.getElementById('start').disabled = false;
        document.getElementById('stop' ).disabled = true;
    }
}
|
||||||
|
|
||||||
|
// Download the selected ggml model (loadRemote may serve it from the
// IndexedDB cache) and hand the bytes to storeFS. Updates the fetch UI.
function loadWhisper(model) {
    let urls = {
        'tiny.en': 'https://whisper.ggerganov.com/ggml-model-whisper-tiny.en.bin',
        'base.en': 'https://whisper.ggerganov.com/ggml-model-whisper-base.en.bin',
    };

    // approximate download sizes in MB, shown by loadRemote
    let sizes = {
        'tiny.en': 75,
        'base.en': 142,
    };

    let url     = urls[model];
    let dst     = 'whisper.bin';
    let size_mb = sizes[model];

    model_whisper = model;

    // hide the selection buttons while the download is in flight
    document.getElementById('fetch-whisper-tiny-en').style.display = 'none';
    document.getElementById('fetch-whisper-base-en').style.display = 'none';
    document.getElementById('model-whisper-status').innerHTML = 'loading "' + model + '" ... ';

    // progress callback: show percent next to the model buttons
    cbProgress = function(p) {
        let el = document.getElementById('fetch-whisper-progress');
        el.innerHTML = Math.round(100*p) + '%';
    };

    // cancel callback: restore the model-selection buttons
    cbCancel = function() {
        var el;
        el = document.getElementById('fetch-whisper-tiny-en'); if (el) el.style.display = 'inline-block';
        el = document.getElementById('fetch-whisper-base-en'); if (el) el.style.display = 'inline-block';
        el = document.getElementById('model-whisper-status'); if (el) el.innerHTML = '';
    };

    loadRemote(url, dst, size_mb, cbProgress, storeFS, cbCancel, printTextarea);
}
|
||||||
|
|
||||||
|
//
|
||||||
|
// microphone
|
||||||
|
//
|
||||||
|
|
||||||
|
const kSampleRate = 16000;
|
||||||
|
const kRestartRecording_s = 120;
|
||||||
|
const kIntervalAudio_ms = 250; // pass the recorded audio to the C++ instance at this rate
|
||||||
|
|
||||||
|
var mediaRecorder = null;
|
||||||
|
var doRecording = false;
|
||||||
|
var startTime = 0;
|
||||||
|
|
||||||
|
window.AudioContext = window.AudioContext || window.webkitAudioContext;
|
||||||
|
window.OfflineAudioContext = window.OfflineAudioContext || window.webkitOfflineAudioContext;
|
||||||
|
|
||||||
|
// Stop capturing: flag the recorder loop to exit and drop the audio buffers.
// The C++ side is told via set_status that transcription is paused.
function stopRecording() {
    Module.set_status("paused");
    doRecording = false;
    audio0 = null;
    audio = null;
    context = null;
}
|
||||||
|
|
||||||
|
// Start microphone capture at kSampleRate and keep feeding decoded PCM to
// the C++ instance via Module.set_audio. Recording is restarted every
// kRestartRecording_s seconds to bound the re-decoded buffer size; the
// already-captured samples are carried over in audio0.
function startRecording() {
    if (!context) {
        context = new AudioContext({
            sampleRate: kSampleRate,
            channelCount: 1,
            echoCancellation: false,
            autoGainControl: true,
            noiseSuppression: true,
        });
    }

    Module.set_status("");

    document.getElementById('start').disabled = true;
    document.getElementById('stop').disabled = false;

    doRecording = true;
    startTime = Date.now();

    var chunks = [];
    var stream = null;

    navigator.mediaDevices.getUserMedia({audio: true, video: false})
        .then(function(s) {
            stream = s;
            mediaRecorder = new MediaRecorder(stream);
            // fires every kIntervalAudio_ms; re-decodes ALL chunks so far so
            // "audio" always holds the full take since the last restart
            mediaRecorder.ondataavailable = function(e) {
                chunks.push(e.data);

                var blob = new Blob(chunks, { 'type' : 'audio/ogg; codecs=opus' });
                var reader = new FileReader();

                reader.onload = function(event) {
                    var buf = new Uint8Array(reader.result);

                    if (!context) {
                        return; // recording was stopped meanwhile
                    }
                    context.decodeAudioData(buf.buffer, function(audioBuffer) {
                        // resample/render offline to get raw PCM out of the compressed blob
                        var offlineContext = new OfflineAudioContext(audioBuffer.numberOfChannels, audioBuffer.length, audioBuffer.sampleRate);
                        var source = offlineContext.createBufferSource();
                        source.buffer = audioBuffer;
                        source.connect(offlineContext.destination);
                        source.start(0);

                        offlineContext.startRendering().then(function(renderedBuffer) {
                            audio = renderedBuffer.getChannelData(0);

                            //printTextarea('js: audio recorded, size: ' + audio.length + ', old size: ' + (audio0 == null ? 0 : audio0.length));

                            // concatenate the carried-over audio (audio0) with the new take
                            var audioAll = new Float32Array(audio0 == null ? audio.length : audio0.length + audio.length);
                            if (audio0 != null) {
                                audioAll.set(audio0, 0);
                            }
                            audioAll.set(audio, audio0 == null ? 0 : audio0.length);

                            if (instance) {
                                Module.set_audio(instance, audioAll);
                            }
                        });
                    }, function(e) {
                        audio = null;
                    });
                }

                reader.readAsArrayBuffer(blob);
            };

            // auto-restart after a deliberate stop (see the restart logic below)
            mediaRecorder.onstop = function(e) {
                if (doRecording) {
                    setTimeout(function() {
                        startRecording();
                    });
                }
            };

            mediaRecorder.start(kIntervalAudio_ms);
        })
        .catch(function(err) {
            printTextarea('js: error getting audio stream: ' + err);
        });

    // watchdog: tears the recorder down when doRecording is cleared, and
    // restarts it when the current take grows past kRestartRecording_s
    var interval = setInterval(function() {
        if (!doRecording) {
            clearInterval(interval);
            mediaRecorder.stop();
            stream.getTracks().forEach(function(track) {
                track.stop();
            });

            document.getElementById('start').disabled = false;
            document.getElementById('stop').disabled = true;

            mediaRecorder = null;
        }

        // if audio length is more than kRestartRecording_s seconds, restart recording
        if (audio != null && audio.length > kSampleRate*kRestartRecording_s) {
            if (doRecording) {
                //printTextarea('js: restarting recording');

                clearInterval(interval);
                audio0 = audio;
                audio = null;
                mediaRecorder.stop();
                stream.getTracks().forEach(function(track) {
                    track.stop();
                });
            }
        }
    }, 100);
}
|
||||||
|
|
||||||
|
//
|
||||||
|
// main
|
||||||
|
//
|
||||||
|
|
||||||
|
var nLines = 0;
|
||||||
|
var intervalUpdate = null;
|
||||||
|
var transcribedAll = '';
|
||||||
|
|
||||||
|
// "Start" button handler: lazily create the C++ instance, begin microphone
// capture, and poll the worker every 100 ms for status + transcribed text.
function onStart() {
    if (!instance) {
        instance = Module.init('whisper.bin');

        if (instance) {
            printTextarea("js: whisper initialized, instance: " + instance);
        }
    }

    if (!instance) {
        printTextarea("js: failed to initialize whisper");
        return;
    }

    startRecording();

    intervalUpdate = setInterval(function() {
        var transcribed = Module.get_transcribed();

        if (transcribed != null && transcribed.length > 1) {
            transcribedAll += transcribed + '<br>';
            nLines++;

            // if more than 10 lines, remove the first line
            if (nLines > 10) {
                var i = transcribedAll.indexOf('<br>');
                if (i > 0) {
                    transcribedAll = transcribedAll.substring(i + 4);
                    nLines--;
                }
            }
        }

        document.getElementById('state-status').innerHTML = Module.get_status();
        document.getElementById('state-transcribed').innerHTML = transcribedAll;
    }, 100);
}
|
||||||
|
|
||||||
|
// "Stop" button handler
function onStop() {
    stopRecording();
}
|
||||||
|
|
||||||
|
</script>
|
||||||
|
<script type="text/javascript" src="command.js"></script>
|
||||||
|
</body>
|
||||||
|
</html>
|
@ -0,0 +1,98 @@
|
|||||||
|
#!/bin/bash
|
||||||
|
set -eo pipefail
|
||||||
|
# Transcribe audio livestream by feeding ffmpeg output to whisper.cpp at regular intervals
|
||||||
|
# Idea by @semiformal-net
|
||||||
|
# ref: https://github.com/ggerganov/whisper.cpp/issues/185
|
||||||
|
#
|
||||||
|
# TODO:
|
||||||
|
# - Currently, there is a gap between sequential chunks, so some of the words are dropped. Need to figure out a
|
||||||
|
# way to produce a continuous stream of audio chunks.
|
||||||
|
#
|
||||||
|
|
||||||
|
# defaults; overridden by the positional arguments below
url="http://a.files.bbci.co.uk/media/live/manifesto/audio/simulcast/hls/nonuk/sbr_low/ak/bbc_world_service.m3u8"
fmt=aac # the audio format extension of the stream (TODO: auto detect)
step_s=30
model="base.en"

# arg 1: stream url (optional - falls back to the BBC World Service default)
if [ -z "$1" ]; then
    echo "Usage: $0 stream_url [step_s] [model]"
    echo ""
    echo "  Example:"
    echo "    $0 $url $step_s $model"
    echo ""
    echo "No url specified, using default: $url"
else
    url="$1"
fi

# arg 2: chunk length in seconds
if [ -n "$2" ]; then
    step_s="$2"
fi

# arg 3: model name (validated against the list below)
if [ -n "$3" ]; then
    model="$3"
fi
|
||||||
|
|
||||||
|
# Whisper models
models=( "tiny.en" "tiny" "base.en" "base" "small.en" "small" "medium.en" "medium" "large" )

# list available models
# NOTE(review): the loop variable deliberately reuses the global "model";
# the only caller exits immediately afterwards, so the clobbered value is
# never read - confirm before reusing this helper elsewhere
function list_models {
    printf "\n"
    printf " Available models:"
    for model in "${models[@]}"; do
        printf " $model"
    done
    printf "\n\n"
}

# reject unknown model names early, before spawning ffmpeg
if [[ ! " ${models[@]} " =~ " ${model} " ]]; then
    printf "Invalid model: $model\n"
    list_models

    exit 1
fi
|
||||||
|
|
||||||
|
running=1
|
||||||
|
|
||||||
|
trap "running=0" SIGINT SIGTERM
|
||||||
|
|
||||||
|
printf "[+] Transcribing stream with model '$model', step_s $step_s (press Ctrl+C to stop):\n\n"
|
||||||
|
|
||||||
|
# continuous stream in native fmt (this file will grow forever!)
|
||||||
|
ffmpeg -loglevel quiet -y -re -probesize 32 -i $url -c copy /tmp/whisper-live0.${fmt} &
|
||||||
|
if [ $? -ne 0 ]; then
|
||||||
|
printf "Error: ffmpeg failed to capture audio stream\n"
|
||||||
|
exit 1
|
||||||
|
fi
|
||||||
|
|
||||||
|
printf "Buffering audio. Please wait...\n\n"
|
||||||
|
sleep $(($step_s))
|
||||||
|
|
||||||
|
# do not stop script on error
|
||||||
|
set +e
|
||||||
|
|
||||||
|
i=0
|
||||||
|
SECONDS=0
|
||||||
|
while [ $running -eq 1 ]; do
|
||||||
|
# extract the next piece from the main file above and transcode to wav. -ss sets start time and nudges it by -0.5s to catch missing words (??)
|
||||||
|
err=1
|
||||||
|
while [ $err -ne 0 ]; do
|
||||||
|
if [ $i -gt 0 ]; then
|
||||||
|
ffmpeg -loglevel quiet -v error -noaccurate_seek -i /tmp/whisper-live0.${fmt} -y -ar 16000 -ac 1 -c:a pcm_s16le -ss $(($i*$step_s-1)).5 -t $step_s /tmp/whisper-live.wav 2> /tmp/whisper-live.err
|
||||||
|
else
|
||||||
|
ffmpeg -loglevel quiet -v error -noaccurate_seek -i /tmp/whisper-live0.${fmt} -y -ar 16000 -ac 1 -c:a pcm_s16le -ss $(($i*$step_s)) -t $step_s /tmp/whisper-live.wav 2> /tmp/whisper-live.err
|
||||||
|
fi
|
||||||
|
err=$(cat /tmp/whisper-live.err | wc -l)
|
||||||
|
done
|
||||||
|
|
||||||
|
./main -t 8 -m ./models/ggml-base.en.bin -f /tmp/whisper-live.wav --no-timestamps -otxt 2> /tmp/whispererr | tail -n 1
|
||||||
|
|
||||||
|
while [ $SECONDS -lt $((($i+1)*$step_s)) ]; do
|
||||||
|
sleep 1
|
||||||
|
done
|
||||||
|
((i=i+1))
|
||||||
|
done
|
||||||
|
|
||||||
|
killall -v ffmpeg
|
||||||
|
killall -v main
|
@ -0,0 +1,132 @@
|
|||||||
|
#!/usr/bin/env bash
|
||||||
|
|
||||||
|
# Small shell script to more easily automatically download and transcribe live stream VODs.
|
||||||
|
# This uses YT-DLP, ffmpeg and the CPP version of Whisper: https://github.com/ggerganov/whisper.cpp
|
||||||
|
# Use `./transcribe-vod help` to print help info.
|
||||||
|
|
||||||
|
# MIT License
|
||||||
|
|
||||||
|
# Copyright (c) 2022 Daniils Petrovs
|
||||||
|
|
||||||
|
# Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||||
|
# of this software and associated documentation files (the "Software"), to deal
|
||||||
|
# in the Software without restriction, including without limitation the rights
|
||||||
|
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||||
|
# copies of the Software, and to permit persons to whom the Software is
|
||||||
|
# furnished to do so, subject to the following conditions:
|
||||||
|
|
||||||
|
# The above copyright notice and this permission notice shall be included in all
|
||||||
|
# copies or substantial portions of the Software.
|
||||||
|
|
||||||
|
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||||
|
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||||
|
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||||
|
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||||
|
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||||
|
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||||
|
# SOFTWARE.
|
||||||
|
|
||||||
|
set -Eeuo pipefail
|
||||||
|
|
||||||
|
# You can find how to download models in the OG repo: https://github.com/ggerganov/whisper.cpp/#usage
|
||||||
|
MODEL_PATH="${MODEL_PATH:-models/ggml-base.en.bin}" # Set to a multilingual model if you want to translate from foreign lang to en
|
||||||
|
WHISPER_EXECUTABLE="${WHISPER_EXECUTABLE:-whisper}" # Where to find the whisper.cpp executable
|
||||||
|
WHISPER_LANG="${WHISPER_LANG:-en}" # Set to desired lang to translate from
|
||||||
|
|
||||||
|
msg() {
|
||||||
|
echo >&2 -e "${1-}"
|
||||||
|
}
|
||||||
|
|
||||||
|
cleanup() {
|
||||||
|
msg "Cleaning up..."
|
||||||
|
rm -rf "${temp_dir}" "vod-resampled.wav" "vod-resampled.wav.srt"
|
||||||
|
}
|
||||||
|
|
||||||
|
print_help() {
|
||||||
|
echo "Usage: ./transcribe-vod <video_url>"
|
||||||
|
echo "See configurable env variables in the script"
|
||||||
|
echo "This will produce an MP4 muxed file called res.mp4 in the working directory"
|
||||||
|
echo "Requirements: ffmpeg yt-dlp whisper"
|
||||||
|
echo "Whisper needs to be built into the main binary with make, then you can rename it to something like 'whisper' and add it to your PATH for convenience."
|
||||||
|
echo "E.g. in the root of Whisper.cpp, run: 'make && cp ./main /usr/local/bin/whisper'"
|
||||||
|
}
|
||||||
|
|
||||||
|
check_requirements() {
|
||||||
|
if ! command -v ffmpeg &>/dev/null; then
|
||||||
|
echo "ffmpeg is required (https://ffmpeg.org)."
|
||||||
|
exit 1
|
||||||
|
fi
|
||||||
|
|
||||||
|
if ! command -v yt-dlp &>/dev/null; then
|
||||||
|
echo "yt-dlp is required (https://github.com/yt-dlp/yt-dlp)."
|
||||||
|
exit 1
|
||||||
|
fi
|
||||||
|
|
||||||
|
if ! command -v "$WHISPER_EXECUTABLE" &>/dev/null; then
|
||||||
|
WHISPER_EXECUTABLE="./main"
|
||||||
|
if ! command -v "$WHISPER_EXECUTABLE" &>/dev/null; then
|
||||||
|
echo "Whisper is required (https://github.com/ggerganov/whisper.cpp)."
|
||||||
|
exit 1
|
||||||
|
fi
|
||||||
|
fi
|
||||||
|
}
|
||||||
|
|
||||||
|
if [[ $# -lt 1 ]]; then
|
||||||
|
print_help
|
||||||
|
exit 1
|
||||||
|
fi
|
||||||
|
|
||||||
|
if [[ "$1" == "help" ]]; then
|
||||||
|
print_help
|
||||||
|
exit 0
|
||||||
|
fi
|
||||||
|
|
||||||
|
temp_dir="tmp"
|
||||||
|
source_url="$1"
|
||||||
|
|
||||||
|
check_requirements
|
||||||
|
|
||||||
|
msg "Downloading VOD..."
|
||||||
|
|
||||||
|
# Optionally add --cookies-from-browser BROWSER[+KEYRING][:PROFILE][::CONTAINER] for members only VODs
|
||||||
|
yt-dlp \
|
||||||
|
-f "bestvideo[ext=mp4]+bestaudio[ext=m4a]/best[ext=mp4]/best" \
|
||||||
|
--embed-thumbnail \
|
||||||
|
--embed-chapters \
|
||||||
|
--xattrs \
|
||||||
|
"${source_url}" -o "${temp_dir}/vod.mp4"
|
||||||
|
|
||||||
|
msg "Extracting audio and resampling..."
|
||||||
|
|
||||||
|
ffmpeg -i "${temp_dir}/vod.mp4" \
|
||||||
|
-hide_banner \
|
||||||
|
-loglevel error \
|
||||||
|
-ar 16000 \
|
||||||
|
-ac 1 \
|
||||||
|
-c:a \
|
||||||
|
pcm_s16le -y "vod-resampled.wav"
|
||||||
|
|
||||||
|
msg "Transcribing to subtitle file..."
|
||||||
|
msg "Whisper specified at: ${WHISPER_EXECUTABLE}"
|
||||||
|
|
||||||
|
$WHISPER_EXECUTABLE \
|
||||||
|
-m "${MODEL_PATH}" \
|
||||||
|
-l "${WHISPER_LANG}" \
|
||||||
|
-f "vod-resampled.wav" \
|
||||||
|
-t 8 \
|
||||||
|
-osrt \
|
||||||
|
--translate
|
||||||
|
|
||||||
|
msg "Embedding subtitle track..."
|
||||||
|
|
||||||
|
ffmpeg -i "${temp_dir}/vod.mp4" \
|
||||||
|
-hide_banner \
|
||||||
|
-loglevel error \
|
||||||
|
-i "vod-resampled.wav.srt" \
|
||||||
|
-c copy \
|
||||||
|
-c:s mov_text \
|
||||||
|
-y res.mp4
|
||||||
|
|
||||||
|
cleanup
|
||||||
|
|
||||||
|
msg "Done! Your finished file is ready: res.mp4"
|
@ -0,0 +1,30 @@
|
|||||||
|
#!/bin/bash
|
||||||
|
#
|
||||||
|
# This is a helper script to deploy all WebAssembly examples to my node
|
||||||
|
# Run from the build directory:
|
||||||
|
#
|
||||||
|
# cd build-em
|
||||||
|
# ../extra/deploy-wasm.sh
|
||||||
|
#
|
||||||
|
|
||||||
|
# check if emcmake is available
|
||||||
|
if ! command -v emcmake &> /dev/null
|
||||||
|
then
|
||||||
|
echo "Error: emscripten environment is not set up"
|
||||||
|
exit
|
||||||
|
fi
|
||||||
|
|
||||||
|
emcmake cmake .. && make -j
|
||||||
|
if [ $? -ne 0 ]; then
|
||||||
|
echo "Error: build failed"
|
||||||
|
exit
|
||||||
|
fi
|
||||||
|
|
||||||
|
# copy all wasm files to the node
|
||||||
|
scp bin/whisper.wasm/* root@linode0:/var/www/html/whisper/ && scp bin/libwhisper.worker.js root@linode0:/var/www/html/whisper/
|
||||||
|
scp bin/stream.wasm/* root@linode0:/var/www/html/whisper/stream/ && scp bin/libstream.worker.js root@linode0:/var/www/html/whisper/stream/
|
||||||
|
scp bin/command.wasm/* root@linode0:/var/www/html/whisper/command/ && scp bin/libcommand.worker.js root@linode0:/var/www/html/whisper/command/
|
||||||
|
scp bin/talk.wasm/* root@linode0:/var/www/html/whisper/talk/ && scp bin/libtalk.worker.js root@linode0:/var/www/html/whisper/talk/
|
||||||
|
|
||||||
|
echo "Done"
|
||||||
|
exit
|
@ -0,0 +1,3 @@
|
|||||||
|
*.wav
|
||||||
|
*.ogg
|
||||||
|
*.wav.txt
|
@ -0,0 +1 @@
|
|||||||
|
My fellow Americans, this day has brought terrible news and great sadness to our country. At 9 o'clock this morning, Mission Control in Houston lost contact with our space shuttle, Columbia. A short time later, debris was seen falling from the skies above Texas. The Colombians lost. There are no survivors. On board was a crew of seven. Colonel Rick Husband, Lieutenant Colonel Michael Anderson, Commander Laurel Clark, Captain David Brown, Commander William McCool, Dr. Kultna Shavla, and Ilan Ramon, a colonel in the Israeli Air Force. These men and women assumed great risk in the service to all humanity. In an age when spaceflight has come to seem almost routine, it is easy to overlook the dangers of travel by rocket and the difficulties of navigating the fierce outer atmosphere of the Earth. These astronauts knew the dangers, and they faced them willingly, knowing they had a high and noble purpose in life. Because of their courage and daring and idealism, we will miss them all the more. All Americans today are thinking as well of the families of these men and women who have been given this sudden shock and grief. You're not alone. Our entire nation grieves with you. And those you love will always have the respect and gratitude of this country. The cause in which they died will continue. Mankind is led into the darkness beyond our world by the inspiration of discovery and the longing to understand. Our journey into space will go on. In the skies today, we saw destruction and tragedy. Yet farther than we can see, there is comfort and hope. In the words of the prophet Isaiah, "Lift your eyes and look to the heavens. Who created all these? He who brings out the starry hosts one by one and calls them each by name." Because of His great power and mighty strength, not one of them is missing. The same Creator who names the stars also knows the names of the seven souls we mourn today. 
The crew of the shuttle Columbia did not return safely to Earth, yet we can pray that all are safely home. May God bless the grieving families. And may God continue to bless America. [Silence]
|
@ -0,0 +1 @@
|
|||||||
|
Henry F. Phillips from Wikipedia, the free encyclopedia at en.wikipedia.org. Henry F. Phillips from Wikipedia, the free encyclopedia. Henry F. Phillips 1890-1958, a U.S. businessman from Portland, Oregon, has the honor of having the Phillips head screw and screwdriver named after him. The importance of the cross head screw design lies in its self-centering property, useful on automated production lines that use powered screwdrivers. Phillips' major contribution was in driving the cross head concept forward to the point where it was adopted by screw makers and automobile companies. Although he received patents for the design in 1936, U.S. Patent #2,046,343, U.S. Patents #2,046,837 to #2,046,840, it was so widely copied that by 1949 Phillips lost his patent. The American Screw Company was responsible for devising a means of manufacturing the screw, and successfully patented and licensed their method. Other screw makers of the 1930s dismissed the Phillips concept since it calls for a relatively complex recessed socket shape in the head of the screw, as distinct from the simple milled slot of a slotted type screw. The Phillips Screw Company and the American Screw Company went on to devise the Pawsadrive screw, which differs from the Phillips in that it is designed to accommodate greater torque than the Phillips. An image accompanied this article, captioned "Phillips Screw Head." The following is an info box which accompanies this article. Info box, part of the series on screw drive types. Slotted, commonly erroneously flat head. Phillips, cross head. Pawsadrive, super drive. Torques. Hex, Allen. Robertson. Tri-wing. Torx set. Spanner head. Triple square, XZN. Others, poly drive, spline drive, double hex. Many images accompanied this info box. This page was last modified on the 9th of April, 2008, at 1704. All text is available under the terms of the GNU Free Documentation License. See copyrights for details. 
Wikipedia is a registered trademark of the Wikimedia Foundation Incorporated, a U.S. registered 501(c)(3) tax-deductible nonprofit charity. This sound file and all text in the article are licensed under the GNU Free Documentation License, available at www.gnu.org/copyleft/fdl.html.
|
@ -0,0 +1 @@
|
|||||||
|
This is the Micro Machine Man presenting the most midget miniature motorcade of Micro Machines. Each one has dramatic details, terrific trim, precision paint jobs, plus incredible Micro Machine Pocket Playsets. There's a police station, fire station, restaurant, service station, and more. Perfect pocket portables to take anyplace. And there are many miniature playsets to play with, and each one comes with its own special edition Micro Machine vehicle and fun, fantastic features that miraculously move. Raise the boat lift at the airport marina, man the gun turret at the army base, clean your car at the car wash, raise the toll bridge. And these playsets fit together to form a Micro Machine world. Micro Machine Pocket Playsets, so tremendously tiny, so perfectly precise, so dazzlingly detailed, you'll want to pocket them all. Micro Machines are Micro Machine Pocket Playsets sold separately from Galoob. The smaller they are, the better they are.
|
@ -0,0 +1 @@
|
|||||||
|
Hola, como están todos? Mi nombre es Julián Virrueta Mendoza y en este podcast les vengo a hablar sobre la contaminación del agua. Bueno, empezaré por decir que el ser humano no está midiendo las consecuencias de sus actos. No hay duda que uno de los mayores problemas a los que se enfrentan muchas poblaciones actualmente es la contaminación del agua. Principalmente porque como bien sabemos el agua prácticamente es fundamental para la vida, por lo que la contaminación puede ser algo muy negativo para el desarrollo tanto económico como social de los pueblos o de las poblaciones próximas en ese lugar contaminado. Los comienzos de la contaminación, como lo definen muchos expertos en la materia, la contaminación del agua es causada por las actividades humanas. Es un fenómeno ambiental de importancia, el cual se comienza a producir desde los primeros intentos de industrialización para transformarse luego en un problema tan habitual como generalizado. Generalmente la contaminación del agua se produce a través de la introducción directa o indirecta en los acuíferos o caos de agua, ríos, mares, lagos, océanos, etc. o de diversas sustancias que pueden ser consideradas como contaminantes. Pero existen dos formas principales de contaminación del agua. Una de ellas tiene que ver con la contaminación natural del agua que se corresponde con el ciclo natural de esta durante el que puede entrar en contacto con ciertos constituyentes contaminantes como sustancias minerales y orgánicas disueltas o en suspensión que se vierten en la corteza terrestre, la atmósfera y en las aguas. Pero todo esto se puede contradecir si el ser humano comía sus consecuencias, si no tirara basura a los lagos, a los ríos, no tirara botes de aceite, no contaminara. Bueno amigos, yo los invito a que no contaminen el agua y que sepan cuidar la naturaleza. Los saluda su buen amigo y compañero Julián Virreta. Nos vemos. ¡Claro!
|
@ -0,0 +1,125 @@
|
|||||||
|
#!/bin/bash
|
||||||
|
|
||||||
|
# This scripts run the selected model agains a collection of audio files from the web.
|
||||||
|
# It downloads, converts and transcribes each file and then compares the result with the expected reference
|
||||||
|
# transcription. The comparison is performed using git's diff command and shows the differences at the character level.
|
||||||
|
# It can be used to quickly verify that the model is working as expected across a wide range of audio files.
|
||||||
|
# I.e. like an integration test. The verification is done by visual inspection of the diff output.
|
||||||
|
#
|
||||||
|
# The reference data can be for example generated using the original OpenAI Whisper implementation, or entered manually.
|
||||||
|
#
|
||||||
|
# Feel free to suggest extra audio files to add to the list.
|
||||||
|
# Make sure they are between 1-3 minutes long since we don't want to make the test too slow.
|
||||||
|
#
|
||||||
|
# Usage:
|
||||||
|
#
|
||||||
|
# ./tests/run-tests.sh <model_name>
|
||||||
|
#
|
||||||
|
|
||||||
|
cd `dirname $0`
|
||||||
|
|
||||||
|
# Whisper models
|
||||||
|
models=( "tiny.en" "tiny" "base.en" "base" "small.en" "small" "medium.en" "medium" "large" )
|
||||||
|
|
||||||
|
# list available models
|
||||||
|
function list_models {
|
||||||
|
printf "\n"
|
||||||
|
printf " Available models:"
|
||||||
|
for model in "${models[@]}"; do
|
||||||
|
printf " $model"
|
||||||
|
done
|
||||||
|
printf "\n\n"
|
||||||
|
}
|
||||||
|
|
||||||
|
if [ $# -eq 0 ]; then
|
||||||
|
printf "Usage: $0 [model]\n\n"
|
||||||
|
printf "No model specified. Aborting\n"
|
||||||
|
list_models
|
||||||
|
exit 1
|
||||||
|
fi
|
||||||
|
|
||||||
|
model=$1
|
||||||
|
main="../main"
|
||||||
|
|
||||||
|
if [ ! -f ../models/ggml-$model.bin ]; then
|
||||||
|
printf "Model $model not found. Aborting\n"
|
||||||
|
list_models
|
||||||
|
exit 1
|
||||||
|
fi
|
||||||
|
|
||||||
|
if [ ! -f $main ]; then
|
||||||
|
printf "Executable $main not found. Aborting\n"
|
||||||
|
exit 1
|
||||||
|
fi
|
||||||
|
|
||||||
|
# add various audio files for testing purposes here
|
||||||
|
# the order of the files is important so don't change the existing order
|
||||||
|
# when adding new files, make sure to add the expected "ref.txt" file with the correct transcript
|
||||||
|
urls_en=(
|
||||||
|
"https://upload.wikimedia.org/wikipedia/commons/1/1f/George_W_Bush_Columbia_FINAL.ogg"
|
||||||
|
"https://upload.wikimedia.org/wikipedia/en/d/d4/En.henryfphillips.ogg"
|
||||||
|
"https://cdn.openai.com/whisper/draft-20220913a/micro-machines.wav"
|
||||||
|
)
|
||||||
|
|
||||||
|
urls_es=(
|
||||||
|
"https://upload.wikimedia.org/wikipedia/commons/c/c1/La_contaminacion_del_agua.ogg"
|
||||||
|
)
|
||||||
|
|
||||||
|
urls_it=(
|
||||||
|
)
|
||||||
|
|
||||||
|
urls_pt=(
|
||||||
|
)
|
||||||
|
|
||||||
|
urls_de=(
|
||||||
|
)
|
||||||
|
|
||||||
|
urls_jp=(
|
||||||
|
)
|
||||||
|
|
||||||
|
urls_ru=(
|
||||||
|
)
|
||||||
|
|
||||||
|
function run_lang() {
|
||||||
|
lang=$1
|
||||||
|
shift
|
||||||
|
urls=("$@")
|
||||||
|
|
||||||
|
i=0
|
||||||
|
for url in "${urls[@]}"; do
|
||||||
|
echo "- [$lang] Processing '$url' ..."
|
||||||
|
|
||||||
|
ext="${url##*.}"
|
||||||
|
fname_src="$lang-${i}.${ext}"
|
||||||
|
fname_dst="$lang-${i}-16khz.wav"
|
||||||
|
|
||||||
|
if [ ! -f $fname_src ]; then
|
||||||
|
wget --quiet --show-progress -O $fname_src $url
|
||||||
|
fi
|
||||||
|
|
||||||
|
if [ ! -f $fname_dst ]; then
|
||||||
|
ffmpeg -loglevel -0 -y -i $fname_src -ar 16000 -ac 1 -c:a pcm_s16le $fname_dst
|
||||||
|
if [ $? -ne 0 ]; then
|
||||||
|
echo "Error: ffmpeg failed to convert $fname_src to $fname_dst"
|
||||||
|
exit 1
|
||||||
|
fi
|
||||||
|
fi
|
||||||
|
|
||||||
|
$main -m ../models/ggml-$model.bin -f $fname_dst -l $lang -otxt 2> /dev/null
|
||||||
|
|
||||||
|
git diff --no-index --word-diff=color --word-diff-regex=. $fname_dst.txt $lang-$i-ref.txt
|
||||||
|
|
||||||
|
i=$(($i+1))
|
||||||
|
done
|
||||||
|
}
|
||||||
|
|
||||||
|
run_lang "en" "${urls_en[@]}"
|
||||||
|
|
||||||
|
if [[ $model != *.en ]]; then
|
||||||
|
run_lang "es" "${urls_es[@]}"
|
||||||
|
run_lang "it" "${urls_it[@]}"
|
||||||
|
run_lang "pt" "${urls_pt[@]}"
|
||||||
|
run_lang "de" "${urls_de[@]}"
|
||||||
|
run_lang "jp" "${urls_jp[@]}"
|
||||||
|
run_lang "ru" "${urls_ru[@]}"
|
||||||
|
fi
|
Loading…
Reference in new issue