diff --git a/README.md b/README.md
index 5f02706..fd141b8 100644
--- a/README.md
+++ b/README.md
@@ -466,5 +466,6 @@ Some of the examples are even ported to run in the browser using WebAssembly. Ch
 | [examples/whisper.nvim](examples/whisper.nvim) | | Speech-to-text plugin for Neovim |
 | [examples/generate-karaoke.sh](examples/generate-karaoke.sh) | | Helper script to easily [generate a karaoke video](https://youtu.be/uj7hVta4blM) of raw audio capture |
 | [examples/livestream.sh](examples/livestream.sh) | | [Livestream audio transcription](https://github.com/ggerganov/whisper.cpp/issues/185) |
+| [examples/yt-wsp.sh](examples/yt-wsp.sh) | | Download + transcribe and/or translate any VOD [(original)](https://gist.github.com/DaniruKun/96f763ec1a037cc92fe1a059b643b818) |
 
 ## [Frequently asked questions (#126)](https://github.com/ggerganov/whisper.cpp/discussions/126)
diff --git a/examples/yt-wsp.sh b/examples/yt-wsp.sh
new file mode 100755
index 0000000..0e41b1c
--- /dev/null
+++ b/examples/yt-wsp.sh
@@ -0,0 +1,167 @@
+#!/usr/bin/env bash
+
+# Small shell script to more easily automatically download and transcribe live stream VODs.
+# This uses YT-DLP, ffmpeg and the CPP version of Whisper: https://github.com/ggerganov/whisper.cpp
+# Use `./yt-wsp.sh help` to print help info.
+
+# MIT License
+
+# Copyright (c) 2022 Daniils Petrovs
+
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to deal
+# in the Software without restriction, including without limitation the rights
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+
+# The above copyright notice and this permission notice shall be included in all
+# copies or substantial portions of the Software.
+
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+# SOFTWARE.
+
+set -Eeuo pipefail
+
+# You can find how to download models in the OG repo: https://github.com/ggerganov/whisper.cpp/#usage
+MODEL_PATH="${MODEL_PATH:-models/ggml-base.en.bin}" # Set to a multilingual model if you want to translate from foreign lang to en
+WHISPER_EXECUTABLE="${WHISPER_EXECUTABLE:-whisper}" # Where to find the whisper.cpp executable
+WHISPER_LANG="${WHISPER_LANG:-en}" # Set to desired lang to translate from
+WHISPER_THREADS="${WHISPER_THREADS:-8}" # Number of threads handed to whisper via -t
+
+# Final muxed video, written to the current working directory.
+readonly OUTPUT_FILE="res.mp4"
+
+# Scratch directory for the download and intermediate files; set in main().
+temp_dir=""
+
+# Print a message to stderr, interpreting backslash escapes.
+msg() {
+  printf '%b\n' "${1-}" >&2
+}
+
+# Remove the scratch directory. Installed as an EXIT trap so it also runs when
+# a step fails under `set -e`, not only after a successful run.
+cleanup() {
+  if [[ -n "${temp_dir}" && -d "${temp_dir}" ]]; then
+    msg "Cleaning up..."
+    rm -rf -- "${temp_dir}"
+  fi
+}
+
+# Print usage information to stdout.
+print_help() {
+  echo "Usage: ./yt-wsp.sh <video_url>"
+  echo "See configurable env variables in the script"
+  echo "This will produce an MP4 muxed file called res.mp4 in the working directory"
+  echo "Requirements: ffmpeg yt-dlp whisper"
+  echo "Whisper needs to be built into the main binary with make, then you can rename it to something like 'whisper' and add it to your PATH for convenience."
+  echo "E.g. in the root of Whisper.cpp, run: 'make && cp ./main /usr/local/bin/whisper'"
+}
+
+# Verify that every external tool and the model file are available before the
+# download starts; exits with status 1 on the first missing one.
+# Falls back to ./main when WHISPER_EXECUTABLE cannot be found.
+check_requirements() {
+  if ! command -v ffmpeg &>/dev/null; then
+    msg "ffmpeg is required (https://ffmpeg.org)."
+    exit 1
+  fi
+
+  if ! command -v yt-dlp &>/dev/null; then
+    msg "yt-dlp is required (https://github.com/yt-dlp/yt-dlp)."
+    exit 1
+  fi
+
+  if ! command -v "${WHISPER_EXECUTABLE}" &>/dev/null; then
+    WHISPER_EXECUTABLE="./main"
+    if ! command -v "${WHISPER_EXECUTABLE}" &>/dev/null; then
+      msg "Whisper is required (https://github.com/ggerganov/whisper.cpp)."
+      exit 1
+    fi
+  fi
+
+  if [[ ! -f "${MODEL_PATH}" ]]; then
+    msg "Model file not found: ${MODEL_PATH} (see https://github.com/ggerganov/whisper.cpp/#usage)."
+    exit 1
+  fi
+}
+
+# Entry point: download the VOD at URL $1, transcribe it with whisper and mux
+# the resulting subtitle track into res.mp4 in the working directory.
+main() {
+  if [[ $# -lt 1 ]]; then
+    print_help
+    exit 1
+  fi
+
+  case "$1" in
+    help | -h | --help)
+      print_help
+      exit 0
+      ;;
+  esac
+
+  local source_url="$1"
+
+  check_requirements
+
+  # Use a uniquely named directory inside the working directory: a fixed name
+  # such as "tmp" would make cleanup delete a directory the user already had,
+  # and the system temp dir may be too small for a full-length VOD.
+  trap cleanup EXIT
+  temp_dir="$(mktemp -d "${PWD}/yt-wsp.XXXXXX")"
+
+  local video="${temp_dir}/vod.mp4"
+  local audio="${temp_dir}/vod-resampled.wav"
+  local subtitles="${audio}.srt" # whisper -osrt names its output <input>.srt
+
+  msg "Downloading VOD..."
+
+  # Optionally add --cookies-from-browser BROWSER[+KEYRING][:PROFILE][::CONTAINER] for members only VODs
+  yt-dlp \
+    -f "bestvideo[ext=mp4]+bestaudio[ext=m4a]/best[ext=mp4]/best" \
+    --embed-thumbnail \
+    --embed-chapters \
+    --xattrs \
+    -o "${video}" \
+    -- "${source_url}"
+
+  msg "Extracting audio and resampling..."
+
+  ffmpeg -hide_banner -loglevel error \
+    -i "${video}" \
+    -ar 16000 \
+    -ac 1 \
+    -c:a pcm_s16le \
+    -y "${audio}"
+
+  msg "Transcribing to subtitle file..."
+  msg "Whisper specified at: ${WHISPER_EXECUTABLE}"
+
+  "${WHISPER_EXECUTABLE}" \
+    -m "${MODEL_PATH}" \
+    -l "${WHISPER_LANG}" \
+    -f "${audio}" \
+    -t "${WHISPER_THREADS}" \
+    -osrt \
+    --translate
+
+  msg "Embedding subtitle track..."
+
+  ffmpeg -hide_banner -loglevel error \
+    -i "${video}" \
+    -i "${subtitles}" \
+    -c copy \
+    -c:s mov_text \
+    -y "${OUTPUT_FILE}"
+
+  msg "Done! Your finished file is ready: ${OUTPUT_FILE}"
+}
+
+main "$@"