|
|
|
@ -84,6 +84,7 @@ struct whisper_params {
|
|
|
|
|
std::string model = "models/ggml-base.en.bin";
|
|
|
|
|
|
|
|
|
|
std::vector<std::string> fname_inp = {};
|
|
|
|
|
std::vector<std::string> fname_outp = {};
|
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
// Forward declaration. Prints the command-line option help to stderr, showing
// the values currently held in `params` next to each boolean option.
void whisper_print_usage(int argc, char ** argv, const whisper_params & params);
|
|
|
|
@ -121,6 +122,7 @@ bool whisper_params_parse(int argc, char ** argv, whisper_params & params) {
|
|
|
|
|
else if (arg == "-osrt" || arg == "--output-srt") { params.output_srt = true; }
|
|
|
|
|
else if (arg == "-owts" || arg == "--output-words") { params.output_wts = true; }
|
|
|
|
|
else if (arg == "-ocsv" || arg == "--output-csv") { params.output_csv = true; }
|
|
|
|
|
else if (arg == "-of" || arg == "--output-file") { params.fname_outp.emplace_back(argv[++i]); }
|
|
|
|
|
else if (arg == "-ps" || arg == "--print-special") { params.print_special = true; }
|
|
|
|
|
else if (arg == "-pc" || arg == "--print-colors") { params.print_colors = true; }
|
|
|
|
|
else if (arg == "-pp" || arg == "--print-progress") { params.print_progress = true; }
|
|
|
|
@ -165,6 +167,7 @@ void whisper_print_usage(int /*argc*/, char ** argv, const whisper_params & para
|
|
|
|
|
fprintf(stderr, " -osrt, --output-srt [%-7s] output result in a srt file\n", params.output_srt ? "true" : "false");
|
|
|
|
|
fprintf(stderr, " -owts, --output-words [%-7s] output script for generating karaoke video\n", params.output_wts ? "true" : "false");
|
|
|
|
|
fprintf(stderr, " -ocsv, --output-csv [%-7s] output result in a CSV file\n", params.output_csv ? "true" : "false");
|
|
|
|
|
fprintf(stderr, " -of FNAME, --output-file FNAME [%-7s] output file path (without file extension)\n", "");
|
|
|
|
|
fprintf(stderr, " -ps, --print-special [%-7s] print special tokens\n", params.print_special ? "true" : "false");
|
|
|
|
|
fprintf(stderr, " -pc, --print-colors [%-7s] print colors\n", params.print_colors ? "true" : "false");
|
|
|
|
|
fprintf(stderr, " -pp, --print-progress [%-7s] print progress\n", params.print_progress ? "true" : "false");
|
|
|
|
@ -514,6 +517,7 @@ int main(int argc, char ** argv) {
|
|
|
|
|
|
|
|
|
|
for (int f = 0; f < (int) params.fname_inp.size(); ++f) {
|
|
|
|
|
const auto fname_inp = params.fname_inp[f];
|
|
|
|
|
// Base path (no extension) for this input's output files: use the f-th
// -of/--output-file value when one was supplied and is non-empty, otherwise
// fall back to the input path itself.
// The (int) cast keeps the comparison signed, matching the loop bound above,
// and avoids a signed/unsigned comparison between `f` and size().
const auto fname_outp = f < (int) params.fname_outp.size() && !params.fname_outp[f].empty() ? params.fname_outp[f] : params.fname_inp[f];
|
|
|
|
|
|
|
|
|
|
std::vector<float> pcmf32; // mono-channel F32 PCM
|
|
|
|
|
std::vector<std::vector<float>> pcmf32s; // stereo-channel F32 PCM
|
|
|
|
@ -692,31 +696,31 @@ int main(int argc, char ** argv) {
|
|
|
|
|
|
|
|
|
|
// output to text file
|
|
|
|
|
if (params.output_txt) {
|
|
|
|
|
const auto fname_txt = fname_inp + ".txt";
|
|
|
|
|
const auto fname_txt = fname_outp + ".txt";
|
|
|
|
|
output_txt(ctx, fname_txt.c_str());
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// output to VTT file
|
|
|
|
|
if (params.output_vtt) {
|
|
|
|
|
const auto fname_vtt = fname_inp + ".vtt";
|
|
|
|
|
const auto fname_vtt = fname_outp + ".vtt";
|
|
|
|
|
output_vtt(ctx, fname_vtt.c_str());
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// output to SRT file
|
|
|
|
|
if (params.output_srt) {
|
|
|
|
|
const auto fname_srt = fname_inp + ".srt";
|
|
|
|
|
const auto fname_srt = fname_outp + ".srt";
|
|
|
|
|
output_srt(ctx, fname_srt.c_str(), params);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// output to WTS file
|
|
|
|
|
if (params.output_wts) {
|
|
|
|
|
const auto fname_wts = fname_inp + ".wts";
|
|
|
|
|
const auto fname_wts = fname_outp + ".wts";
|
|
|
|
|
output_wts(ctx, fname_wts.c_str(), fname_inp.c_str(), params, float(pcmf32.size() + 1000)/WHISPER_SAMPLE_RATE);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// output to CSV file
|
|
|
|
|
if (params.output_csv) {
|
|
|
|
|
const auto fname_csv = fname_inp + ".csv";
|
|
|
|
|
const auto fname_csv = fname_outp + ".csv";
|
|
|
|
|
output_csv(ctx, fname_csv.c_str());
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|