sync : whisper.cpp

pull/12/head
Georgi Gerganov 2 years ago
parent 7b70c5a561
commit 270829aa9f
No known key found for this signature in database
GPG Key ID: 449E073F9DC10735

@ -271,7 +271,7 @@ byte_decoder = {v:k for k, v in byte_encoder.items()}
fout.write(struct.pack("i", len(tokens))) fout.write(struct.pack("i", len(tokens)))
for key in tokens: for key in tokens:
text = bytearray([byte_decoder[c] for c in key]).decode('utf-8', errors='replace').encode('utf-8') text = bytearray([byte_decoder[c] for c in key])
fout.write(struct.pack("i", len(text))) fout.write(struct.pack("i", len(text)))
fout.write(text) fout.write(text)

@ -156,11 +156,11 @@ static const std::map<e_model, size_t> MEM_REQ_ENCODE_LAYER = {
}; };
static const std::map<e_model, size_t> MEM_REQ_DECODE = { static const std::map<e_model, size_t> MEM_REQ_DECODE = {
{ MODEL_TINY, 94ull*MB }, { MODEL_TINY, 200ull*MB },
{ MODEL_BASE, 96ull*MB }, { MODEL_BASE, 202ull*MB },
{ MODEL_SMALL, 98ull*MB }, { MODEL_SMALL, 204ull*MB },
{ MODEL_MEDIUM, 100ull*MB }, { MODEL_MEDIUM, 206ull*MB },
{ MODEL_LARGE, 102ull*MB }, { MODEL_LARGE, 208ull*MB },
}; };
static const std::map<e_model, size_t> MEM_REQ_DECODE_LAYER = { static const std::map<e_model, size_t> MEM_REQ_DECODE_LAYER = {
@ -2314,6 +2314,12 @@ int whisper_full(
struct whisper_full_params params, struct whisper_full_params params,
const float * samples, const float * samples,
int n_samples) { int n_samples) {
// clear old results
auto & result_all = ctx->result_all;
auto & result_cur = ctx->result_cur;
result_all.clear();
// compute log mel spectrogram // compute log mel spectrogram
if (whisper_pcm_to_mel(ctx, samples, n_samples, params.n_threads) != 0) { if (whisper_pcm_to_mel(ctx, samples, n_samples, params.n_threads) != 0) {
fprintf(stderr, "%s: failed to compute log mel spectrogram\n", __func__); fprintf(stderr, "%s: failed to compute log mel spectrogram\n", __func__);
@ -2344,11 +2350,6 @@ int whisper_full(
} }
} }
auto & result_all = ctx->result_all;
auto & result_cur = ctx->result_cur;
result_all.clear();
int progress_prev = 0; int progress_prev = 0;
int progress_step = 5; int progress_step = 5;
@ -2424,7 +2425,7 @@ int whisper_full(
whisper_token id = 0; whisper_token id = 0;
whisper_token tid = whisper_token_beg(ctx); whisper_token tid = whisper_token_beg(ctx);
id = whisper_sample_best(ctx, result_len == 0); id = whisper_sample_best(ctx, result_len == 0 || i > 32);
if (i > 0) { if (i > 0) {
tid = whisper_sample_timestamp(ctx); tid = whisper_sample_timestamp(ctx);
} }
@ -2444,7 +2445,9 @@ int whisper_full(
// end of text token // end of text token
if (id == whisper_token_eot(ctx)) { if (id == whisper_token_eot(ctx)) {
if (result_len == 0) { if (result_len == 0) {
result_len = i + 1; // TODO: figure out how to resolve this
fprintf(stderr, "\n%s: failed to generate timestamp token - this should not happen\n\n", __func__);
//result_len = i + 1;
} }
break; break;
} }

@ -2,7 +2,7 @@
#if defined(_MSC_VER) || defined(__MINGW32__) #if defined(_MSC_VER) || defined(__MINGW32__)
#include <malloc.h> // using malloc.h with MSC/MINGW #include <malloc.h> // using malloc.h with MSC/MINGW
#else #elif !defined(__FreeBSD__)
#include <alloca.h> #include <alloca.h>
#endif #endif

Loading…
Cancel
Save