|
|
@ -2357,15 +2357,15 @@ struct whisper_token_data whisper_sample_timestamp(struct whisper_context * ctx,
|
|
|
|
return res;
|
|
|
|
return res;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
int whisper_tokenize(struct whisper_context * ctx, const char * text, whisper_token * tokens, std::size_t n_max_tokens) {
|
|
|
|
int whisper_tokenize(struct whisper_context * ctx, const char * text, whisper_token * tokens, int n_max_tokens) {
|
|
|
|
const auto res = tokenize(ctx->vocab, text);
|
|
|
|
const auto res = tokenize(ctx->vocab, text);
|
|
|
|
|
|
|
|
|
|
|
|
if (res.size() > n_max_tokens) {
|
|
|
|
if (res.size() > n_max_tokens) {
|
|
|
|
fprintf(stderr, "%s: too many resulting tokens: %d (max %zu)\n", __func__, (int) res.size(), n_max_tokens);
|
|
|
|
fprintf(stderr, "%s: too many resulting tokens: %d (max %d)\n", __func__, (int) res.size(), n_max_tokens);
|
|
|
|
return -1;
|
|
|
|
return -1;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
for (std::size_t i = 0; i < res.size(); i++) {
|
|
|
|
for (int i = 0; i < (int) res.size(); i++) {
|
|
|
|
tokens[i] = res[i];
|
|
|
|
tokens[i] = res[i];
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
@ -2464,7 +2464,7 @@ int whisper_lang_auto_detect(
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
{
|
|
|
|
{
|
|
|
|
for (std::size_t i = 0; i < probs_id.size(); i++) {
|
|
|
|
for (int i = 0; i < (int) probs_id.size(); i++) {
|
|
|
|
if (lang_probs) {
|
|
|
|
if (lang_probs) {
|
|
|
|
lang_probs[probs_id[i].second] = probs_id[i].first;
|
|
|
|
lang_probs[probs_id[i].second] = probs_id[i].first;
|
|
|
|
}
|
|
|
|
}
|
|
|
|