|
|
|
@ -2846,6 +2846,26 @@ static void whisper_exp_compute_token_level_timestamps(
|
|
|
|
|
float thold_pt,
|
|
|
|
|
float thold_ptsum);
|
|
|
|
|
|
|
|
|
|
// Strip leading whitespace from `s`, modifying the string in place.
// Whitespace is whatever std::isspace reports for each byte.
static inline void ltrim(std::string &s) {
    // Taking the byte as `unsigned char` avoids passing a negative value to std::isspace.
    const auto is_not_space = [](unsigned char c) { return std::isspace(c) == 0; };

    // First character that must be kept; equals s.end() when the string is empty or all whitespace.
    const auto first_kept = std::find_if(s.begin(), s.end(), is_not_space);
    s.erase(s.begin(), first_kept);
}
|
|
|
|
|
|
|
|
|
|
// Strip trailing whitespace from `s`, modifying the string in place.
// Whitespace is whatever std::isspace reports for each byte.
static inline void rtrim(std::string &s) {
    // Taking the byte as `unsigned char` avoids passing a negative value to std::isspace.
    const auto is_not_space = [](unsigned char c) { return std::isspace(c) == 0; };

    // Walk backwards to the last character that must be kept. Converting the
    // reverse iterator with base() yields the forward position just past it,
    // which is where the erase has to start.
    const auto last_kept = std::find_if(s.rbegin(), s.rend(), is_not_space);
    s.erase(last_kept.base(), s.end());
}
|
|
|
|
|
|
|
|
|
|
// trim from both ends (in place)
|
|
|
|
|
static inline void trim(std::string &s) {
|
|
|
|
|
rtrim(s);
|
|
|
|
|
ltrim(s);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// wrap the last segment to max_len characters
|
|
|
|
|
// returns the number of new segments
|
|
|
|
|
static int whisper_wrap_segment(struct whisper_context & ctx, int max_len) {
|
|
|
|
@ -2869,7 +2889,7 @@ static int whisper_wrap_segment(struct whisper_context & ctx, int max_len) {
|
|
|
|
|
|
|
|
|
|
if (acc + cur > max_len && i > 0 && s.substr(0, 1) == " ") {
|
|
|
|
|
// split here
|
|
|
|
|
::trim(text);
|
|
|
|
|
trim(text);
|
|
|
|
|
ctx.result_all.back().text = std::move(text);
|
|
|
|
|
ctx.result_all.back().t1 = token.t0;
|
|
|
|
|
ctx.result_all.back().tokens.resize(i);
|
|
|
|
@ -2897,7 +2917,7 @@ static int whisper_wrap_segment(struct whisper_context & ctx, int max_len) {
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
::trim(text);
|
|
|
|
|
trim(text);
|
|
|
|
|
ctx.result_all.back().text = std::move(text);
|
|
|
|
|
|
|
|
|
|
return res;
|
|
|
|
|