parent
dd1f4dfbab
commit
8ca553add4
@ -1,6 +1,15 @@
|
||||
#
|
||||
# whisper
|
||||
|
||||
add_library(whisper-cpp SHARED
|
||||
whisper.cpp
|
||||
)
|
||||
|
||||
target_link_libraries(whisper-cpp PRIVATE
|
||||
ggml
|
||||
)
|
||||
|
||||
set(TEST_TARGET whisper)
|
||||
add_executable(${TEST_TARGET} main.cpp)
|
||||
target_link_libraries(${TEST_TARGET} PRIVATE ggml ggml_utils)
|
||||
target_link_libraries(${TEST_TARGET} PRIVATE whisper-cpp)
|
||||
target_include_directories(${TEST_TARGET} PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/..)
|
||||
|
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
@ -0,0 +1,147 @@
|
||||
#ifndef WHISPER_H
|
||||
#define WHISPER_H
|
||||
|
||||
#include <stdint.h>
|
||||
|
||||
#ifdef WHISPER_SHARED
|
||||
# ifdef _WIN32
|
||||
# ifdef WHISPER_BUILD
|
||||
# define WHISPER_API __declspec(dllexport)
|
||||
# else
|
||||
# define WHISPER_API __declspec(dllimport)
|
||||
# endif
|
||||
# else
|
||||
# define WHISPER_API __attribute__ ((visibility ("default")))
|
||||
# endif
|
||||
#else
|
||||
# define WHISPER_API
|
||||
#endif
|
||||
|
||||
#define WHISPER_SAMPLE_RATE 16000
|
||||
#define WHISPER_N_FFT 400
|
||||
#define WHISPER_N_MEL 80
|
||||
#define WHISPER_HOP_LENGTH 160
|
||||
#define WHISPER_CHUNK_SIZE 30
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
//
|
||||
// C interface
|
||||
//
|
||||
|
||||
// TODO: documentation will come soon
|
||||
|
||||
struct whisper_context;
|
||||
|
||||
typedef int whisper_token;
|
||||
|
||||
WHISPER_API struct whisper_context * whisper_init(const char * path_model);
|
||||
WHISPER_API void whisper_free(struct whisper_context * ctx);
|
||||
|
||||
WHISPER_API int whisper_pcm_to_mel(
|
||||
struct whisper_context * ctx,
|
||||
const float * samples,
|
||||
int n_samples,
|
||||
int n_threads);
|
||||
|
||||
// n_mel must be 80
|
||||
WHISPER_API int whisper_set_mel(
|
||||
struct whisper_context * ctx,
|
||||
const float * data,
|
||||
int n_len,
|
||||
int n_mel);
|
||||
|
||||
WHISPER_API int whisper_encode(
|
||||
struct whisper_context * ctx,
|
||||
int offset,
|
||||
int n_threads);
|
||||
|
||||
WHISPER_API int whisper_decode(
|
||||
struct whisper_context * ctx,
|
||||
const whisper_token * tokens,
|
||||
int n_tokens,
|
||||
int n_past,
|
||||
int n_threads);
|
||||
|
||||
WHISPER_API whisper_token whisper_sample_best(struct whisper_context * ctx, bool need_timestamp);
|
||||
WHISPER_API whisper_token whisper_sample_timestamp(struct whisper_context * ctx);
|
||||
|
||||
// return the id of the specified language, returns -1 if not found
|
||||
WHISPER_API int whisper_lang_id(const char * lang);
|
||||
|
||||
WHISPER_API int whisper_n_len (struct whisper_context * ctx); // mel length
|
||||
WHISPER_API int whisper_n_vocab (struct whisper_context * ctx);
|
||||
WHISPER_API int whisper_n_text_ctx (struct whisper_context * ctx);
|
||||
WHISPER_API int whisper_is_multilingual(struct whisper_context * ctx);
|
||||
WHISPER_API float * whisper_get_probs (struct whisper_context * ctx);
|
||||
|
||||
WHISPER_API const char * whisper_token_to_str(struct whisper_context * ctx, whisper_token token);
|
||||
|
||||
WHISPER_API whisper_token whisper_token_eot (struct whisper_context * ctx);
|
||||
WHISPER_API whisper_token whisper_token_sot (struct whisper_context * ctx);
|
||||
WHISPER_API whisper_token whisper_token_prev(struct whisper_context * ctx);
|
||||
WHISPER_API whisper_token whisper_token_solm(struct whisper_context * ctx);
|
||||
WHISPER_API whisper_token whisper_token_not (struct whisper_context * ctx);
|
||||
WHISPER_API whisper_token whisper_token_beg (struct whisper_context * ctx);
|
||||
|
||||
WHISPER_API whisper_token whisper_token_translate ();
|
||||
WHISPER_API whisper_token whisper_token_transcribe();
|
||||
|
||||
WHISPER_API void whisper_print_timings(struct whisper_context * ctx);
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
enum whisper_decode_strategy {
|
||||
WHISPER_DECODE_GREEDY,
|
||||
WHISPER_DECODE_BEAM_SEARCH,
|
||||
};
|
||||
|
||||
struct whisper_full_params {
|
||||
enum whisper_decode_strategy strategy;
|
||||
|
||||
int n_threads;
|
||||
|
||||
bool translate;
|
||||
bool print_special_tokens;
|
||||
bool print_progress;
|
||||
bool print_realtime;
|
||||
bool print_timestamps;
|
||||
|
||||
const char * language;
|
||||
|
||||
union {
|
||||
struct {
|
||||
int n_past;
|
||||
} greedy;
|
||||
|
||||
struct {
|
||||
int n_past;
|
||||
int beam_width;
|
||||
int n_best;
|
||||
} beam_search;
|
||||
};
|
||||
};
|
||||
|
||||
WHISPER_API struct whisper_full_params whisper_full_default_params(enum whisper_decode_strategy strategy);
|
||||
|
||||
// full whisper run - encode + decode
|
||||
WHISPER_API int whisper_full(
|
||||
struct whisper_context * ctx,
|
||||
struct whisper_full_params params,
|
||||
const float * samples,
|
||||
int n_samples);
|
||||
|
||||
WHISPER_API int whisper_full_n_segments(struct whisper_context * ctx);
|
||||
|
||||
WHISPER_API int64_t whisper_full_get_segment_t0(struct whisper_context * ctx, int i_segment);
|
||||
WHISPER_API int64_t whisper_full_get_segment_t1(struct whisper_context * ctx, int i_segment);
|
||||
|
||||
WHISPER_API const char * whisper_full_get_segment_text(struct whisper_context * ctx, int i_segment);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif
|
Loading…
Reference in new issue