You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
64 lines
1.5 KiB
64 lines
1.5 KiB
package whisper
|
|
|
|
import (
|
|
"io"
|
|
"time"
|
|
)
|
|
|
|
///////////////////////////////////////////////////////////////////////////////
|
|
// TYPES
|
|
|
|
// SegmentCallback is the callback function for processing segments in real
|
|
// time. It is called during the Process function
|
|
type SegmentCallback func(Segment)
|
|
|
|
// Model is the interface to a whisper model. Create a new model with the
|
|
// function whisper.New(string)
|
|
type Model interface {
|
|
io.Closer
|
|
|
|
// Return a new speech-to-text context.
|
|
NewContext() (Context, error)
|
|
|
|
// Return all languages supported.
|
|
Languages() []string
|
|
}
|
|
|
|
// Context is the speach recognition context.
|
|
type Context interface {
|
|
SetLanguage(string) error // Set the language to use for speech recognition.
|
|
Language() string // Get language
|
|
SetSpeedup(bool) // Set speedup flag
|
|
|
|
// Process mono audio data and return any errors.
|
|
// If defined, newly generated segments are passed to the
|
|
// callback function during processing.
|
|
Process([]float32, SegmentCallback) error
|
|
|
|
// After process is called, return segments until the end of the stream
|
|
// is reached, when io.EOF is returned.
|
|
NextSegment() (Segment, error)
|
|
}
|
|
|
|
// Segment is the text result of a speech recognition.
|
|
type Segment struct {
|
|
// Segment Number
|
|
Num int
|
|
|
|
// Time beginning and end timestamps for the segment.
|
|
Start, End time.Duration
|
|
|
|
// The text of the segment.
|
|
Text string
|
|
|
|
// The tokens of the segment.
|
|
Tokens []Token
|
|
}
|
|
|
|
// Token is a text or special token.
type Token struct {
	// Id is presumably the token's numeric identifier — TODO confirm against
	// the implementation. NOTE(review): Go naming convention would spell this
	// ID; the name is kept as Id so existing callers keep compiling.
	Id int

	// Text is the token's text.
	Text string

	// P is presumably the probability assigned to the token — TODO confirm.
	P float32
}
|