whisper

package
v0.1.2 Latest Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: Oct 5, 2023 License: MIT Imports: 6 Imported by: 0

Documentation

Overview

github.com/jaybinks/goConstmeWhispers: Go bindings for https://github.com/Const-me/Whisper

Index

Constants

View Source
// Sampling strategies, passed to IContext.FullDefaultParams.
//
// The group uses iota with an explicit type on the first entry, so every
// constant here is typed eSamplingStrategy:
// SsGreedy = 0, SsBeamSearch = 1, SsINVALIDARG = 2.
const (
	SsGreedy eSamplingStrategy = iota
	SsBeamSearch
	// NOTE(review): presumably a deliberately invalid strategy value — TODO confirm.
	SsINVALIDARG
)
View Source
// Flags for FullParams; FullParams.AddFlags and FullParams.RemoveFlags take
// a value of type eFullParamsFlags. Each non-zero flag occupies its own bit
// (bits 0 through 8).
//
// NOTE(review): only FlagNone carries the explicit eFullParamsFlags type.
// Every other constant has its own expression, so under Go's constant
// declaration rules it is an untyped integer constant, not an
// eFullParamsFlags. They still convert implicitly where that type is expected.
const (
	FlagNone            eFullParamsFlags = 0
	FlagTranslate                        = 1 << 0
	FlagNoContext                        = 1 << 1
	FlagSingleSegment                    = 1 << 2
	FlagPrintSpecial                     = 1 << 3
	FlagPrintProgress                    = 1 << 4
	FlagPrintRealtime                    = 1 << 5
	FlagPrintTimestamps                  = 1 << 6
	FlagTokenTimestamps                  = 1 << 7 // Experimental
	FlagSpeedupAudio                     = 1 << 8
)
View Source
// Token flag values; SToken.Flags has type eTokenFlags.
//
// NOTE(review): only TfNone is typed eTokenFlags. TfSpecial has its own
// expression and is therefore an untyped integer constant.
const (
	TfNone    eTokenFlags = 0
	TfSpecial             = 1
)
View Source
// Result flags, passed as the flags argument of IContext.GetResults.
//
// NOTE(review): only RfNone is typed eResultFlags. The other constants have
// their own expressions and are therefore untyped integer constants.
const (
	RfNone eResultFlags = 0

	// Return individual tokens in addition to the segments
	RfTokens = 1

	// Return timestamps
	RfTimestamps = 2

	// Create a new COM object for the results.
	// Without this flag, the context returns a pointer to the COM object stored in the context.
	// The content of that object is replaced every time you call the IContext.GetResults method
	RfNewObject = 0x100
)
View Source
// Language identifiers.
//
// Each language constant packs the ASCII bytes of its lower-case language
// code (shown in the trailing comment) into an integer, with the first
// character in the lowest byte: "af" is 'a' (0x61) | 'f' (0x66) << 8 = 0x6661.
// Hawaiian uses the three-letter code "haw" and so occupies three bytes.
//
// NOTE(review): only Auto carries the explicit eLanguage type. Every other
// constant has its own expression and is therefore an untyped integer
// constant under Go's constant declaration rules.
const (
	// Auto is -1 and is not a packed language code.
	// NOTE(review): presumably requests automatic language detection — TODO confirm.
	Auto eLanguage = -1

	/// <summary>Afrikaans</summary>
	Afrikaans = 0x6661 // "af"
	/// <summary>Albanian</summary>
	Albanian = 0x7173 // "sq"
	/// <summary>Amharic</summary>
	Amharic = 0x6D61 // "am"
	/// <summary>Arabic</summary>
	Arabic = 0x7261 // "ar"
	/// <summary>Armenian</summary>
	Armenian = 0x7968 // "hy"
	/// <summary>Assamese</summary>
	Assamese = 0x7361 // "as"
	/// <summary>Azerbaijani</summary>
	Azerbaijani = 0x7A61 // "az"
	/// <summary>Bashkir</summary>
	Bashkir = 0x6162 // "ba"
	/// <summary>Basque</summary>
	Basque = 0x7565 // "eu"
	/// <summary>Belarusian</summary>
	Belarusian = 0x6562 // "be"
	/// <summary>Bengali</summary>
	Bengali = 0x6E62 // "bn"
	/// <summary>Bosnian</summary>
	Bosnian = 0x7362 // "bs"
	/// <summary>Breton</summary>
	Breton = 0x7262 // "br"
	/// <summary>Bulgarian</summary>
	Bulgarian = 0x6762 // "bg"
	/// <summary>Catalan</summary>
	Catalan = 0x6163 // "ca"
	/// <summary>Chinese</summary>
	Chinese = 0x687A // "zh"
	/// <summary>Croatian</summary>
	Croatian = 0x7268 // "hr"
	/// <summary>Czech</summary>
	Czech = 0x7363 // "cs"
	/// <summary>Danish</summary>
	Danish = 0x6164 // "da"
	/// <summary>Dutch</summary>
	Dutch = 0x6C6E // "nl"
	/// <summary>English</summary>
	English = 0x6E65 // "en"
	/// <summary>Estonian</summary>
	Estonian = 0x7465 // "et"
	/// <summary>Faroese</summary>
	Faroese = 0x6F66 // "fo"
	/// <summary>Finnish</summary>
	Finnish = 0x6966 // "fi"
	/// <summary>French</summary>
	French = 0x7266 // "fr"
	/// <summary>Galician</summary>
	Galician = 0x6C67 // "gl"
	/// <summary>Georgian</summary>
	Georgian = 0x616B // "ka"
	/// <summary>German</summary>
	German = 0x6564 // "de"
	/// <summary>Greek</summary>
	Greek = 0x6C65 // "el"
	/// <summary>Gujarati</summary>
	Gujarati = 0x7567 // "gu"
	/// <summary>Haitian Creole</summary>
	HaitianCreole = 0x7468 // "ht"
	/// <summary>Hausa</summary>
	Hausa = 0x6168 // "ha"
	/// <summary>Hawaiian</summary>
	Hawaiian = 0x776168 // "haw"
	/// <summary>Hebrew</summary>
	Hebrew = 0x7769 // "iw"
	/// <summary>Hindi</summary>
	Hindi = 0x6968 // "hi"
	/// <summary>Hungarian</summary>
	Hungarian = 0x7568 // "hu"
	/// <summary>Icelandic</summary>
	Icelandic = 0x7369 // "is"
	/// <summary>Indonesian</summary>
	Indonesian = 0x6469 // "id"
	/// <summary>Italian</summary>
	Italian = 0x7469 // "it"
	/// <summary>Japanese</summary>
	Japanese = 0x616A // "ja"
	/// <summary>Javanese</summary>
	Javanese = 0x776A // "jw"
	/// <summary>Kannada</summary>
	Kannada = 0x6E6B // "kn"
	/// <summary>Kazakh</summary>
	Kazakh = 0x6B6B // "kk"
	/// <summary>Khmer</summary>
	Khmer = 0x6D6B // "km"
	/// <summary>Korean</summary>
	Korean = 0x6F6B // "ko"
	/// <summary>Lao</summary>
	Lao = 0x6F6C // "lo"
	/// <summary>Latin</summary>
	Latin = 0x616C // "la"
	/// <summary>Latvian</summary>
	Latvian = 0x766C // "lv"
	/// <summary>Lingala</summary>
	Lingala = 0x6E6C // "ln"
	/// <summary>Lithuanian</summary>
	Lithuanian = 0x746C // "lt"
	/// <summary>Luxembourgish</summary>
	Luxembourgish = 0x626C // "lb"
	/// <summary>Macedonian</summary>
	Macedonian = 0x6B6D // "mk"
	/// <summary>Malagasy</summary>
	Malagasy = 0x676D // "mg"
	/// <summary>Malay</summary>
	Malay = 0x736D // "ms"
	/// <summary>Malayalam</summary>
	Malayalam = 0x6C6D // "ml"
	/// <summary>Maltese</summary>
	Maltese = 0x746D // "mt"
	/// <summary>Maori</summary>
	Maori = 0x696D // "mi"
	/// <summary>Marathi</summary>
	Marathi = 0x726D // "mr"
	/// <summary>Mongolian</summary>
	Mongolian = 0x6E6D // "mn"
	/// <summary>Myanmar</summary>
	Myanmar = 0x796D // "my"
	/// <summary>Nepali</summary>
	Nepali = 0x656E // "ne"
	/// <summary>Norwegian</summary>
	Norwegian = 0x6F6E // "no"
	/// <summary>Nynorsk</summary>
	Nynorsk = 0x6E6E // "nn"
	/// <summary>Occitan</summary>
	Occitan = 0x636F // "oc"
	/// <summary>Pashto</summary>
	Pashto = 0x7370 // "ps"
	/// <summary>Persian</summary>
	Persian = 0x6166 // "fa"
	/// <summary>Polish</summary>
	Polish = 0x6C70 // "pl"
	/// <summary>Portuguese</summary>
	Portuguese = 0x7470 // "pt"
	/// <summary>Punjabi</summary>
	Punjabi = 0x6170 // "pa"
	/// <summary>Romanian</summary>
	Romanian = 0x6F72 // "ro"
	/// <summary>Russian</summary>
	Russian = 0x7572 // "ru"
	/// <summary>Sanskrit</summary>
	Sanskrit = 0x6173 // "sa"
	/// <summary>Serbian</summary>
	Serbian = 0x7273 // "sr"
	/// <summary>Shona</summary>
	Shona = 0x6E73 // "sn"
	/// <summary>Sindhi</summary>
	Sindhi = 0x6473 // "sd"
	/// <summary>Sinhala</summary>
	Sinhala = 0x6973 // "si"
	/// <summary>Slovak</summary>
	Slovak = 0x6B73 // "sk"
	/// <summary>Slovenian</summary>
	Slovenian = 0x6C73 // "sl"
	/// <summary>Somali</summary>
	Somali = 0x6F73 // "so"
	/// <summary>Spanish</summary>
	Spanish = 0x7365 // "es"
	/// <summary>Sundanese</summary>
	Sundanese = 0x7573 // "su"
	/// <summary>Swahili</summary>
	Swahili = 0x7773 // "sw"
	/// <summary>Swedish</summary>
	Swedish = 0x7673 // "sv"
	/// <summary>Tagalog</summary>
	Tagalog = 0x6C74 // "tl"
	/// <summary>Tajik</summary>
	Tajik = 0x6774 // "tg"
	/// <summary>Tamil</summary>
	Tamil = 0x6174 // "ta"
	/// <summary>Tatar</summary>
	Tatar = 0x7474 // "tt"
	/// <summary>Telugu</summary>
	Telugu = 0x6574 // "te"
	/// <summary>Thai</summary>
	Thai = 0x6874 // "th"
	/// <summary>Tibetan</summary>
	Tibetan = 0x6F62 // "bo"
	/// <summary>Turkish</summary>
	Turkish = 0x7274 // "tr"
	/// <summary>Turkmen</summary>
	Turkmen = 0x6B74 // "tk"
	/// <summary>Ukrainian</summary>
	Ukrainian = 0x6B75 // "uk"
	/// <summary>Urdu</summary>
	Urdu = 0x7275 // "ur"
	/// <summary>Uzbek</summary>
	Uzbek = 0x7A75 // "uz"
	/// <summary>Vietnamese</summary>
	Vietnamese = 0x6976 // "vi"
	/// <summary>Welsh</summary>
	Welsh = 0x7963 // "cy"
	/// <summary>Yiddish</summary>
	Yiddish = 0x6979 // "yi"
	/// <summary>Yoruba</summary>
	Yoruba = 0x6F79 // "yo"
)
View Source
// Log levels, numbered from 0 (LlError) to 3 (LlDebug).
//
// NOTE(review): only LlError is typed eLogLevel. The other constants have
// their own expressions and are therefore untyped integer constants.
const (
	LlError   eLogLevel = 0
	LlWarning           = 1
	LlInfo              = 2
	LlDebug             = 3
)
View Source
// Log flags. The non-zero values are distinct bits (1 and 2).
//
// NOTE(review): only LfNone is typed eLogFlags. The other constants have
// their own expressions and are therefore untyped integer constants.
const (
	LfNone              eLogFlags = 0
	LfUseStandardError            = 1
	LfSkipFormatMessage           = 2
)

Variables

This section is empty.

Functions

func ModelSetup

func ModelSetup(flags eGpuModelFlags, GPU string) *sModelSetup

Types

type EWhisperHWND

// EWhisperHWND is the status value returned by EncoderBeginCallback_Type and
// NewSegmentCallback_Type callbacks.
//
// NOTE(review): the name says HWND, but the native callback signature quoted
// for NewSegmentCallback_Type returns an HRESULT, and S_OK / S_FALSE are
// HRESULT names. The type name looks like a misnomer — confirm before relying on it.
type EWhisperHWND uintptr

// Callback return values: S_OK (0) and S_FALSE (1).
const (
	S_OK    EWhisperHWND = 0
	S_FALSE EWhisperHWND = 1
)

type EncoderBeginCallback_Type

type EncoderBeginCallback_Type func(context *IContext, user_data unsafe.Pointer) EWhisperHWND

Return S_OK to proceed, or S_FALSE to stop the process

type FullParams

type FullParams struct {
	// contains filtered or unexported fields
}

func NewFullParams

func NewFullParams(cstruct *_FullParams) *FullParams

func (*FullParams) AddFlags

func (this *FullParams) AddFlags(newflag eFullParamsFlags)

func (*FullParams) CpuThreads

func (this *FullParams) CpuThreads() int32

func (*FullParams) RemoveFlags

func (this *FullParams) RemoveFlags(newflag eFullParamsFlags)

func (*FullParams) SetEncoderBeginCallback

func (this *FullParams) SetEncoderBeginCallback(cb EncoderBeginCallback_Type)

func (*FullParams) SetMaxTextCTX

func (this *FullParams) SetMaxTextCTX(val int32)

func (*FullParams) SetNewSegmentCallback

func (this *FullParams) SetNewSegmentCallback(cb NewSegmentCallback_Type)

func (*FullParams) TestDefaultsOK

func (this *FullParams) TestDefaultsOK() bool

type IContext

type IContext struct {
	// contains filtered or unexported fields
}

func (*IContext) AddRef

func (this *IContext) AddRef() int32

func (*IContext) DetectSpeaker

func (context *IContext) DetectSpeaker(time *sTimeInterval, result *eSpeakerChannel) uintptr

func (*IContext) FullDefaultParams

func (context *IContext) FullDefaultParams(strategy eSamplingStrategy) (*FullParams, error)

https://github.com/Const-me/Whisper/blob/f6f743c7b3570b85ccf47f74b84e06a73667ef3e/Whisper/Whisper/ContextImpl.misc.cpp

Returns E_POINTER if a null pointer is provided in params. Initialises params to all zeros, then sets the values in the struct; it does not malloc.

func (*IContext) GetModel

func (context *IContext) GetModel() (*_IModel, error)

func (*IContext) GetResults

func (context *IContext) GetResults(flags eResultFlags, pp **ITranscribeResult) uintptr

func (*IContext) Release

func (this *IContext) Release() int32

func (*IContext) RunCapture

func (context *IContext) RunCapture(params *FullParams, callbacks *sCaptureCallbacks, reader *iAudioCapture) uintptr

func (*IContext) RunFull

func (context *IContext) RunFull(params *FullParams, buffer *iAudioBuffer) error

Run the entire model: PCM -> log mel spectrogram -> encoder -> decoder -> text. Uses the specified decoding strategy to obtain the text.

func (*IContext) RunStreamed

func (context *IContext) RunStreamed(params *FullParams, reader *iAudioReader) error

func (*IContext) TimingsPrint

func (context *IContext) TimingsPrint() error

type IContextVtbl

// IContextVtbl holds one uintptr slot per method of the context interface.
//
// The first three slots carry the names of COM's IUnknown methods; the rest
// are named after the IContext methods exposed by this package, plus
// TimingsReset.
//
// NOTE(review): presumably the field order must match the native iContext
// virtual method table exactly — verify against the upstream header before
// reordering or inserting fields.
type IContextVtbl struct {
	QueryInterface uintptr
	AddRef         uintptr
	Release        uintptr

	RunFull           uintptr
	RunStreamed       uintptr
	RunCapture        uintptr
	GetResults        uintptr
	DetectSpeaker     uintptr
	GetModel          uintptr
	FullDefaultParams uintptr
	TimingsPrint      uintptr
	TimingsReset      uintptr
}

type IMediaFoundation

type IMediaFoundation struct {
	// contains filtered or unexported fields
}

func (*IMediaFoundation) AddRef

func (this *IMediaFoundation) AddRef() int32

func (*IMediaFoundation) LoadAudioFile

func (this *IMediaFoundation) LoadAudioFile(file string, stereo bool) (*iAudioBuffer, error)

Native C++ parameter list: ( LPCTSTR path, bool stereo, iAudioBuffer** pp ) const;

func (*IMediaFoundation) LoadAudioFileData

func (this *IMediaFoundation) LoadAudioFileData(inbuffer *[]byte, stereo bool) (*iAudioReader, error)

func (*IMediaFoundation) OpenAudioFile

func (this *IMediaFoundation) OpenAudioFile(file string, stereo bool) (*iAudioReader, error)

func (*IMediaFoundation) Release

func (this *IMediaFoundation) Release() int32

type IMediaFoundationVtbl

type IMediaFoundationVtbl struct {
	QueryInterface uintptr
	AddRef         uintptr
	Release        uintptr
	// contains filtered or unexported fields
}

type IModelVtbl

type IModelVtbl struct {
	QueryInterface uintptr
	AddRef         uintptr
	Release        uintptr
	// contains filtered or unexported fields
}

https://github.com/Const-me/Whisper/blob/master/Whisper/API/iContext.cl.h

type ITranscribeResult

type ITranscribeResult struct {
	// contains filtered or unexported fields
}

func (*ITranscribeResult) AddRef

func (this *ITranscribeResult) AddRef() int32

func (*ITranscribeResult) GetSegments

func (this *ITranscribeResult) GetSegments(len uint32) []sSegment

func (*ITranscribeResult) GetSize

func (this *ITranscribeResult) GetSize() (*sTranscribeLength, error)

func (*ITranscribeResult) GetTokens

func (this *ITranscribeResult) GetTokens(len uint32) []SToken

func (*ITranscribeResult) Release

func (this *ITranscribeResult) Release() int32

type Model

type Model struct {
	// contains filtered or unexported fields
}

External - Go version of the struct

func NewModel

func NewModel(setup *sModelSetup, cstruct *_IModel) *Model

func (*Model) AddRef

func (this *Model) AddRef() int32

func (*Model) Clone

func (this *Model) Clone() (*_IModel, error)

func (*Model) CreateContext

func (this *Model) CreateContext() (*IContext, error)

func (*Model) IsMultilingual

func (this *Model) IsMultilingual() bool

func (*Model) Release

func (this *Model) Release() int32

type NewSegmentCallback_Type

type NewSegmentCallback_Type func(context *IContext, n_new uint32, user_data unsafe.Pointer) EWhisperHWND

using pfnNewSegment = HRESULT( __cdecl* )( iContext* ctx, uint32_t n_new, void* user_data ) noexcept;

type SToken

// SToken is the element type of the slice returned by
// ITranscribeResult.GetTokens. Its text is available through the Text method.
type SToken struct {

	// Start and end times of the token
	Time sTimeInterval
	// Probability of the token
	Probability float32

	// Probability of the timestamp token
	ProbabilityTimestamp float32

	// Sum of probabilities of all timestamp tokens
	Ptsum float32

	// Voice length of the token
	Vlen float32

	// Token id
	Id int32

	// Token flags; the package declares TfNone (0) and TfSpecial (1) for this type.
	Flags eTokenFlags
	// contains filtered or unexported fields
}

func (*SToken) Text

func (this *SToken) Text() string

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL