goEagi

package module

v0.0.0-...-95d8059 Latest Latest Go to latest Published: Jan 22, 2024 License: MIT Imports: 24 Imported by: 0

Details

Valid go.mod file

The Go module system was introduced in Go 1.11 and is the official dependency management solution for Go.
Redistributable license

Redistributable licenses place minimal restrictions on how software can be used, modified, and redistributed.
Tagged version

Modules with tagged versions give importers more predictable builds.
Stable version

When a project reaches major version v1 it is considered stable.
Learn more about best practices

Repository

github.com/andrewyang17/goEagi

README ¶

GoEAGI

A Go library designed to seamlessly integrate with Asterisk's EAGI, offering essential functionalities for enhanced interaction and communication.

last update

Features

Audio Streaming
Google's Text to Speech
Google's Speech to Text
Microsoft Azure's Speech to Text
Vosk server Speech to Text
Voice Activity Detection
Speech File Generation
Commands to Asterisk

Example Usage

Google Text to Speech

Render text to speech and play it back to the user.
You may refer to the language code and voice name here.
Example dialplan code:

;GoogleTTS, playback message to the user
exten => 1234,1,Answer
exten => 1234,n,AGI(<build-script>, "What's up my buddy? how are you?", "en-GB", "en-GB-Neural2-A")
exten => 1234,n,Hangup

Example Go code:

package main

import (
	"strings"
	"github.com/andrewyang17/goEagi"
)

func main() {
	eagi, err := goEagi.New()
	if err != nil {
		os.Stdout.WriteString(fmt.Sprintf("error: %v", err))
		os.Exit(1)
	}

	content := strings.TrimSpace(eagi.Env["arg_1"])
	languageCode := strings.TrimSpace(eagi.Env["arg_2"])
	voiceName := strings.TrimSpace(eagi.Env["arg_3"])

	tts, err := goEagi.NewGoogleTTS(
		"<GoogleSpeechToTextPrivateKey>",
		"/tmp/tts", 
		languageCode, 
		voiceName)
	if err != nil {
		eagi.Verbose(err.Error())
	}

	audioPath, err := tts.GenerateAudio(content)
	if err != nil {
		eagi.Verbose(err.Error())
	}

	_, err = eagi.StreamFile(audioPath, "")
	if err != nil {
		eagi.Verbose(err.Error())
	}
}

Google Speech to Text

package main

import (
	"context"
	"fmt"
	"github.com/andrewyang17/goEagi"
)

func main() {
	eagi, err := goEagi.New()
	if err != nil {
		os.Stdout.WriteString(fmt.Sprintf("error: %v", err))
		os.Exit(1)
	}
	
	googleService, err := goEagi.NewGoogleService("<GoogleSpeechToTextPrivateKey>", "<languageCode>", nil)
	if err != nil {
		eagi.Verbose(fmt.Sprintf("error: %v", err))
		os.Exit(1)
	}
	defer googleService.Close()
	
	ctx, cancel := context.WithCancel(context.Background())
	defer cancel()

	bridgeStream := make(chan []byte)

	audioStream := goEagi.StreamAudio(ctx)
	errCh := googleService.StartStreaming(ctx, bridgeStream)
	googleResponseCh := googleService.SpeechToTextResponse(ctx)

	go func(ctx context.Context, eagi *goEagi.Eagi) {
		for {
			select {
			case <-ctx.Done(): return

			case audio := <-audioStream:
				if audio.Error != nil {
					eagi.Verbose(fmt.Sprintf("audio streaming: G error: %v", audio.Error))
					cancel()
					return
				}
				bridgeStream <- audio.Stream
			}
		}
	}(ctx, eagi)
	
	for {
		select {
		case <-ctx.Done(): return
			
		case err := <-errCh:
			eagi.Verbose(fmt.Sprintf("Google speech to text response: G error: %v", err))
			cancel()
			return

		case response := <-googleResponseCh:
			if response.Error != nil {
				eagi.Verbose(fmt.Sprintf("Google speech to text response: G error: %v", response.Error))
				cancel()
				return
			}

			transcription := response.Result.Alternatives[0].Transcript
			isFinal := response.Result.IsFinal

			eagi.Verbose(fmt.Sprintf("IsFinal: %v, Transcription: %v\n", isFinal, transcription))
		}
	}
	
}

Microsoft Azure Speech to Text

Prerequisite - install the Speech SDK
Carefully read the Speech SDK documentation and verify the platform requirements to ensure compatibility with your Asterisk server.
If it is not possible to install the Speech SDK on your Asterisk server, you can install it on a different machine and stream the audio from your Asterisk server to the Speech SDK.
For Azure Speech to Text, you need to enable "CGO_ENABLED" flag and build the project with the tag "azure", as shown below:

CGO_ENABLED=1 go build -tags azure main.go

package main

import (
	"context"
	"fmt"
	"os"

	"github.com/andrewyang17/goEagi"
)

func main() {
	eagi, err := goEagi.New()
	if err != nil {
		os.Stdout.WriteString(fmt.Sprintf("error: %v", err))
		os.Exit(1)
	}

	azureService, err := goEagi.NewAzureService("<subscriptionKey>", "serviceRegion", "", []string{"...<language_code>"})
	if err != nil {
		eagi.Verbose(fmt.Sprintf("error: %v", err))
		os.Exit(1)
	}
	defer azureService.Close()

	ctx, cancel := context.WithCancel(context.Background())
	defer cancel()

	bridgeStream := make(chan []byte)

	audioStream := goEagi.StreamAudio(ctx)
	errCh := azureService.StartStreaming(ctx, bridgeStream)
	azureResponseCh := azureService.SpeechToTextResponse(ctx)

	go func(ctx context.Context, eagi *goEagi.Eagi) {
		for {
			select {
			case <-ctx.Done(): return

			case audio := <-audioStream:
				if audio.Error != nil {
					eagi.Verbose(fmt.Sprintf("audio streaming: G error: %v", audio.Error))
					cancel()
					return
				}
				bridgeStream <- audio.Stream
			}
		}
	}(ctx, eagi)
	
	for {
		select {
		case <-ctx.Done(): return
			
		case err := <-errCh:
			eagi.Verbose(fmt.Sprintf("Azure speech to text response: G error: %v", err))
			cancel()
			return

		case response := <-azureResponseCh:
			if response.Error != nil {
				eagi.Verbose(fmt.Sprintf("Azure speech to text response: G error: %v", response.Error))
				cancel()
				return
			}

			if response.Info != "" {
				eagi.Verbose(fmt.Sprintf("Info: %v", response.Info))
				continue
			}

			eagi.Verbose(fmt.Sprintf("IsFinal: %v, Transcription: %v\n", response.IsFinal, response.Transcription))
		}
	}
}

Vosk

Prerequisite - run the Vosk server

docker run -d -p 2700:2700 alphacep/kaldi-en:latest

package main

import (
	"context"
	"fmt"
	"os"

	"github.com/andrewyang17/goEagi"
)

func main() {
	eagi, err := goEagi.New()
	if err != nil {
		os.Stdout.WriteString(fmt.Sprintf("error: %v", err))
		os.Exit(1)
	}

	//use phraseList to list the valid phrases/words. 
	//notes
	//	* if you use a phrase list, Vosk will only detect these words, ignoring any other word
	//	* some Vosk models doesn't support phrase list (I tested with spanish)
	//  * to disable phrase list, leave phraseList empty
	voskService, err := goEagi.NewVoskService("<voskHost>", "<voskPort>", nil)
	if err != nil {
		eagi.Verbose(fmt.Sprintf("error: %v", err))
		os.Exit(1)
	}
	defer voskService.Close()

	ctx, cancel := context.WithCancel(context.Background())
	defer cancel()

	bridgeStream := make(chan []byte)
	defer close(bridgeStream)

	audioStream := goEagi.StreamAudio(ctx)
	errCh := voskService.StartStreaming(ctx, bridgeStream)
	voskResponseCh := voskService.SpeechToTextResponse(ctx)

	go func(ctx context.Context, eagi *goEagi.Eagi) {
		for {
			select {
			case <-ctx.Done(): return

			case audio := <-audioStream:
				if audio.Error != nil {
					eagi.Verbose(fmt.Sprintf("audio streaming: G error: %v", audio.Error))
					cancel()
					return
				}
				bridgeStream <- audio.Stream
			}
		}
	}(ctx, eagi)

	for {
		select {
		case <-ctx.Done(): return
			
		case err := <-errCh:
			eagi.Verbose(fmt.Sprintf("Vosk speech to text response: G error: %v", err))
			cancel()
			return

		case response := <-voskResponseCh:
			// you will receive partial data in v.Partial and, if the full text was recognized, you will receive v.Text.
			eagi.Verbose(fmt.Sprintf("Transcription: %v\n", response.Text))
		}
	}
}

Contributing

Made with contrib.rocks

Contributions are always welcome!

License

MIT License, see LICENSE.

Contact

Andrew Yang - andrewyang177@gmail.com

Project Link: https://github.com/andrewyang17/goEagi

Acknowledgements

We would like to express our gratitude to the authors and contributors of the following open-source libraries, which were used in this project:

cloud.google.com/go/speech: Developed by Google
github.com/Microsoft/cognitive-services-speech-sdk-go: Developed by Microsoft
github.com/cryptix/wav: Developed by Henry Cryptix
github.com/zaf/agi: Developed by Lefteris Zafiris
github.com/gorilla/websocket: Developed by Gorilla

Documentation ¶

Overview ¶

Package goEagi of vosk.go provides a simplified interface for calling Vosk Server's speech to text service. It provides flexibility to the callers and allows them to set their desired configuration.

Index ¶

func ComputeAmplitude(sample []byte) (float64, error)
func GenerateAudio(sample []byte, audioDirectory string, audioName string) (string, error)
func StreamAudio(ctx context.Context) <-chan AudioResult
type AudioResult
type Eagi
- func New() (*Eagi, error)
type GoogleResult
type GoogleService
- func NewGoogleService(privateKeyPath string, languageCode string, speechContext []string) (*GoogleService, error)
- func (g *GoogleService) Close() error
- func (g *GoogleService) ReinitializeClient() error
- func (g *GoogleService) SpeechToTextResponse(ctx context.Context) <-chan GoogleResult
- func (g *GoogleService) StartStreaming(ctx context.Context, stream <-chan []byte) <-chan error
type GoogleTTS
- func NewGoogleTTS(googleCred, audioOutputDir, languageCode, voiceName string) (*GoogleTTS, error)
- func (tts *GoogleTTS) GenerateAudio(content string) (string, error)
type Vad
- func NewVad(amplitudeThreshold float64) *Vad
- func (v *Vad) Detect(done <-chan interface{}, stream <-chan []byte) <-chan VadResult
type VadResult
type VoskResult
type VoskService
- func NewVoskService(host string, port string, phraseList []string) (*VoskService, error)
- func (v *VoskService) Close() error
- func (v *VoskService) SpeechToTextResponse(ctx context.Context) <-chan VoskResult
- func (v *VoskService) StartStreaming(ctx context.Context, stream <-chan []byte) <-chan error

Constants ¶

This section is empty.

Variables ¶

This section is empty.

Functions ¶

func ComputeAmplitude ¶

func ComputeAmplitude(sample []byte) (float64, error)

ComputeAmplitude analyzes the amplitude of a sample slice of bytes.

func GenerateAudio ¶

func GenerateAudio(sample []byte, audioDirectory string, audioName string) (string, error)

GenerateAudio writes a sample slice of bytes into an audio file. It returns the location path of the audio file, as determined by the directory and name passed in the function parameters. Please note that only the wav extension is supported.

func StreamAudio ¶

func StreamAudio(ctx context.Context) <-chan AudioResult

StreamAudio launches a new goroutine for audio streaming via file descriptor 3.

Types ¶

type AudioResult ¶

// AudioResult is the element type of the channel returned by StreamAudio.
type AudioResult struct {
	// Error is checked for non-nil by the README examples before Stream is
	// used; presumably it reports an audio streaming failure.
	Error  error
	// Stream is the audio data the README examples forward to a speech to
	// text service.
	Stream []byte
}

type Eagi ¶

// Eagi is the session handle returned by New. It embeds *agi.Session, so the
// session's fields and methods (for example Env, Verbose and StreamFile, as
// used in the README examples) are available directly on an Eagi value.
type Eagi struct {
	*agi.Session
}

func New ¶

func New() (*Eagi, error)

type GoogleResult ¶

type GoogleResult struct {
	// Result is the streaming recognition result; the README example reads
	// its Alternatives[0].Transcript and IsFinal fields.
	Result            *speechpb.StreamingRecognitionResult
	// Error is checked for non-nil by the README example before Result is
	// used.
	Error             error
	// Reinitialized and ReinitializedInfo presumably report that the Google
	// client was reinitialized (see ReinitializeClient) — TODO confirm.
	Reinitialized     bool
	ReinitializedInfo string
}

GoogleResult is a struct that contains transcription result from Google Speech to Text service.

type GoogleService ¶

type GoogleService struct {
	// RWMutex is embedded, so its locking methods are part of the exported
	// method set of GoogleService. What state it guards is not visible here.
	sync.RWMutex
	// contains filtered or unexported fields
}

GoogleService is used to stream audio data to Google Speech to Text service.

func NewGoogleService ¶

func NewGoogleService(privateKeyPath string, languageCode string, speechContext []string) (*GoogleService, error)

NewGoogleService creates a new GoogleService instance. It takes a privateKeyPath, which it sets in the environment under the key GOOGLE_APPLICATION_CREDENTIALS; a languageCode, for example "en-GB", "en-US", "ch", ... (see https://cloud.google.com/speech-to-text/docs/languages); and a speech context (see https://cloud.google.com/speech-to-text/docs/speech-adaptation).

func (*GoogleService) Close ¶

func (g *GoogleService) Close() error

Close closes the GoogleService.

func (*GoogleService) ReinitializeClient ¶

func (g *GoogleService) ReinitializeClient() error

ReinitializeClient reinitializes the Google client.

func (*GoogleService) SpeechToTextResponse ¶

func (g *GoogleService) SpeechToTextResponse(ctx context.Context) <-chan GoogleResult

SpeechToTextResponse sends the transcription response from Google's SpeechToText.

func (*GoogleService) StartStreaming ¶

func (g *GoogleService) StartStreaming(ctx context.Context, stream <-chan []byte) <-chan error

StartStreaming takes a reading channel of audio stream and sends it as a gRPC request to Google service through the initialized client.

type GoogleTTS ¶

// GoogleTTS generates speech audio files from text; see NewGoogleTTS and
// GenerateAudio.
// NOTE(review): the fields below presumably mirror the audioOutputDir,
// languageCode and voiceName arguments of NewGoogleTTS — confirm.
type GoogleTTS struct {
	AudioOutputDirectory string
	LanguageCode         string
	VoiceName            string
}

func NewGoogleTTS ¶

func NewGoogleTTS(googleCred, audioOutputDir, languageCode, voiceName string) (*GoogleTTS, error)

func (*GoogleTTS) GenerateAudio ¶

func (tts *GoogleTTS) GenerateAudio(content string) (string, error)

GenerateAudio generates audio file from content. It returns audio file path without extension for playback, and error if any.

type Vad ¶

// Vad performs voice activity detection; see NewVad and Detect.
type Vad struct {
	// AmplitudeDetectionThreshold is presumably the amplitude level used to
	// decide whether voice is detected — TODO confirm how Detect compares
	// against it.
	AmplitudeDetectionThreshold float64
}

func NewVad ¶

func NewVad(amplitudeThreshold float64) *Vad

NewVad is a constructor of Vad. The initialization will use the defaultAmplitudeDetectionThreshold.

func (*Vad) Detect ¶

func (v *Vad) Detect(done <-chan interface{}, stream <-chan []byte) <-chan VadResult

Detect analyzes voice activity for a given slice of bytes.

type VadResult ¶

// VadResult is the element type of the channel returned by (*Vad).Detect.
// NOTE(review): the field meanings below are inferred from their names and
// from ComputeAmplitude — confirm against the implementation.
type VadResult struct {
	// Error presumably reports a failure while analyzing a frame.
	Error     error
	// Detected presumably tells whether voice activity was found in Frame.
	Detected  bool
	// Amplitude is presumably the amplitude computed for Frame.
	Amplitude float64
	// Frame is presumably the audio bytes that were analyzed.
	Frame     []byte
}

type VoskResult ¶

type VoskResult struct {
	// Result lists per-word entries.
	// NOTE(review): Conf is presumably a confidence score and Start/End the
	// word timings — confirm against the Vosk server response format.
	Result []struct {
		Conf  float64
		End   float64
		Start float64
		Word  string
	}
	// Text carries the full text once it has been recognized, per the README
	// example.
	Text    string
	// Partial carries partial recognition data, per the README example.
	Partial string
}

VoskResult is the response from Vosk Speech Recognizer.

type VoskService ¶

type VoskService struct {
	// PhraseList is encoded as "phrase_list" in JSON. It corresponds to the
	// phraseList argument of NewVoskService.
	PhraseList []string        `json:"phrase_list"`
	// Words is encoded as "words" in JSON.
	Words      bool            `json:"words"`
	// Client is the websocket connection; the "-" tag excludes it from JSON.
	Client     *websocket.Conn `json:"-"`
	// contains filtered or unexported fields
}

VoskService is the client for Vosk Speech Recognizer.

func NewVoskService ¶

func NewVoskService(host string, port string, phraseList []string) (*VoskService, error)

NewVoskService creates a new VoskService.

func (*VoskService) Close ¶

func (v *VoskService) Close() error

Close closes the websocket connection to the Vosk service.

func (*VoskService) SpeechToTextResponse ¶

func (v *VoskService) SpeechToTextResponse(ctx context.Context) <-chan VoskResult

SpeechToTextResponse sends the transcription response from Vosk's SpeechToText.

func (*VoskService) StartStreaming ¶

func (v *VoskService) StartStreaming(ctx context.Context, stream <-chan []byte) <-chan error

StartStreaming starts the streaming to Vosk speech to text service. It takes a reading channel of audio stream and sends it as a websocket binary message to Vosk service.

Source Files ¶

View all Source files

?	: This menu
/	: Search site
f or F	: Jump to
y or Y	: Canonical URL