goEagi

package module
v0.0.0-...-95d8059 Latest Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: Jan 22, 2024 License: MIT Imports: 24 Imported by: 0

README


logo

GoEAGI

A Go library designed to seamlessly integrate with Asterisk's EAGI, offering essential functionalities for enhanced interaction and communication.

contributors last update forks stars open issues license

Report Bug · Request Feature


Features

  1. Audio Streaming
  2. Google's Text to Speech
  3. Google's Speech to Text
  4. Microsoft Azure's Speech to Text
  5. Vosk server Speech to Text
  6. Voice Activity Detection
  7. Speech File Generation
  8. Commands to Asterisk

Example Usage

Google Text to Speech

  • Render text to speech and play it back to the user.
  • You may refer to the supported language codes and voice names here.
  • Example dialplan code:
;GoogleTTS, playback message to the user
exten => 1234,1,Answer
exten => 1234,n,AGI(<build-script>, "What's up my buddy? how are you?", "en-GB", "en-GB-Neural2-A")
exten => 1234,n,Hangup
  • Example Go code:
package main

import (
	"strings"
	"github.com/andrewyang17/goEagi"
)

func main() {
	eagi, err := goEagi.New()
	if err != nil {
		os.Stdout.WriteString(fmt.Sprintf("error: %v", err))
		os.Exit(1)
	}

	content := strings.TrimSpace(eagi.Env["arg_1"])
	languageCode := strings.TrimSpace(eagi.Env["arg_2"])
	voiceName := strings.TrimSpace(eagi.Env["arg_3"])

	tts, err := goEagi.NewGoogleTTS(
		"<GoogleSpeechToTextPrivateKey>",
		"/tmp/tts", 
		languageCode, 
		voiceName)
	if err != nil {
		eagi.Verbose(err.Error())
	}

	audioPath, err := tts.GenerateAudio(content)
	if err != nil {
		eagi.Verbose(err.Error())
	}

	_, err = eagi.StreamFile(audioPath, "")
	if err != nil {
		eagi.Verbose(err.Error())
	}
}

Google Speech to Text

package main

import (
	"context"
	"fmt"
	"github.com/andrewyang17/goEagi"
)

func main() {
	eagi, err := goEagi.New()
	if err != nil {
		os.Stdout.WriteString(fmt.Sprintf("error: %v", err))
		os.Exit(1)
	}
	
	googleService, err := goEagi.NewGoogleService("<GoogleSpeechToTextPrivateKey>", "<languageCode>", nil)
	if err != nil {
		eagi.Verbose(fmt.Sprintf("error: %v", err))
		os.Exit(1)
	}
	defer googleService.Close()
	
	ctx, cancel := context.WithCancel(context.Background())
	defer cancel()

	bridgeStream := make(chan []byte)

	audioStream := goEagi.StreamAudio(ctx)
	errCh := googleService.StartStreaming(ctx, bridgeStream)
	googleResponseCh := googleService.SpeechToTextResponse(ctx)

	go func(ctx context.Context, eagi *goEagi.Eagi) {
		for {
			select {
			case <-ctx.Done(): return

			case audio := <-audioStream:
				if audio.Error != nil {
					eagi.Verbose(fmt.Sprintf("audio streaming: G error: %v", audio.Error))
					cancel()
					return
				}
				bridgeStream <- audio.Stream
			}
		}
	}(ctx, eagi)
	
	for {
		select {
		case <-ctx.Done(): return
			
		case err := <-errCh:
			eagi.Verbose(fmt.Sprintf("Google speech to text response: G error: %v", err))
			cancel()
			return

		case response := <-googleResponseCh:
			if response.Error != nil {
				eagi.Verbose(fmt.Sprintf("Google speech to text response: G error: %v", response.Error))
				cancel()
				return
			}

			transcription := response.Result.Alternatives[0].Transcript
			isFinal := response.Result.IsFinal

			eagi.Verbose(fmt.Sprintf("IsFinal: %v, Transcription: %v\n", isFinal, transcription))
		}
	}
	
}

Microsoft Azure Speech to Text

  • Prerequisite - install the Speech SDK
  • Carefully read the Speech SDK documentation and verify the platform requirements to ensure compatibility with your Asterisk server.
  • If it is not possible to install the Speech SDK on your Asterisk server, you can install it on a different machine and stream the audio from your Asterisk server to the Speech SDK.
  • For Azure Speech to Text, you need to enable "CGO_ENABLED" flag and build the project with the tag "azure", as shown below:
CGO_ENABLED=1 go build -tags azure main.go
package main

import (
	"context"
	"fmt"
	"os"

	"github.com/andrewyang17/goEagi"
)

// main streams the caller's audio to Microsoft Azure Speech to Text and logs
// every info message and transcription it receives to the Asterisk console.
func main() {
	eagi, err := goEagi.New()
	if err != nil {
		// No session is available yet, so the error cannot be reported via Verbose.
		fmt.Fprintf(os.Stdout, "error: %v", err)
		os.Exit(1)
	}

	azureService, err := goEagi.NewAzureService("<subscriptionKey>", "<serviceRegion>", "", []string{"...<language_code>"})
	if err != nil {
		eagi.Verbose(fmt.Sprintf("error: %v", err))
		os.Exit(1)
	}
	defer azureService.Close()

	ctx, cancel := context.WithCancel(context.Background())
	defer cancel()

	// bridgeStream carries audio chunks from the EAGI audio stream to the
	// Azure streaming request.
	bridgeStream := make(chan []byte)

	audioStream := goEagi.StreamAudio(ctx)
	errCh := azureService.StartStreaming(ctx, bridgeStream)
	azureResponseCh := azureService.SpeechToTextResponse(ctx)

	// Forward audio chunks until the context is cancelled or streaming fails.
	go func(ctx context.Context, eagi *goEagi.Eagi) {
		for {
			select {
			case <-ctx.Done():
				return

			case audio := <-audioStream:
				if audio.Error != nil {
					eagi.Verbose(fmt.Sprintf("audio streaming: G error: %v", audio.Error))
					cancel()
					return
				}

				// bridgeStream is unbuffered: also watch ctx so this goroutine
				// cannot block forever once the consumer has stopped reading.
				select {
				case bridgeStream <- audio.Stream:
				case <-ctx.Done():
					return
				}
			}
		}
	}(ctx, eagi)

	for {
		select {
		case <-ctx.Done():
			return

		case err := <-errCh:
			eagi.Verbose(fmt.Sprintf("Azure speech to text response: G error: %v", err))
			cancel()
			return

		case response := <-azureResponseCh:
			if response.Error != nil {
				eagi.Verbose(fmt.Sprintf("Azure speech to text response: G error: %v", response.Error))
				cancel()
				return
			}

			// Informational messages are logged on their own and carry no
			// transcription to print.
			if response.Info != "" {
				eagi.Verbose(fmt.Sprintf("Info: %v", response.Info))
				continue
			}

			eagi.Verbose(fmt.Sprintf("IsFinal: %v, Transcription: %v\n", response.IsFinal, response.Transcription))
		}
	}
}

Vosk

  • Prerequisite - run the Vosk server
docker run -d -p 2700:2700 alphacep/kaldi-en:latest
package main

import (
	"context"
	"fmt"
	"os"

	"github.com/andrewyang17/goEagi"
)

// main streams the caller's audio to a Vosk server and logs every recognized
// text it receives to the Asterisk console.
func main() {
	eagi, err := goEagi.New()
	if err != nil {
		// No session is available yet, so the error cannot be reported via Verbose.
		fmt.Fprintf(os.Stdout, "error: %v", err)
		os.Exit(1)
	}

	// The third argument of NewVoskService is the phrase list: the valid
	// phrases/words to recognize.
	// Notes:
	//	* if you use a phrase list, Vosk will only detect these words, ignoring any other word
	//	* some Vosk models don't support a phrase list (tested with Spanish)
	//	* to disable the phrase list, pass nil (as done here) or leave it empty
	voskService, err := goEagi.NewVoskService("<voskHost>", "<voskPort>", nil)
	if err != nil {
		eagi.Verbose(fmt.Sprintf("error: %v", err))
		os.Exit(1)
	}
	defer voskService.Close()

	ctx, cancel := context.WithCancel(context.Background())
	defer cancel()

	// bridgeStream carries audio chunks from the EAGI audio stream to the
	// Vosk websocket. It is closed by the goroutine that sends on it, never
	// by main: closing it here could make that goroutine panic with
	// "send on closed channel".
	bridgeStream := make(chan []byte)

	audioStream := goEagi.StreamAudio(ctx)
	errCh := voskService.StartStreaming(ctx, bridgeStream)
	voskResponseCh := voskService.SpeechToTextResponse(ctx)

	// Forward audio chunks until the context is cancelled or streaming fails.
	go func(ctx context.Context, eagi *goEagi.Eagi) {
		// Every return path below happens after ctx has been cancelled, so
		// the channel is only closed once streaming is being torn down.
		defer close(bridgeStream)

		for {
			select {
			case <-ctx.Done():
				return

			case audio := <-audioStream:
				if audio.Error != nil {
					eagi.Verbose(fmt.Sprintf("audio streaming: G error: %v", audio.Error))
					cancel()
					return
				}

				// bridgeStream is unbuffered: also watch ctx so this goroutine
				// cannot block forever once the consumer has stopped reading.
				select {
				case bridgeStream <- audio.Stream:
				case <-ctx.Done():
					return
				}
			}
		}
	}(ctx, eagi)

	for {
		select {
		case <-ctx.Done():
			return

		case err := <-errCh:
			eagi.Verbose(fmt.Sprintf("Vosk speech to text response: G error: %v", err))
			cancel()
			return

		case response := <-voskResponseCh:
			// Partial data arrives in response.Partial; once the full text has
			// been recognized it arrives in response.Text.
			eagi.Verbose(fmt.Sprintf("Transcription: %v\n", response.Text))
		}
	}
}

Contributing

Made with contrib.rocks

Contributions are always welcome!


License

MIT License, see LICENSE.


Contact

Andrew Yang - andrewyang177@gmail.com

Project Link: https://github.com/andrewyang17/goEagi


Acknowledgements

We would like to express our gratitude to the authors and contributors of the following open-source libraries, which were used in this project:

Documentation

Overview

Package goEagi of vosk.go provides a simplified interface for calling the Vosk server's speech-to-text service. It gives callers the flexibility to set their desired configuration.

Index

Constants

This section is empty.

Variables

This section is empty.

Functions

func ComputeAmplitude

func ComputeAmplitude(sample []byte) (float64, error)

ComputeAmplitude analyzes the amplitude of a sample slice of bytes.

func GenerateAudio

func GenerateAudio(sample []byte, audioDirectory string, audioName string) (string, error)

GenerateAudio writes a sample slice of bytes into an audio file. It returns the path of the audio file, built from the directory and name passed as function parameters. Please note that only the wav extension is supported.

func StreamAudio

func StreamAudio(ctx context.Context) <-chan AudioResult

StreamAudio launches a new goroutine for audio streaming via file descriptor 3.

Types

type AudioResult

// AudioResult is the element type of the channel returned by StreamAudio.
type AudioResult struct {
	// Error reports a streaming failure; callers are expected to check it
	// before using Stream.
	Error  error
	// Stream holds one chunk of audio data.
	// NOTE(review): presumably the raw bytes read from file descriptor 3 — confirm.
	Stream []byte
}

type Eagi

// Eagi is the handle returned by New. It embeds *agi.Session, so the
// session's exported fields and methods are promoted onto Eagi.
type Eagi struct {
	*agi.Session
}

func New

func New() (*Eagi, error)

type GoogleResult

type GoogleResult struct {
	// Result is the streaming recognition result received from the service.
	Result            *speechpb.StreamingRecognitionResult
	// Error reports a failure; check it before reading Result.
	Error             error
	// NOTE(review): Reinitialized and ReinitializedInfo presumably describe a
	// client reinitialization (see ReinitializeClient) — confirm.
	Reinitialized     bool
	ReinitializedInfo string
}

GoogleResult is a struct that contains transcription result from Google Speech to Text service.

type GoogleService

type GoogleService struct {
	sync.RWMutex
	// contains filtered or unexported fields
}

GoogleService is used to stream audio data to Google Speech to Text service.

func NewGoogleService

func NewGoogleService(privateKeyPath string, languageCode string, speechContext []string) (*GoogleService, error)

NewGoogleService creates a new GoogleService instance. It takes a privateKeyPath, which it sets in the environment under the key GOOGLE_APPLICATION_CREDENTIALS; a languageCode, for example "en-GB", "en-US", "ch", ... (see https://cloud.google.com/speech-to-text/docs/languages); and a speech context (see https://cloud.google.com/speech-to-text/docs/speech-adaptation).

func (*GoogleService) Close

func (g *GoogleService) Close() error

Close closes the GoogleService.

func (*GoogleService) ReinitializeClient

func (g *GoogleService) ReinitializeClient() error

ReinitializeClient reinitializes the Google client.

func (*GoogleService) SpeechToTextResponse

func (g *GoogleService) SpeechToTextResponse(ctx context.Context) <-chan GoogleResult

SpeechToTextResponse sends the transcription response from Google's SpeechToText.

func (*GoogleService) StartStreaming

func (g *GoogleService) StartStreaming(ctx context.Context, stream <-chan []byte) <-chan error

StartStreaming takes a reading channel of audio stream and sends it as a gRPC request to Google service through the initialized client.

type GoogleTTS

// GoogleTTS holds the settings used to generate speech audio files; it is
// created with NewGoogleTTS.
type GoogleTTS struct {
	// AudioOutputDirectory is the directory for generated audio files.
	// NOTE(review): presumably the audioOutputDir given to NewGoogleTTS — confirm.
	AudioOutputDirectory string
	// LanguageCode is the language code of the voice, for example "en-GB".
	LanguageCode         string
	// VoiceName is the name of the voice, for example "en-GB-Neural2-A".
	VoiceName            string
}

func NewGoogleTTS

func NewGoogleTTS(googleCred, audioOutputDir, languageCode, voiceName string) (*GoogleTTS, error)

func (*GoogleTTS) GenerateAudio

func (tts *GoogleTTS) GenerateAudio(content string) (string, error)

GenerateAudio generates audio file from content. It returns audio file path without extension for playback, and error if any.

type Vad

// Vad performs voice activity detection; it is created with NewVad.
type Vad struct {
	// AmplitudeDetectionThreshold is the amplitude threshold used for detection.
	// NOTE(review): presumably frames at or above this amplitude count as
	// voice — confirm the comparison against the implementation.
	AmplitudeDetectionThreshold float64
}

func NewVad

func NewVad(amplitudeThreshold float64) *Vad

NewVad is a constructor of Vad. The initialization will use the defaultAmplitudeDetectionThreshold.

func (*Vad) Detect

func (v *Vad) Detect(done <-chan interface{}, stream <-chan []byte) <-chan VadResult

Detect analyzes voice activity for a given slice of bytes.

type VadResult

// VadResult is the element type of the channel returned by (*Vad).Detect.
type VadResult struct {
	// Error reports a failure while analyzing.
	Error     error
	// Detected tells whether voice activity was detected.
	Detected  bool
	// Amplitude is the amplitude value for this result.
	// NOTE(review): presumably computed with ComputeAmplitude — confirm.
	Amplitude float64
	// Frame is the slice of audio bytes this result refers to.
	// NOTE(review): presumably the analyzed input frame — confirm.
	Frame     []byte
}

type VoskResult

type VoskResult struct {
	// Result lists per-word entries.
	// NOTE(review): Conf is presumably the confidence and Start/End the word
	// timing — confirm units against the Vosk server documentation.
	Result []struct {
		Conf  float64
		End   float64
		Start float64
		Word  string
	}
	// Text carries the full recognized text.
	Text    string
	// Partial carries partial recognition data.
	Partial string
}

VoskResult is the response from Vosk Speech Recognizer.

type VoskService

type VoskService struct {
	PhraseList []string        `json:"phrase_list"`
	Words      bool            `json:"words"`
	Client     *websocket.Conn `json:"-"`
	// contains filtered or unexported fields
}

VoskService is the client for Vosk Speech Recognizer.

func NewVoskService

func NewVoskService(host string, port string, phraseList []string) (*VoskService, error)

NewVoskService creates a new VoskService.

func (*VoskService) Close

func (v *VoskService) Close() error

Close closes the websocket connection to the Vosk service.

func (*VoskService) SpeechToTextResponse

func (v *VoskService) SpeechToTextResponse(ctx context.Context) <-chan VoskResult

SpeechToTextResponse sends the transcription response from Vosk's SpeechToText.

func (*VoskService) StartStreaming

func (v *VoskService) StartStreaming(ctx context.Context, stream <-chan []byte) <-chan error

StartStreaming starts the streaming to Vosk speech to text service. It takes a reading channel of audio stream and sends it as a websocket binary message to Vosk service.

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL