dashscopego

package module

v0.0.3 (Latest) | Go to latest | Published: Mar 8, 2024 | License: MIT | Imports: 10 | Imported by: 0

Details

Valid go.mod file

The Go module system was introduced in Go 1.11 and is the official dependency management solution for Go.
Redistributable license

Redistributable licenses place minimal restrictions on how software can be used, modified, and redistributed.
Tagged version

Modules with tagged versions give importers more predictable builds.
Stable version

When a project reaches major version v1 it is considered stable.
Learn more about best practices

Repository

github.com/eswulei/dashscope-go

Links

Open Source Insights

README ¶

dashscopego

Forked from devinyf/dashscopego. 阿里云平台 DashScope API 的 Golang 封装 (非官方)。

开通DashScope并创建API-KEY

开发中...

通义千问

import (
	"context"
	"fmt"
	"os"

	"github.com/eswulei/dashscope-go"
	"github.com/eswulei/dashscope-go/qwen"
)

func main() {
	model := qwen.QwenTurbo
	token := os.Getenv("DASHSCOPE_API_KEY")

	if token == "" {
		panic("token is empty")
	}

	cli := dashscopego.NewTongyiClient(model, token)

	content := qwen.TextContent{Text: "讲个冷笑话"}

	input := dashscopego.TextInput{
		Messages: []dashscopego.TextMessage{
			{Role: "user", Content: &content},
		},
	}

	// (可选 SSE开启) 需要流式输出时 通过该 Callback Function 获取结果
	streamCallbackFn := func(ctx context.Context, chunk []byte) error {
		fmt.Print(string(chunk))
		return nil
	}
	req := &dashscopego.TextRequest{
		Input:       input,
		StreamingFn: streamCallbackFn,
	}

	ctx := context.TODO()
	resp, err := cli.CreateCompletion(ctx, req)
	if err != nil {
		panic(err)
	}

	fmt.Println("\nnon-stream result: ")
	fmt.Println(resp.Output.Choices[0].Message.Content.ToString())
}

通义万相(图像生成)

文本生成图像
人像风格重绘
图像背景生成

func main() {
	model := wanx.WanxV1
	token := os.Getenv("DASHSCOPE_API_KEY")
	if token == "" {
		panic("token is empty")
	}

	cli := dashscopego.NewTongyiClient(model, token)

	req := &wanx.ImageSynthesisRequest{
		// Model: "wanx-v1",
		Model: model,
		Input: wanx.ImageSynthesisInput{
			Prompt: "画一只松鼠",
		},
		Params: wanx.ImageSynthesisParams{
			N: 1,
		},
		Download: true // 从 URL 下载图片
	}
	ctx := context.TODO()

	imgBlobs, err := cli.CreateImageGeneration(ctx, req)
	if err != nil {
		panic(err)
	}

	for _, blob := range imgBlobs {
		// blob.Data 会在 request 中设置了 Download: true 时下载
		// 否则使用 blob.ImgURL
		saveImg2Desktop(blob.ImgType, blob.Data)
	}
}

// saveImg2Desktop decodes data as an image and writes it, PNG-encoded, to
// "wanx_image.png" inside the "Desktop" directory of the current user's home.
//
// fileType is accepted but not used. A decoding failure terminates the program
// through log.Fatal; every other failure panics.
func saveImg2Desktop(fileType string, data []byte) {
	decoded, _, decodeErr := image.Decode(bytes.NewBuffer(data))
	if decodeErr != nil {
		log.Fatal(decodeErr)
	}

	current, userErr := user.Current()
	if userErr != nil {
		panic(userErr)
	}

	target := filepath.Join(current.HomeDir, "Desktop", "wanx_image.png")
	out, createErr := os.Create(target)
	if createErr != nil {
		panic(createErr)
	}
	defer out.Close()

	encodeErr := png.Encode(out, decoded)
	if encodeErr != nil {
		panic(encodeErr)
	}
}

通义千问VL(视觉理解模型)

Image 也可以直接使用图片本地路径或图片 URL 链接, 参照了 dashscope python 库的实现步骤临时上传到 oss
其中上传图片到 oss 的步骤在开发文档中还没有看到HTTP调用的例子, 所以后续可能会做变更

func main() {
	model := qwen.QwenVLPlus
	token := os.Getenv("DASHSCOPE_API_KEY")

	if token == "" {
		panic("token is empty")
	}

	cli := dashscopego.NewTongyiClient(model, token)

	sysContent := qwen.VLContentList{
		{
			Text: "You are a helpful assistant.",
		},
	}
	userContent := qwen.VLContentList{
		{
			Text: "用唐诗体描述一下这张图片中的内容",
		},
		{
            // 官方文档的例子, oss 下载
			Image: "https://dashscope.oss-cn-beijing.aliyuncs.com/images/dog_and_girl.jpeg",
            // 使用 图片URL链接
            // Image: "https://pic.ntimg.cn/20140113/8800276_184351657000_2.jpg",
            // 本地图片
            // Image: "file:///Users/xxxx/xxxx.png",
		},
	}

	input := dashscopego.VLInput{
		Messages: []dashscopego.VLMessage{
			{Role: "system", Content: &sysContent},
			{Role: "user", Content: &userContent},
		},
	}

	// (可选 SSE开启)需要流式输出时 通过该 Callback Function 获取结果
	streamCallbackFn := func(ctx context.Context, chunk []byte) error {
		fmt.Print(string(chunk))
		return nil
	}
	req := &dashscopego.VLRequest{
		Input:       input,
		StreamingFn: streamCallbackFn,
	}

	ctx := context.TODO()
	resp, err := cli.CreateVLCompletion(ctx, req)
	if err != nil {
		panic(err)
	}

	fmt.Println("\nnon-stream result: ")
	fmt.Println(resp.Output.Choices[0].Message.Content.ToString())
}

通义千问Audio(音频语言模型)

同 QwenVL, 如果使用本地音频文件会临时上传 oss, 之后可能会有变动

func main() {
	model := qwen.QwenAudioTurbo
	token := os.Getenv("DASHSCOPE_API_KEY")

	if token == "" {
		panic("token is empty")
	}

	cli := dashscopego.NewTongyiClient(model, token)

	sysContent := qwen.AudioContentList{
		{
			Text: "You are a helpful assistant.",
		},
	}
	userContent := qwen.AudioContentList{
		{
			Text: "该段对话表达了什么观点? 详细分析该讲话者的语气,展现出什么样的情绪", //nolint:gosmopolitan
		},
		{
			// 使用本地音频文件
			// Audio: "file:///Users/xxx/Desktop/hello_world_female2.wav",
			// 官方文档中的例子
			Audio: "https://dashscope.oss-cn-beijing.aliyuncs.com/audios/2channel_16K.wav",
		},
	}

	input := dashscopego.AudioInput{
		Messages: []dashscopego.AudioMessage{
			{Role: "system", Content: &sysContent},
			{Role: "user", Content: &userContent},
		},
	}

	// callback function:  print stream result
	streamCallbackFn := func(ctx context.Context, chunk []byte) error {
		log.Print(string(chunk))
		return nil
	}
	req := &dashscopego.AudioRequest{
		Input:       input,
		StreamingFn: streamCallbackFn,
	}

	ctx := context.TODO()
	resp, err := cli.CreateAudioCompletion(ctx, req)
	if err != nil {
		panic(err)
	}

	log.Println("\nnon-stream result: ")
	log.Println(resp.Output.Choices[0].Message.Content.ToString())
}

Paraformer(语音识别)

实时语音识别API
录音文件识别API

Experimental:

开发文档中还没有看到 HTTP 调用说明, 参照 dashscope python 库中的步骤实现, 将来可能会有变更
参数中的 SampleRate 目前好像仅支持 16000, 使用真实录音时要留意录音设备的 sample_rate 是否与之匹配

package main

import (
	"bufio"
	"context"
	"fmt"
	"os"
	"os/user"
	"path/filepath"
	"time"

	"github.com/eswulei/dashscope-go"
	"github.com/eswulei/dashscope-go/paraformer"
)

// main streams audio from readAudioFromDesktop to the Paraformer real-time
// speech recognition model and prints every chunk handed to the streaming
// callback.
//
// Unlike the earlier version of this example, the error returned by
// CreateSpeechToTextGeneration is checked instead of being discarded.
func main() {
	model := paraformer.ParaformerRealTimeV1
	token := os.Getenv("DASHSCOPE_API_KEY")
	if token == "" {
		panic("token is empty")
	}

	cli := dashscopego.NewTongyiClient(model, token)

	streamCallbackFn := func(ctx context.Context, chunk []byte) error {
		fmt.Print(string(chunk))
		return nil
	}

	headerPara := paraformer.ReqHeader{
		Streaming: "duplex",
		TaskID:    paraformer.GenerateTaskID(),
		Action:    "run-task",
	}

	payload := paraformer.PayloadIn{
		Parameters: paraformer.Parameters{
			// Only a 16000 sample rate seems to be supported.
			SampleRate: 16000,
			Format:     "pcm",
		},
		Input:     map[string]interface{}{},
		Task:      "asr",
		TaskGroup: "audio",
		Function:  "recognition",
	}

	req := &paraformer.Request{
		Header:      headerPara,
		Payload:     payload,
		StreamingFn: streamCallbackFn,
	}

	// Audio source: replace with a real-time audio stream in actual use.
	voiceReader := readAudioFromDesktop()

	// Kept so that a raw io.Reader can be dropped in above; wrapping an
	// existing *bufio.Reader of sufficient size returns it unchanged.
	reader := bufio.NewReader(voiceReader)

	if err := cli.CreateSpeechToTextGeneration(context.TODO(), req, reader); err != nil {
		panic(err)
	}

	// Wait for the speech recognition results to be printed.
	time.Sleep(5 * time.Second)
}

// readAudioFromDesktop opens "hello_world_female2.wav" in the "Desktop"
// directory of the current user's home and returns a buffered reader over it,
// simulating a real-time audio stream from a recording.
//
// The original note says the file is a sample downloaded from the official
// documentation and cites
// `https://dashscope.oss-cn-beijing.aliyuncs.com/samples/audio/paraformer/hello_world_male2.wav`.
// NOTE(review): that URL names "male2" while the code opens "female2" —
// confirm which sample is intended.
//
// It panics if the current user cannot be determined or the file cannot be
// opened. The file is never closed here; the returned reader holds the only
// reference to it.
func readAudioFromDesktop() *bufio.Reader {
	usr, err := user.Current()
	if err != nil {
		panic(err)
	}

	voiceFilePath := filepath.Join(usr.HomeDir, "Desktop", "hello_world_female2.wav")
	// os.Open is the read-only shorthand for os.OpenFile; the permission
	// argument is only consulted when a file is created.
	f, err := os.Open(voiceFilePath)
	if err != nil {
		panic(err)
	}

	return bufio.NewReader(f)
}

Documentation ¶

Index ¶

Constants
Variables
func NewQwenMessage[T qwen.IQwenContent](role string, content T) *qwen.Message[T]
type AudioInput
type AudioMessage
type AudioQwenResponse
type AudioRequest
type TextInput
type TextMessage
type TextQwenResponse
type TextRequest
type TongyiClient
- func NewTongyiClient(model string, token string) *TongyiClient
- func (q *TongyiClient) CreateAudioCompletion(ctx context.Context, payload *qwen.Request[*qwen.AudioContentList]) (*AudioQwenResponse, error)
- func (q *TongyiClient) CreateCompletion(ctx context.Context, payload *qwen.Request[*qwen.TextContent]) (*TextQwenResponse, error)
- func (q *TongyiClient) CreateEmbedding(ctx context.Context, r *embedding.Request) ([][]float32, error)
- func (q *TongyiClient) CreateImageGeneration(ctx context.Context, payload *wanx.ImageSynthesisRequest) ([]*wanx.ImgBlob, error)
- func (q *TongyiClient) CreateSpeechToTextGeneration(ctx context.Context, request *paraformer.Request, reader *bufio.Reader) error
- func (q *TongyiClient) CreateVLCompletion(ctx context.Context, payload *qwen.Request[*qwen.VLContentList]) (*VLQwenResponse, error)
type VLInput
type VLMessage
type VLQwenResponse
type VLRequest
type WrapMessageError
- func (e *WrapMessageError) Error() string

Constants ¶

View Source

const (
	// DashscopeTokenEnvName is the string "DASHSCOPE_API_KEY", the same name
	// the README examples pass to os.Getenv to obtain the API token.
	DashscopeTokenEnvName = "DASHSCOPE_API_KEY" //nolint:gosec
)

Variables ¶

View Source

// Sentinel errors exported by the package. Where each one is returned is not
// visible in this listing.
var (
	// ErrModelNotSet carries the message "model is not set".
	ErrModelNotSet     = errors.New("model is not set")
	// ErrEmptyResponse carries the message "empty response".
	ErrEmptyResponse   = errors.New("empty response")
	// ErrImageFilePrefix reports an unsupported prefix; per its message the
	// accepted prefixes are file://, https:// and http://.
	ErrImageFilePrefix = errors.New("file prefix is not supported, must be one of: file://, https://, http://")
)

Functions ¶

func NewQwenMessage ¶

func NewQwenMessage[T qwen.IQwenContent](role string, content T) *qwen.Message[T]

Types ¶

type AudioInput ¶

// AudioInput is an alias for qwen.Input instantiated with *qwen.AudioContentList.
type AudioInput = qwen.Input[*qwen.AudioContentList]

type AudioMessage ¶

// AudioMessage is an alias for qwen.Message instantiated with *qwen.AudioContentList.
type AudioMessage = qwen.Message[*qwen.AudioContentList]

type AudioQwenResponse ¶

// AudioQwenResponse is an alias for qwen.OutputResponse instantiated with
// *qwen.AudioContentList; it is the result type of CreateAudioCompletion.
type AudioQwenResponse = qwen.OutputResponse[*qwen.AudioContentList]

type AudioRequest ¶

// AudioRequest is an alias for qwen.Request instantiated with *qwen.AudioContentList.
type AudioRequest = qwen.Request[*qwen.AudioContentList]

type TextInput ¶

// TextInput is an alias for qwen.Input instantiated with *qwen.TextContent.
type TextInput = qwen.Input[*qwen.TextContent]

type TextMessage ¶

// TextMessage is an alias for qwen.Message instantiated with *qwen.TextContent.
type TextMessage = qwen.Message[*qwen.TextContent]

type TextQwenResponse ¶

// TextQwenResponse is an alias for qwen.OutputResponse instantiated with
// *qwen.TextContent; it is the result type of CreateCompletion.
type TextQwenResponse = qwen.OutputResponse[*qwen.TextContent]

type TextRequest ¶

// TextRequest is an alias for qwen.Request instantiated with *qwen.TextContent.
type TextRequest = qwen.Request[*qwen.TextContent]

type TongyiClient ¶

// TongyiClient is the client type returned by NewTongyiClient; the Create*
// methods listed in this documentation are defined on *TongyiClient.
type TongyiClient struct {
	// Model is a string field; presumably the model name passed to
	// NewTongyiClient — TODO confirm against the constructor.
	Model string
	// contains filtered or unexported fields
}

func NewTongyiClient ¶

func NewTongyiClient(model string, token string) *TongyiClient

func (*TongyiClient) CreateAudioCompletion ¶

func (q *TongyiClient) CreateAudioCompletion(ctx context.Context, payload *qwen.Request[*qwen.AudioContentList]) (*AudioQwenResponse, error)

func (*TongyiClient) CreateCompletion ¶

func (q *TongyiClient) CreateCompletion(ctx context.Context, payload *qwen.Request[*qwen.TextContent]) (*TextQwenResponse, error)

Duplicate: CreateCompletion and CreateVLCompletion are the same apart from their payload types; maybe this can be changed in the future.

nolint:lll

func (*TongyiClient) CreateEmbedding ¶

func (q *TongyiClient) CreateEmbedding(ctx context.Context, r *embedding.Request) ([][]float32, error)

func (*TongyiClient) CreateImageGeneration ¶

func (q *TongyiClient) CreateImageGeneration(ctx context.Context, payload *wanx.ImageSynthesisRequest) ([]*wanx.ImgBlob, error)

TODO: integrate wanx.Request into qwen.IQwenContent (or should it be renamed to ITongyiContent?)

func (*TongyiClient) CreateSpeechToTextGeneration ¶

func (q *TongyiClient) CreateSpeechToTextGeneration(ctx context.Context, request *paraformer.Request, reader *bufio.Reader) error

func (*TongyiClient) CreateVLCompletion ¶

func (q *TongyiClient) CreateVLCompletion(ctx context.Context, payload *qwen.Request[*qwen.VLContentList]) (*VLQwenResponse, error)

type VLInput ¶

// VLInput is an alias for qwen.Input instantiated with *qwen.VLContentList.
type VLInput = qwen.Input[*qwen.VLContentList]

type VLMessage ¶

// VLMessage is an alias for qwen.Message instantiated with *qwen.VLContentList.
type VLMessage = qwen.Message[*qwen.VLContentList]

type VLQwenResponse ¶

// VLQwenResponse is an alias for qwen.OutputResponse instantiated with
// *qwen.VLContentList; it is the result type of CreateVLCompletion.
type VLQwenResponse = qwen.OutputResponse[*qwen.VLContentList]

type VLRequest ¶

// VLRequest is an alias for qwen.Request instantiated with *qwen.VLContentList.
type VLRequest = qwen.Request[*qwen.VLContentList]

type WrapMessageError ¶

// WrapMessageError pairs a message with an underlying error. An Error() string
// method is declared on *WrapMessageError; how it formats the two fields is
// not visible in this listing.
type WrapMessageError struct {
	// Message is the text associated with the error.
	Message string
	// Cause is the underlying error value.
	Cause   error
}

func (*WrapMessageError) Error ¶

func (e *WrapMessageError) Error() string

Source Files ¶

View all Source files

Directories ¶

Path	Synopsis
embedding
example
paraformer
qwen
qwen_audio
qwen_vl
wanx
httpclient	Code generated by MockGen.
paraformer
qwen
wanx

?	: This menu
/	: Search site
f or F	: Jump to
y or Y	: Canonical URL