llm

package
v0.1.32 Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: Apr 14, 2024 License: MIT Imports: 31 Imported by: 2

Documentation

Index

Constants

View Source
const (
	// Magic constant for `ggml` files (unversioned).
	FILE_MAGIC_GGML = 0x67676d6c
	// Magic constant for `ggml` files (versioned, ggmf).
	FILE_MAGIC_GGMF = 0x67676d66
	// Magic constant for `ggml` files (versioned, ggjt).
	FILE_MAGIC_GGJT = 0x67676a74
	// Magic constant for `ggla` files (LoRA adapter).
	FILE_MAGIC_GGLA = 0x67676C61
	// Magic constant for `gguf` files (versioned, gguf)
	FILE_MAGIC_GGUF_LE = 0x46554747
	FILE_MAGIC_GGUF_BE = 0x47475546
)
View Source
const (
	GGUFTokenNormal uint32
	GGUFTokenUnknown
	GGUFTokenControl
	GGUFTokenUserDefined
	GGUFTokenUnused
	GGUFTokenByte
)

Variables

View Source
var ErrUnsupportedFormat = errors.New("unsupported model format")

Functions

func Init

func Init() error

func NewGGUFV3 added in v0.1.32

func NewGGUFV3(bo binary.ByteOrder) *gguf

func Quantize added in v0.1.32

func Quantize(infile, outfile, filetype string) error

func SystemInfo added in v0.1.32

func SystemInfo() string

SystemInfo is an unused example of calling llama.cpp functions using CGo

Types

type CompletionRequest added in v0.1.32

type CompletionRequest struct {
	Prompt  string
	Format  string
	Images  []ImageData
	Options api.Options
}

type CompletionResponse added in v0.1.32

type CompletionResponse struct {
	Content            string
	Done               bool
	PromptEvalCount    int
	PromptEvalDuration time.Duration
	EvalCount          int
	EvalDuration       time.Duration
}

type DetokenizeRequest

type DetokenizeRequest struct {
	Tokens []int `json:"tokens"`
}

type DetokenizeResponse

type DetokenizeResponse struct {
	Content string `json:"content"`
}

type EmbeddingRequest

type EmbeddingRequest struct {
	Content string `json:"content"`
}

type EmbeddingResponse

type EmbeddingResponse struct {
	Embedding []float64 `json:"embedding"`
}

type GGML

type GGML struct {
	// contains filtered or unexported fields
}

func DecodeGGML

func DecodeGGML(rs io.ReadSeeker) (*GGML, int64, error)

func (GGML) GraphSize added in v0.1.32

func (llm GGML) GraphSize(context, batch uint64) (partialOffload, fullOffload uint64)

type ImageData

type ImageData struct {
	Data []byte `json:"data"`
	ID   int    `json:"id"`
}

type KV

type KV map[string]any

func (KV) Architecture added in v0.1.32

func (kv KV) Architecture() string

func (KV) BlockCount added in v0.1.32

func (kv KV) BlockCount() uint64

func (KV) ContextLength added in v0.1.32

func (kv KV) ContextLength() uint64

func (KV) EmbeddingLength added in v0.1.32

func (kv KV) EmbeddingLength() uint64

func (KV) FileType added in v0.1.32

func (kv KV) FileType() string

func (KV) GQA added in v0.1.32

func (kv KV) GQA() uint64

func (KV) HeadCount added in v0.1.32

func (kv KV) HeadCount() uint64

func (KV) HeadCountKV added in v0.1.32

func (kv KV) HeadCountKV() uint64

func (KV) ParameterCount added in v0.1.32

func (kv KV) ParameterCount() uint64

type Layer added in v0.1.32

type Layer map[string]*Tensor

type LlamaServer added in v0.1.32

type LlamaServer struct {
	// contains filtered or unexported fields
}

LlamaServer is an instance of the llama.cpp server

func NewLlamaServer added in v0.1.32

func NewLlamaServer(model string, adapters, projectors []string, opts api.Options) (*LlamaServer, error)

func (*LlamaServer) Close added in v0.1.32

func (s *LlamaServer) Close() error

func (*LlamaServer) Completion added in v0.1.32

func (s *LlamaServer) Completion(ctx context.Context, req CompletionRequest, fn func(CompletionResponse)) error

func (*LlamaServer) Detokenize added in v0.1.32

func (s *LlamaServer) Detokenize(ctx context.Context, tokens []int) (string, error)

func (*LlamaServer) Embedding added in v0.1.32

func (s *LlamaServer) Embedding(ctx context.Context, prompt string) ([]float64, error)

func (*LlamaServer) Ping added in v0.1.32

func (s *LlamaServer) Ping(ctx context.Context) error

func (*LlamaServer) Tokenize added in v0.1.32

func (s *LlamaServer) Tokenize(ctx context.Context, content string) ([]int, error)

type ServerStatus added in v0.1.32

type ServerStatus int
const (
	ServerStatusReady ServerStatus = iota
	ServerStatusNoSlotsAvaialble
	ServerStatusLoadingModel
	ServerStatusNotResponding
	ServerStatusError
)

type ServerStatusResp added in v0.1.32

type ServerStatusResp struct {
	Status          string `json:"status"`
	SlotsIdle       int    `json:"slots_idle"`
	SlotsProcessing int    `json:"slots_processing"`
	Error           string `json:"error"`
}

type StatusWriter added in v0.1.32

type StatusWriter struct {
	LastErrMsg string
	// contains filtered or unexported fields
}

StatusWriter is a writer that captures error messages from the llama runner process

func NewStatusWriter added in v0.1.32

func NewStatusWriter(out *os.File) *StatusWriter

func (*StatusWriter) Write added in v0.1.32

func (w *StatusWriter) Write(b []byte) (int, error)

type Tensor

type Tensor struct {
	Name   string `json:"name"`
	Kind   uint32 `json:"kind"`
	Offset uint64 `json:"-"`

	// Shape is the number of elements in each dimension
	Shape []uint64 `json:"shape"`

	io.WriterTo `json:"-"`
}

type Tensors added in v0.1.32

type Tensors []*Tensor

func (Tensors) Layers added in v0.1.32

func (ts Tensors) Layers() map[string]Layer

type TokenizeRequest

type TokenizeRequest struct {
	Content string `json:"content"`
}

type TokenizeResponse

type TokenizeResponse struct {
	Tokens []int `json:"tokens"`
}

Directories

Path Synopsis

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL