llm

package
v0.1.33 Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: May 2, 2024 License: MIT Imports: 32 Imported by: 2

Documentation

Index

Constants

View Source
const (
	// Magic constant for `ggml` files (unversioned).
	FILE_MAGIC_GGML = 0x67676d6c
	// Magic constant for `ggml` files (versioned, ggmf).
	FILE_MAGIC_GGMF = 0x67676d66
	// Magic constant for `ggml` files (versioned, ggjt).
	FILE_MAGIC_GGJT = 0x67676a74
	// Magic constant for `ggla` files (LoRA adapter).
	FILE_MAGIC_GGLA = 0x67676C61
	// Magic constant for `gguf` files (versioned, gguf)
	FILE_MAGIC_GGUF_LE = 0x46554747
	FILE_MAGIC_GGUF_BE = 0x47475546
)
View Source
const (
	GGUFTokenNormal uint32
	GGUFTokenUnknown
	GGUFTokenControl
	GGUFTokenUserDefined
	GGUFTokenUnused
	GGUFTokenByte
)

Variables

View Source
var ErrUnsupportedFormat = errors.New("unsupported model format")

Functions

func EstimateGPULayers added in v0.1.33

func EstimateGPULayers(gpus []gpu.GpuInfo, ggml *GGML, projectors []string, opts api.Options) (int, uint64)

Given a model and one or more GPU targets, predict how many layers and bytes we can load. The GPUs provided must all be the same Library.

func Init

func Init() error

func NewGGUFV3 added in v0.1.32

func NewGGUFV3(bo binary.ByteOrder) *gguf

func PredictServerFit added in v0.1.33

func PredictServerFit(allGpus gpu.GpuInfoList, ggml *GGML, adapters, projectors []string, opts api.Options) (bool, uint64)

This algorithm looks for a complete fit to determine if we need to unload other models

func Quantize added in v0.1.32

func Quantize(infile, outfile, filetype string) error

func SystemInfo added in v0.1.32

func SystemInfo() string

SystemInfo is an unused example of calling llama.cpp functions using CGo

Types

type CompletionRequest added in v0.1.32

type CompletionRequest struct {
	Prompt  string
	Format  string
	Images  []ImageData
	Options api.Options
}

type CompletionResponse added in v0.1.32

type CompletionResponse struct {
	Content            string
	Done               bool
	PromptEvalCount    int
	PromptEvalDuration time.Duration
	EvalCount          int
	EvalDuration       time.Duration
}

type DetokenizeRequest

type DetokenizeRequest struct {
	Tokens []int `json:"tokens"`
}

type DetokenizeResponse

type DetokenizeResponse struct {
	Content string `json:"content"`
}

type EmbeddingRequest

type EmbeddingRequest struct {
	Content string `json:"content"`
}

type EmbeddingResponse

type EmbeddingResponse struct {
	Embedding []float64 `json:"embedding"`
}

type GGML

type GGML struct {
	// contains filtered or unexported fields
}

func DecodeGGML

func DecodeGGML(rs io.ReadSeeker) (*GGML, int64, error)

func LoadModel added in v0.1.33

func LoadModel(model string) (*GGML, error)

func (GGML) GraphSize added in v0.1.32

func (llm GGML) GraphSize(context, batch uint64) (partialOffload, fullOffload uint64)

type ImageData

type ImageData struct {
	Data []byte `json:"data"`
	ID   int    `json:"id"`
}

type KV

type KV map[string]any

func (KV) Architecture added in v0.1.32

func (kv KV) Architecture() string

func (KV) BlockCount added in v0.1.32

func (kv KV) BlockCount() uint64

func (KV) ContextLength added in v0.1.32

func (kv KV) ContextLength() uint64

func (KV) EmbeddingLength added in v0.1.32

func (kv KV) EmbeddingLength() uint64

func (KV) FileType added in v0.1.32

func (kv KV) FileType() string

func (KV) GQA added in v0.1.32

func (kv KV) GQA() uint64

func (KV) HeadCount added in v0.1.32

func (kv KV) HeadCount() uint64

func (KV) HeadCountKV added in v0.1.32

func (kv KV) HeadCountKV() uint64

func (KV) ParameterCount added in v0.1.32

func (kv KV) ParameterCount() uint64

type Layer added in v0.1.32

type Layer map[string]*Tensor

type LlamaServer added in v0.1.32

type LlamaServer interface {
	Ping(ctx context.Context) error
	WaitUntilRunning(ctx context.Context) error
	Completion(ctx context.Context, req CompletionRequest, fn func(CompletionResponse)) error
	Embedding(ctx context.Context, prompt string) ([]float64, error)
	Tokenize(ctx context.Context, content string) ([]int, error)
	Detokenize(ctx context.Context, tokens []int) (string, error)
	Close() error
	EstimatedVRAM() uint64
}

func NewLlamaServer added in v0.1.32

func NewLlamaServer(gpus gpu.GpuInfoList, model string, ggml *GGML, adapters, projectors []string, opts api.Options) (LlamaServer, error)

NewLlamaServer will run a server for the given GPUs. The gpu list must be a single family.

type ServerStatus added in v0.1.32

type ServerStatus int
const (
	ServerStatusReady ServerStatus = iota
	ServerStatusNoSlotsAvaialble
	ServerStatusLoadingModel
	ServerStatusNotResponding
	ServerStatusError
)

func (ServerStatus) ToString added in v0.1.33

func (s ServerStatus) ToString() string

type ServerStatusResp added in v0.1.32

type ServerStatusResp struct {
	Status          string `json:"status"`
	SlotsIdle       int    `json:"slots_idle"`
	SlotsProcessing int    `json:"slots_processing"`
	Error           string `json:"error"`
}

type StatusWriter added in v0.1.32

type StatusWriter struct {
	LastErrMsg string
	// contains filtered or unexported fields
}

StatusWriter is a writer that captures error messages from the llama runner process

func NewStatusWriter added in v0.1.32

func NewStatusWriter(out *os.File) *StatusWriter

func (*StatusWriter) Write added in v0.1.32

func (w *StatusWriter) Write(b []byte) (int, error)

type Tensor

type Tensor struct {
	Name   string `json:"name"`
	Kind   uint32 `json:"kind"`
	Offset uint64 `json:"-"`

	// Shape is the number of elements in each dimension
	Shape []uint64 `json:"shape"`

	io.WriterTo `json:"-"`
}

type Tensors added in v0.1.32

type Tensors []*Tensor

func (Tensors) Layers added in v0.1.32

func (ts Tensors) Layers() map[string]Layer

type TokenizeRequest

type TokenizeRequest struct {
	Content string `json:"content"`
}

type TokenizeResponse

type TokenizeResponse struct {
	Tokens []int `json:"tokens"`
}

Directories

Path Synopsis

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL