llm

package
v0.1.32 Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: Apr 14, 2024 License: MIT Imports: 31 Imported by: 2

Documentation

Index

Constants

View Source
const (
	// Magic constant for `ggml` files (unversioned).
	FILE_MAGIC_GGML = 0x67676d6c
	// Magic constant for `ggml` files (versioned, ggmf).
	FILE_MAGIC_GGMF = 0x67676d66
	// Magic constant for `ggml` files (versioned, ggjt).
	FILE_MAGIC_GGJT = 0x67676a74
	// Magic constant for `ggla` files (LoRA adapter).
	FILE_MAGIC_GGLA = 0x67676C61
	// Magic constant for `gguf` files (versioned, gguf)
	FILE_MAGIC_GGUF_LE = 0x46554747
	FILE_MAGIC_GGUF_BE = 0x47475546
)
View Source
const (
	GGUFTokenNormal uint32
	GGUFTokenUnknown
	GGUFTokenControl
	GGUFTokenUserDefined
	GGUFTokenUnused
	GGUFTokenByte
)

Variables

View Source
var ErrUnsupportedFormat = errors.New("unsupported model format")

Functions

func Init

func Init() error

func NewGGUFV3 added in v0.1.32

func NewGGUFV3(bo binary.ByteOrder) *gguf

func Quantize added in v0.1.32

func Quantize(infile, outfile, filetype string) error

func SystemInfo added in v0.1.32

func SystemInfo() string

SystemInfo is an unused example of calling llama.cpp functions using CGo

Types

type CompletionRequest added in v0.1.32

type CompletionRequest struct {
	Prompt  string
	Format  string
	Images  []ImageData
	Options api.Options
}

type CompletionResponse added in v0.1.32

type CompletionResponse struct {
	Content            string
	Done               bool
	PromptEvalCount    int
	PromptEvalDuration time.Duration
	EvalCount          int
	EvalDuration       time.Duration
}

type DetokenizeRequest

type DetokenizeRequest struct {
	Tokens []int `json:"tokens"`
}

type DetokenizeResponse

type DetokenizeResponse struct {
	Content string `json:"content"`
}

type EmbeddingRequest

type EmbeddingRequest struct {
	Content string `json:"content"`
}

type EmbeddingResponse

type EmbeddingResponse struct {
	Embedding []float64 `json:"embedding"`
}

type GGML

type GGML struct {
	// contains filtered or unexported fields
}

func DecodeGGML

func DecodeGGML(rs io.ReadSeeker) (*GGML, int64, error)

func (GGML) GraphSize added in v0.1.32

func (llm GGML) GraphSize(context, batch uint64) (partialOffload, fullOffload uint64)

type ImageData

type ImageData struct {
	Data []byte `json:"data"`
	ID   int    `json:"id"`
}

type KV

type KV map[string]any

func (KV) Architecture added in v0.1.32

func (kv KV) Architecture() string

func (KV) BlockCount added in v0.1.32

func (kv KV) BlockCount() uint64

func (KV) ContextLength added in v0.1.32

func (kv KV) ContextLength() uint64

func (KV) EmbeddingLength added in v0.1.32

func (kv KV) EmbeddingLength() uint64

func (KV) FileType added in v0.1.32

func (kv KV) FileType() string

func (KV) GQA added in v0.1.32

func (kv KV) GQA() uint64

func (KV) HeadCount added in v0.1.32

func (kv KV) HeadCount() uint64

func (KV) HeadCountKV added in v0.1.32

func (kv KV) HeadCountKV() uint64

func (KV) ParameterCount added in v0.1.32

func (kv KV) ParameterCount() uint64

type Layer added in v0.1.32

type Layer map[string]*Tensor

type LlamaServer added in v0.1.32

type LlamaServer struct {
	// contains filtered or unexported fields
}

LlamaServer is an instance of the llama.cpp server

func NewLlamaServer added in v0.1.32

func NewLlamaServer(model string, adapters, projectors []string, opts api.Options) (*LlamaServer, error)

func (*LlamaServer) Close added in v0.1.32

func (s *LlamaServer) Close() error

func (*LlamaServer) Completion added in v0.1.32

func (s *LlamaServer) Completion(ctx context.Context, req CompletionRequest, fn func(CompletionResponse)) error

func (*LlamaServer) Detokenize added in v0.1.32

func (s *LlamaServer) Detokenize(ctx context.Context, tokens []int) (string, error)

func (*LlamaServer) Embedding added in v0.1.32

func (s *LlamaServer) Embedding(ctx context.Context, prompt string) ([]float64, error)

func (*LlamaServer) Ping added in v0.1.32

func (s *LlamaServer) Ping(ctx context.Context) error

func (*LlamaServer) Tokenize added in v0.1.32

func (s *LlamaServer) Tokenize(ctx context.Context, content string) ([]int, error)

type ServerStatus added in v0.1.32

type ServerStatus int
const (
	ServerStatusReady ServerStatus = iota
	ServerStatusNoSlotsAvaialble
	ServerStatusLoadingModel
	ServerStatusNotResponding
	ServerStatusError
)

type ServerStatusResp added in v0.1.32

type ServerStatusResp struct {
	Status          string `json:"status"`
	SlotsIdle       int    `json:"slots_idle"`
	SlotsProcessing int    `json:"slots_processing"`
	Error           string `json:"error"`
}

type StatusWriter added in v0.1.32

type StatusWriter struct {
	LastErrMsg string
	// contains filtered or unexported fields
}

StatusWriter is a writer that captures error messages from the llama runner process

func NewStatusWriter added in v0.1.32

func NewStatusWriter(out *os.File) *StatusWriter

func (*StatusWriter) Write added in v0.1.32

func (w *StatusWriter) Write(b []byte) (int, error)

type Tensor

type Tensor struct {
	Name   string `json:"name"`
	Kind   uint32 `json:"kind"`
	Offset uint64 `json:"-"`

	// Shape is the number of elements in each dimension
	Shape []uint64 `json:"shape"`

	io.WriterTo `json:"-"`
}

type Tensors added in v0.1.32

type Tensors []*Tensor

func (Tensors) Layers added in v0.1.32

func (ts Tensors) Layers() map[string]Layer

type TokenizeRequest

type TokenizeRequest struct {
	Content string `json:"content"`
}

type TokenizeResponse

type TokenizeResponse struct {
	Tokens []int `json:"tokens"`
}

Directories

Path Synopsis

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL