rag

package
v0.0.12 Latest Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: Apr 1, 2024 | License: Apache-2.0 | Imports: 5 | Imported by: 0

Documentation

Index

Examples

Constants

View Source
// Message texts for document/chunk lookup failures. These are untyped string
// constants, not values of type error.
// NOTE(review): where and how these messages are turned into errors is not
// visible here — confirm against the implementation.
const (
	// ErrDocAreadyExists is the message text for a document that already exists.
	// NOTE(review): the identifier is spelled "Aready" (sic); it is exported,
	// so renaming it would break callers.
	ErrDocAreadyExists = "Document already exists!"
	// ErrDocNotExists is the message text for a document that does not exist.
	ErrDocNotExists    = "Document not exists!"
	// ErrChunkNotExists is the message text for a chunk that does not exist.
	ErrChunkNotExists  = "Chunk not exists!"
)
View Source
// Default values related to chunking.
// NOTE(review): presumably consumed by TextSplitter / NewTextSplitter; their
// use is not visible here — confirm.
const (
	// DefaultChunkSize is the default chunk size (512). The unit (bytes vs.
	// runes) is not visible here — TODO confirm.
	DefaultChunkSize  = 512
	// DefaultMinOverlap is 0.00; presumably the lower bound of the allowed
	// overlap fraction — TODO confirm.
	DefaultMinOverlap = 0.00
	// DefaultMaxOverlap is 0.99; presumably the upper bound of the allowed
	// overlap fraction — TODO confirm.
	DefaultMaxOverlap = 0.99
)

Variables

This section is empty.

Functions

func CosSim

func CosSim(qembeds, dbembeds []autog.Embedding, qnorms, dbnorms *autog.Embedding, qsi, dsi int, topk int, dbchunks *[]autog.Chunk, channel chan<- []autog.ScoredChunks)

func DotProduct

func DotProduct(a, b []float64) float64

func Norm

func Norm(embed autog.Embedding) float64

func Norms

func Norms(embeds []autog.Embedding) autog.Embedding

Types

type MemChunk

// MemChunk is a chunk record with plain, JSON-tagged fields. Every field is
// serialized under a JSON key equal to its Go field name.
//
// The accessor methods listed for this type expose Payload as interface{} and
// Embedding as autog.Embedding, while the fields themselves are stored as
// string and []float64; the conversion between the two is not visible here.
type MemChunk struct {
	// Index is the chunk's index.
	// NOTE(review): presumably its position within the owning document — confirm.
	Index     int       `json:"Index"`
	// Path is the path associated with the chunk.
	Path      string    `json:"Path"`
	// Query is a query string stored with the chunk.
	Query     string    `json:"Query"`
	// Content is the chunk's text.
	Content   string    `json:"Content"`
	// ByteStart and ByteEnd delimit the chunk.
	// NOTE(review): presumably byte offsets into the source text; whether
	// ByteEnd is inclusive is not visible here — confirm.
	ByteStart int       `json:"ByteStart"`
	ByteEnd   int       `json:"ByteEnd"`
	// Payload is caller-supplied data, stored as a string.
	Payload   string    `json:"Payload"`
	// Embedding is the chunk's embedding vector.
	Embedding []float64 `json:"Embedding"`
}

func (*MemChunk) GetByteEnd

func (chunk *MemChunk) GetByteEnd() int

func (*MemChunk) GetByteStart

func (chunk *MemChunk) GetByteStart() int

func (*MemChunk) GetContent

func (chunk *MemChunk) GetContent() string

func (*MemChunk) GetEmbedding

func (chunk *MemChunk) GetEmbedding() autog.Embedding

func (*MemChunk) GetIndex

func (chunk *MemChunk) GetIndex() int

func (*MemChunk) GetPath

func (chunk *MemChunk) GetPath() string

func (*MemChunk) GetPayload

func (chunk *MemChunk) GetPayload() interface{}

func (*MemChunk) GetQuery

func (chunk *MemChunk) GetQuery() string

func (*MemChunk) SetByteEnd

func (chunk *MemChunk) SetByteEnd(i int)

func (*MemChunk) SetByteStart

func (chunk *MemChunk) SetByteStart(i int)

func (*MemChunk) SetContent

func (chunk *MemChunk) SetContent(content string)

func (*MemChunk) SetEmbedding

func (chunk *MemChunk) SetEmbedding(embed autog.Embedding)

func (*MemChunk) SetIndex

func (chunk *MemChunk) SetIndex(index int)

func (*MemChunk) SetPath

func (chunk *MemChunk) SetPath(path string)

func (*MemChunk) SetPayload

func (chunk *MemChunk) SetPayload(payload interface{})

func (*MemChunk) SetQuery

func (chunk *MemChunk) SetQuery(query string)

type MemDocument

// MemDocument groups the chunks stored under one path. Every field is
// serialized under a JSON key equal to its Go field name.
type MemDocument struct {
	// Path is the path of the document.
	Path    string      `json:"Path"`
	// Payload is caller-supplied data, stored as a string (the accessor
	// methods listed for this type expose it as interface{}).
	Payload string      `json:"Payload"`
	// Chunks holds pointers to the document's chunks.
	Chunks  []*MemChunk `json:"Chunks"`
}

func (*MemDocument) GetChunks

func (doc *MemDocument) GetChunks() []autog.Chunk

func (*MemDocument) GetPath

func (doc *MemDocument) GetPath() string

func (*MemDocument) GetPayload

func (doc *MemDocument) GetPayload() interface{}

func (*MemDocument) SetChunks

func (doc *MemDocument) SetChunks(chunks []autog.Chunk)

func (*MemDocument) SetPath

func (doc *MemDocument) SetPath(path string)

func (*MemDocument) SetPayload

func (doc *MemDocument) SetPayload(payload interface{})

type MemDocuments

// MemDocuments is a slice of *MemDocument.
type MemDocuments []*MemDocument

func (*MemDocuments) Append

func (m *MemDocuments) Append(doc *MemDocument)

type MemoryDatabase

// MemoryDatabase keeps documents in a Go map.
// NOTE(review): no synchronization is visible in this declaration; whether
// the type is safe for concurrent use cannot be determined here.
type MemoryDatabase struct {
	// PathToDocuments maps a string key to the document list stored under it.
	// NOTE(review): presumably keyed by document path, as the name suggests — confirm.
	PathToDocuments map[string]*MemDocuments
}

func NewMemDatabase

func NewMemDatabase() (*MemoryDatabase, error)

func (*MemoryDatabase) AppendChunks

func (md *MemoryDatabase) AppendChunks(path string, payload interface{}, chunks []autog.Chunk) error

func (*MemoryDatabase) DelDocuments

func (md *MemoryDatabase) DelDocuments(path string) error

func (*MemoryDatabase) GetChunks

func (md *MemoryDatabase) GetChunks() ([]autog.Chunk, []autog.Embedding, error)

func (*MemoryDatabase) GetDocuments

func (md *MemoryDatabase) GetDocuments(path string) (*MemDocuments, error)

func (*MemoryDatabase) GetPathChunks

func (md *MemoryDatabase) GetPathChunks(path string) ([]autog.Chunk, []autog.Embedding, error)

func (*MemoryDatabase) GetPaths

func (md *MemoryDatabase) GetPaths() ([]string, error)

func (*MemoryDatabase) InitDatabase

func (md *MemoryDatabase) InitDatabase() error

func (*MemoryDatabase) SaveChunks

func (md *MemoryDatabase) SaveChunks(path string, payload interface{}, chunks []autog.Chunk) error

func (*MemoryDatabase) SearchChunks

func (md *MemoryDatabase) SearchChunks(path string, embeds []autog.Embedding, topk int) ([]autog.ScoredChunks, error)

type ScoredChunkIndex

// ScoredChunkIndex pairs an integer index with a floating-point score.
type ScoredChunkIndex struct {
	// Index is an integer index.
	// NOTE(review): presumably an index into a chunk slice — confirm.
	Index int
	// Score is the score attached to Index.
	// NOTE(review): presumably a similarity score (see CosSim) — confirm.
	Score float64
}

type ScoredChunkIndexs

// ScoredChunkIndexs is a slice of ScoredChunkIndex values.
// NOTE(review): the methods listed for this type (Len, Less, Swap, Push, Pop)
// match the method set of container/heap's heap.Interface, so it is presumably
// used as a heap; the ordering defined by Less is not visible here — confirm.
type ScoredChunkIndexs []ScoredChunkIndex

func (ScoredChunkIndexs) Len

func (s ScoredChunkIndexs) Len() int

func (ScoredChunkIndexs) Less

func (s ScoredChunkIndexs) Less(i, j int) bool

func (ScoredChunkIndexs) Peek

func (s ScoredChunkIndexs) Peek() interface{}

func (*ScoredChunkIndexs) Pop

func (s *ScoredChunkIndexs) Pop() interface{}

func (*ScoredChunkIndexs) Push

func (s *ScoredChunkIndexs) Push(c interface{})

func (ScoredChunkIndexs) Swap

func (s ScoredChunkIndexs) Swap(i, j int)

type TextSplitter

// TextSplitter holds the settings used to split text into chunks. A parser
// function is obtained from it via GetParser.
type TextSplitter struct {
	// ChunkSize is the chunk size setting.
	// NOTE(review): unit (bytes vs. runes) and whether it is a hard limit are
	// not visible here — confirm.
	ChunkSize       int
	// Overlap is the overlap setting (the package example uses 0.25).
	// NOTE(review): presumably the fraction of a chunk shared with its
	// neighbour — confirm.
	Overlap         float64
	// BreakStartChars and BreakEndChars are sets of runes (the package example
	// uses '<', '{' and '>', '}').
	// NOTE(review): presumably runes at which a chunk may start and end,
	// respectively — confirm against the splitter implementation.
	BreakStartChars []rune
	BreakEndChars   []rune
}
Example
package main

import (
	"fmt"
	"github.com/autogorg/autog/rag"
)

// text is the sample input for the example: four lines, alternating between
// a tag-delimited line (<aaa>…</aaa>, <bbb>…</bbb>) and a brace-delimited
// line ({ … }), followed by a trailing newline.
var text string = `<aaa> aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa </aaa>
{ aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa }
<bbb> bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb </bbb>
{ bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb }
`

// main configures a TextSplitter, runs its parser over the sample text under
// the path "/doc", and prints every resulting chunk prefixed by its index.
// If parsing fails, the error is printed and nothing else is output.
func main() {
	splitter := &rag.TextSplitter{
		ChunkSize:       80,
		Overlap:         0.25,
		BreakStartChars: []rune{'<', '{'},
		BreakEndChars:   []rune{'>', '}'},
	}

	parser := splitter.GetParser()
	chunks, err := parser("/doc", text)
	if err != nil {
		// The chunk slice is not meaningful once an error is reported, so
		// stop here instead of falling through to the print loop.
		fmt.Println(err)
		return
	}
	for i, chunk := range chunks {
		fmt.Printf("%d ->\n%s\n", i, chunk.GetContent())
	}
}
Output:

0 ->
<aaa> aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa </aaa>
{ aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa }
<bbb>
1 ->
</aaa>
{ aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa }
<bbb> bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb </bbb>
2 ->
<bbb> bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb </bbb>
{ bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb }
3 ->
</bbb>
{ bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb }

func NewTextSplitter

func NewTextSplitter(chunkSize int) *TextSplitter

func (*TextSplitter) GetParser

func (ts *TextSplitter) GetParser() autog.ParserFunction

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL