ngram

v0.0.0-...-4489dcc
Published: Mar 7, 2014 License: MIT Imports: 6 Imported by: 0

README

go-ngram

N-gram index for Go

API Docs

See godoc

Documentation

Constants

This section is empty.

Variables

This section is empty.

Functions

This section is empty.

Types

type Document

type Document struct {
	// contains filtered or unexported fields
}

func NewDocument

func NewDocument(id, content string) *Document

func (*Document) Content

func (d *Document) Content() string

func (*Document) Id

func (d *Document) Id() string
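
A *Document satisfies the IndexItem interface below (it has both Id() and Content()), so it can be added to an Index directly. A minimal sketch in the same style as the package examples, assuming GetItem looks items up by the id passed to NewDocument:

// Index a document under an explicit id, then look it up again.
index := NewIndex(3)
doc := NewDocument("doc-1", "the quick red fox jumps over the lazy brown dog")
if err := index.AddItem(doc); err != nil {
	log.Fatal(err)
}

item := index.GetItem("doc-1")
log.Printf("id=%s content=%s", item.Id(), item.Content())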

type Index

type Index struct {
	// contains filtered or unexported fields
}
Example
trigram := NewIndex(3)
inputs := []string{
	`...`,
	`...`,
	`...`,
}
for _, v := range inputs {
	trigram.AddString(v)
}

// Find strings whose scores are above trigram.GetMinScore()
// (which is by default 0)
matches := trigram.FindSimilarStrings(`...`)
log.Printf("%#v", matches)

// Find the single best match (highest score) among the similar strings
best := trigram.FindBestMatch(`...`)
log.Printf("%s", best)

// Iterate match results
minScore := 0.5
limit := 0
c := trigram.IterateSimilar(` ... your input ...`, minScore, limit)
for r := range c {
	log.Printf("Item id %s matched with score %d", r.Item.Id(), r.Score)
	log.Printf("Content of Item was %s", r.Item.Content())
}
Output:

func NewIndex

func NewIndex(n int) *Index

func (*Index) AddItem

func (i *Index) AddItem(item IndexItem) error

func (*Index) AddString

func (i *Index) AddString(input string) error

func (*Index) FindBestMatch

func (i *Index) FindBestMatch(input string) string

func (*Index) FindMatchingStrings

func (i *Index) FindMatchingStrings(input string) []string

func (*Index) FindSimilarStrings

func (i *Index) FindSimilarStrings(input string) []string

func (*Index) GetItem

func (i *Index) GetItem(id string) IndexItem

func (*Index) GetItemWithMetadata

func (i *Index) GetItemWithMetadata(id string) *IndexItemWithMetadata

func (*Index) GetMinScore

func (i *Index) GetMinScore() float64

func (*Index) GetWarp

func (i *Index) GetWarp() float64

func (*Index) IterateSimilar

func (i *Index) IterateSimilar(input string, min float64, limit int) <-chan MatchResult

IterateSimilar searches the index for strings similar to input, sending the results to the returned channel. The min argument sets the minimum score a result must have, and limit caps the number of items fetched.
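
A hedged sketch of draining the channel into a slice; the strings, threshold, and limit are arbitrary, and the Score and Item fields come from MatchResult below:

// Stream matches for a query and collect them into a slice.
index := NewIndex(3)
index.AddString("mario")
index.AddString("marion")
index.AddString("luigi")

var results []MatchResult
for r := range index.IterateSimilar("mario", 0.3, 10) {
	results = append(results, r)
}
for _, r := range results {
	log.Printf("%q scored %f", r.Item.Content(), r.Score)
}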

func (*Index) SetMinScore

func (i *Index) SetMinScore(min float64)

func (*Index) SetWarp

func (i *Index) SetWarp(w float64)
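
A small sketch of configuring an index before querying; the concrete values are illustrative only:

index := NewIndex(3)
index.SetMinScore(0.4) // FindSimilarStrings only returns strings scoring above this
index.SetWarp(1.0)
log.Printf("min score = %f, warp = %f", index.GetMinScore(), index.GetWarp())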

type IndexItem

type IndexItem interface {
	Id() string
	Content() string
}
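
Anything with Id() and Content() can be indexed via AddItem; Document above is the built-in implementation. A minimal sketch of a custom implementation (the type and field names are hypothetical):

// A hypothetical record type that satisfies IndexItem.
type userRecord struct {
	id   string
	name string
}

func (u *userRecord) Id() string      { return u.id }
func (u *userRecord) Content() string { return u.name }

// Indexing it then works exactly like indexing a Document:
//	index := NewIndex(3)
//	err := index.AddItem(&userRecord{id: "u42", name: "Marty McFly"})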

type IndexItemDepot

type IndexItemDepot map[string]*IndexItemWithMetadata

IndexItemDepot maps an item id to its IndexItemWithMetadata, which holds the IndexItem itself as well as the number of n-grams in that document.

type IndexItemWithMetadata

type IndexItemWithMetadata struct {
	// contains filtered or unexported fields
}

type InvertedIndex

type InvertedIndex map[string]map[string]int

type MatchResult

type MatchResult struct {
	Score float64
	Item  IndexItem
}

type Token

type Token struct {
	// contains filtered or unexported fields
}

func (*Token) End

func (s *Token) End() int

func (*Token) Start

func (s *Token) Start() int

func (*Token) String

func (s *Token) String() string

type Tokenize

type Tokenize struct {
	// contains filtered or unexported fields
}
Example
input := `the quick red quick red fox red fox jumps fox jumps over jumps over the over the lazy the lazy brown lazy brown dog`
n := NewTokenize(3, input) // Trigram
for _, s := range n.Tokens() {
	log.Printf("segment = %s", s)
}
Output:

func NewTokenize

func NewTokenize(n int, input string) *Tokenize

func (*Tokenize) N

func (n *Tokenize) N() int

func (*Tokenize) NewToken

func (n *Tokenize) NewToken(start, end int) *Token

A Token holds a pointer to the Tokenize struct that created it. This is used to build the textual representation of the token (see String()) without having to allocate memory for it.
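
A short sketch of how the offsets relate to the shared text (the input string and offsets are arbitrary):

t := NewTokenize(3, "hello")
tok := t.NewToken(0, 3) // offsets into t.Text()
// tok.String() is presumably the substring t.Text()[tok.Start():tok.End()]
log.Printf("start=%d end=%d text=%s", tok.Start(), tok.End(), tok.String())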

func (*Tokenize) Parse

func (n *Tokenize) Parse()

func (*Tokenize) Text

func (n *Tokenize) Text() string

func (*Tokenize) TokenSet

func (n *Tokenize) TokenSet() mapset.Set

func (*Tokenize) Tokens

func (n *Tokenize) Tokens() []*Token

type Tokenizer

type Tokenizer interface {
	Tokenize()
	Tokens() []*Token
	Text() string
	N() int
}
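
As listed here, the concrete Tokenize struct exposes Parse() rather than a Tokenize() method, so it does not satisfy this interface as-is. A hedged sketch of one way to adapt it (the wrapper type is hypothetical):

// A thin adapter, assuming the interface's Tokenize() is meant to
// trigger parsing (the concrete struct calls this step Parse()).
type tokenizeAdapter struct {
	*Tokenize // promotes Tokens(), Text() and N()
}

func (a *tokenizeAdapter) Tokenize() { a.Parse() }

var _ Tokenizer = &tokenizeAdapter{} // compile-time interface check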
