go-ngram: github.com/lestrrat/go-ngram Index | Examples | Files

package ngram

import "github.com/lestrrat/go-ngram"



Package Files

index.go ngram.go

type Document Uses

type Document struct {
    // contains filtered or unexported fields
}

func NewDocument Uses

func NewDocument(id, content string) *Document

func (*Document) Content Uses

func (d *Document) Content() string

func (*Document) Id Uses

func (d *Document) Id() string

type Index Uses

type Index struct {
    // contains filtered or unexported fields
}


trigram := NewIndex(3)
inputs := []string{
for _, v := range inputs {

// Find strings whose scores are above trigram.GetMinScore()
// (which is by default 0)
matches := trigram.FindSimilarStrings(`...`)
log.Printf("%#v", matches)

// Find 1 best match (the best score) out of similar strings
best := trigram.FindBestMatch(`...`)
log.Printf("%s", best)

// Iterate match results
minScore := 0.5
limit := 0
c := trigram.IterateSimilar(` ... your input ...`, minScore, limit)
for r := range c {
    log.Printf("Item id %s matched with score %f", r.Item.Id(), r.Score)
    log.Printf("Content of Item was %s", r.Item.Content())

func NewIndex Uses

func NewIndex(n int) *Index

func (*Index) AddItem Uses

func (i *Index) AddItem(item IndexItem) error

func (*Index) AddString Uses

func (i *Index) AddString(input string) error

func (*Index) FindBestMatch Uses

func (i *Index) FindBestMatch(input string) string

func (*Index) FindMatchingStrings Uses

func (i *Index) FindMatchingStrings(input string) []string

func (*Index) FindSimilarStrings Uses

func (i *Index) FindSimilarStrings(input string) []string

func (*Index) GetItem Uses

func (i *Index) GetItem(id string) IndexItem

func (*Index) GetItemWithMetadata Uses

func (i *Index) GetItemWithMetadata(id string) *IndexItemWithMetadata

func (*Index) GetMinScore Uses

func (i *Index) GetMinScore() float64

func (*Index) GetWarp Uses

func (i *Index) GetWarp() float64

func (*Index) IterateSimilar Uses

func (i *Index) IterateSimilar(input string, min float64, limit int) <-chan MatchResult

IterateSimilar searches for similar strings in the index, sending the search results to the returned channel. You can specify the minimum score and the maximum number of items to be fetched.

func (*Index) SetMinScore Uses

func (i *Index) SetMinScore(min float64)

func (*Index) SetWarp Uses

func (i *Index) SetWarp(w float64)

type IndexItem Uses

type IndexItem interface {
    Id() string
    Content() string
}

type IndexItemDepot Uses

type IndexItemDepot map[string]*IndexItemWithMetadata

Map of item id to IndexItemWithMetadata. This object holds the IndexItem itself, as well as the number of ngrams in this document.

type IndexItemWithMetadata Uses

type IndexItemWithMetadata struct {
    // contains filtered or unexported fields
}

type InvertedIndex Uses

type InvertedIndex map[string]map[string]int

type MatchResult Uses

type MatchResult struct {
    Score float64
    Item  IndexItem
}

type Token Uses

type Token struct {
    // contains filtered or unexported fields
}

func (*Token) End Uses

func (s *Token) End() int

func (*Token) Start Uses

func (s *Token) Start() int

func (*Token) String Uses

func (s *Token) String() string

type Tokenize Uses

type Tokenize struct {
    // contains filtered or unexported fields
}


input := `the quick red quick red fox red fox jumps fox jumps over jumps over the over the lazy the lazy brown lazy brown dog`
n := NewTokenize(3, input) // Trigram
for _, s := range n.Tokens() {
    log.Printf("segment = %s", s)

func NewTokenize Uses

func NewTokenize(n int, input string) *Tokenize

func (*Tokenize) N Uses

func (n *Tokenize) N() int

func (*Tokenize) NewToken Uses

func (n *Tokenize) NewToken(start, end int) *Token

A Token holds a pointer to the original tokenizer struct that created it. This is used to create the textual representation of the token (see String()) without having to allocate memory for it.

func (*Tokenize) Parse Uses

func (n *Tokenize) Parse()

func (*Tokenize) Text Uses

func (n *Tokenize) Text() string

func (*Tokenize) TokenSet Uses

func (n *Tokenize) TokenSet() mapset.Set

func (*Tokenize) Tokens Uses

func (n *Tokenize) Tokens() []*Token

type Tokenizer Uses

type Tokenizer interface {
    Tokens() []*Token
    Text() string
    N() int
}

Package ngram imports 6 packages (graph). Updated 2018-03-01. Refresh now. Tools for package owners.