jbleve

package module
v1.0.4 Latest Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: Sep 1, 2020 License: MIT Imports: 4 Imported by: 0

README

GoJieba Bleve support

License GoDoc Go Report Card

Intro

This module provides Bleve support for GoJieba.

This repository exists because the original module
removed its Bleve support.

Get the mod

go get github.com/ttys3/gojieba-bleve

Usage

Add a blank import like this to register the gojieba Tokenizer and Analyzer:

_ "github.com/ttys3/gojieba-bleve"

For a complete example, please see bleve_test.go.

Documentation

Overview

Example
INDEX_DIR := "gojieba.bleve"
messages := []struct {
	Id   string
	Body string
}{
	{
		Id:   "1",
		Body: "你好",
	},
	{
		Id:   "2",
		Body: "交代",
	},
	{
		Id:   "3",
		Body: "长江大桥",
	},
}

indexMapping := bleve.NewIndexMapping()
os.RemoveAll(INDEX_DIR)
// clean index when example finished
defer os.RemoveAll(INDEX_DIR)

err := indexMapping.AddCustomTokenizer("gojieba",
	map[string]interface{}{
		"dictpath":     gojieba.DICT_PATH,
		"hmmpath":      gojieba.HMM_PATH,
		"userdictpath": gojieba.USER_DICT_PATH,
		"idf":          gojieba.IDF_PATH,
		"stop_words":   gojieba.STOP_WORDS_PATH,
		"type":         "gojieba",
	},
)
if err != nil {
	panic(err)
}
err = indexMapping.AddCustomAnalyzer("gojieba",
	map[string]interface{}{
		"type":      "gojieba",
		"tokenizer": "gojieba",
	},
)
if err != nil {
	panic(err)
}
indexMapping.DefaultAnalyzer = "gojieba"

index, err := bleve.New(INDEX_DIR, indexMapping)
if err != nil {
	panic(err)
}
for _, msg := range messages {
	if err := index.Index(msg.Id, msg); err != nil {
		panic(err)
	}
}

querys := []string{
	"你好世界",
	"亲口交代",
	"长江",
}

for _, q := range querys {
	req := bleve.NewSearchRequest(bleve.NewQueryStringQuery(q))
	req.Highlight = bleve.NewHighlight()
	res, err := index.Search(req)
	if err != nil {
		panic(err)
	}
	fmt.Println(prettify(res))
}

//cleanup cgo allocated heap memory
if jieba, ok := (index.Mapping().AnalyzerNamed("gojieba").Tokenizer).(*JiebaTokenizer); !ok {
	panic("jieba.Free() failed")
} else {
	jieba.Free()
}
index.Close()
Output:

[{"id":"1","score":0.27650412875470115}]
[{"id":"2","score":0.27650412875470115}]
[{"id":"3","score":0.7027325540540822}]

Index

Examples

Constants

This section is empty.

Variables

View Source
var (
	// DefaultTokenizerConfig is a tokenizer configuration map of type
	// "gojieba". Its path entries are taken from the gojieba package's
	// DICT_PATH, HMM_PATH, USER_DICT_PATH, IDF_PATH and STOP_WORDS_PATH values.
	DefaultTokenizerConfig = map[string]interface{}{
		"type":         "gojieba",
		"dictpath":     gojieba.DICT_PATH,
		"hmmpath":      gojieba.HMM_PATH,
		"userdictpath": gojieba.USER_DICT_PATH,
		"idf":          gojieba.IDF_PATH,
		"stop_words":   gojieba.STOP_WORDS_PATH,
	}
	// DefaultAnalyzerConfig is an analyzer configuration map of type
	// "gojieba" that names "gojieba" as its tokenizer.
	DefaultAnalyzerConfig = map[string]interface{}{
		"tokenizer": "gojieba",
		"type":      "gojieba",
	}
)

Functions

This section is empty.

Types

type JiebaAnalyzer

// JiebaAnalyzer is an empty struct type.
// NOTE(review): presumably this backs the "gojieba" analyzer type that is
// registered when the package is imported — confirm against the package source.
type JiebaAnalyzer struct {
}

type JiebaTokenizer

type JiebaTokenizer struct {
	// contains filtered or unexported fields
}

func NewJiebaTokenizer

func NewJiebaTokenizer(dictpath, hmmpath, userdictpath, idf, stop_words string) *JiebaTokenizer

func (*JiebaTokenizer) Free

func (x *JiebaTokenizer) Free()

func (*JiebaTokenizer) Tokenize

func (x *JiebaTokenizer) Tokenize(sentence []byte) analysis.TokenStream

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL