mecab

package module
v0.0.8 Latest Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: Apr 2, 2024 License: MIT Imports: 5 Imported by: 13

README

go-mecab

test PkgGoDev

go-mecab is a MeCab binding for Golang.

SYNOPSIS

import "github.com/shogo82148/go-mecab"

tagger, err := mecab.New(map[string]string{"output-format-type": "wakati"})
defer tagger.Destroy()
result, err := tagger.Parse("こんにちは世界")
fmt.Println(result)
// Output: こんにちは 世界

INSTALL

You need to tell Go where MeCab has been installed.

$ export CGO_LDFLAGS="-L/path/to/lib -lmecab -lstdc++"
$ export CGO_CFLAGS="-I/path/to/include"
$ go get github.com/shogo82148/go-mecab

If you have mecab-config installed, execute the following commands.

$ export CGO_LDFLAGS="`mecab-config --libs`"
$ export CGO_CFLAGS="-I`mecab-config --inc-dir`"
$ go get github.com/shogo82148/go-mecab

SEE ALSO

Documentation

Index

Examples

Constants

This section is empty.

Variables

This section is empty.

Functions

This section is empty.

Types

type Error added in v0.0.4

type Error struct {
	// contains filtered or unexported fields
}

Error is an error of MeCab.

func (*Error) Error added in v0.0.4

func (e *Error) Error() string

type Lattice

type Lattice struct {
	// contains filtered or unexported fields
}

Lattice is a lattice.

func NewLattice

func NewLattice() (Lattice, error)

NewLattice creates new lattice.

func (Lattice) AddRequestType added in v0.0.8

func (l Lattice) AddRequestType(t RequestType)

AddRequestType adds the request type.

func (Lattice) BOSNode

func (l Lattice) BOSNode() Node

BOSNode returns the Begin Of Sentence node.

func (Lattice) Clear

func (l Lattice) Clear()

Clear sets an empty string to the lattice.

func (Lattice) Destroy

func (l Lattice) Destroy()

Destroy frees the lattice.

func (Lattice) EOSNode

func (l Lattice) EOSNode() Node

EOSNode returns the End Of Sentence node.

func (Lattice) IsAvailable

func (l Lattice) IsAvailable() bool

IsAvailable reports whether the lattice is available.

func (Lattice) Next added in v0.0.8

func (l Lattice) Next() bool

Next obtains the next-best result. The internal linked list structure is updated. You should set RequestTypeNBest in advance. It returns false if no more results are available or the RequestType is invalid.

func (Lattice) RequestType added in v0.0.8

func (l Lattice) RequestType() RequestType

RequestType returns the request type.

func (Lattice) Sentence

func (l Lattice) Sentence() string

Sentence returns the sentence in the lattice.

func (Lattice) SetRequestType added in v0.0.8

func (l Lattice) SetRequestType(t RequestType)

SetRequestType sets the request type.

func (Lattice) SetSentence

func (l Lattice) SetSentence(s string)

SetSentence sets the sentence in the lattice.

func (Lattice) String

func (l Lattice) String() string

type MeCab

type MeCab struct {
	// contains filtered or unexported fields
}

MeCab is a morphological parser.

func New

func New(args map[string]string) (MeCab, error)

New returns new MeCab parser.

func (MeCab) Destroy

func (m MeCab) Destroy()

Destroy frees the MeCab parser.

func (MeCab) Error

func (m MeCab) Error() error

Error returns the error of MeCab.

func (MeCab) Parse

func (m MeCab) Parse(s string) (string, error)

Parse parses the string and returns the result as string. Parse is not safe for concurrent use by multiple goroutines.

Example
options := map[string]string{}
if path := os.Getenv("MECABRC_PATH"); path != "" {
	options["rcfile"] = path
}

tagger, err := mecab.New(options)
if err != nil {
	panic(err)
}
defer tagger.Destroy()

result, err := tagger.Parse("こんにちは世界")
if err != nil {
	panic(err)
}
fmt.Println(result)
Output:

こんにちは	感動詞,*,*,*,*,*,こんにちは,コンニチハ,コンニチワ
世界	名詞,一般,*,*,*,*,世界,セカイ,セカイ
EOS

func (MeCab) ParseLattice

func (m MeCab) ParseLattice(lattice Lattice) error

ParseLattice parses the lattice and stores the result in the lattice itself; only an error is returned (read the result with, for example, Lattice.String). ParseLattice is safe for concurrent use by multiple goroutines. Create a lattice for each goroutine.

Example
options := map[string]string{}
if path := os.Getenv("MECABRC_PATH"); path != "" {
	options["rcfile"] = path
}

tagger, err := mecab.New(options)
if err != nil {
	panic(err)
}
defer tagger.Destroy()

lattice, err := mecab.NewLattice()
if err != nil {
	panic(err)
}

lattice.SetSentence("こんにちは世界")
err = tagger.ParseLattice(lattice)
if err != nil {
	panic(err)
}
fmt.Println(lattice.String())
Output:

こんにちは	感動詞,*,*,*,*,*,こんにちは,コンニチハ,コンニチワ
世界	名詞,一般,*,*,*,*,世界,セカイ,セカイ
EOS
Example (NBest)
options := map[string]string{}
if path := os.Getenv("MECABRC_PATH"); path != "" {
	options["rcfile"] = path
}

tagger, err := mecab.New(options)
if err != nil {
	panic(err)
}
defer tagger.Destroy()

lattice, err := mecab.NewLattice()
if err != nil {
	panic(err)
}

lattice.SetSentence("こんにちは世界")
lattice.AddRequestType(mecab.RequestTypeNBest)
err = tagger.ParseLattice(lattice)
if err != nil {
	panic(err)
}
for i := 0; i < 5; i++ {
	fmt.Println(lattice.String())
	if !lattice.Next() {
		break
	}
}
Output:

こんにちは	感動詞,*,*,*,*,*,こんにちは,コンニチハ,コンニチワ
世界	名詞,一般,*,*,*,*,世界,セカイ,セカイ
EOS

こんにちは	感動詞,*,*,*,*,*,こんにちは,コンニチハ,コンニチワ
世界	名詞,一般,*,*,*,*,世界,セカイ,セカイ
EOS

こんにちは	感動詞,*,*,*,*,*,こんにちは,コンニチハ,コンニチワ
世	名詞,一般,*,*,*,*,世,ヨ,ヨ
界	名詞,接尾,一般,*,*,*,界,カイ,カイ
EOS

こんにちは	感動詞,*,*,*,*,*,こんにちは,コンニチハ,コンニチワ
世	名詞,一般,*,*,*,*,世,ヨ,ヨ
界	名詞,固有名詞,地域,一般,*,*,界,サカイ,サカイ
EOS

こんにちは	感動詞,*,*,*,*,*,こんにちは,コンニチハ,コンニチワ
世	名詞,接尾,助数詞,*,*,*,世,セイ,セイ
界	名詞,接尾,一般,*,*,*,界,カイ,カイ
EOS

func (MeCab) ParseToNode

func (m MeCab) ParseToNode(s string) (Node, error)

ParseToNode parses the string and returns the result as Node. ParseToNode is not safe for concurrent use by multiple goroutines.

Example
options := map[string]string{}
if path := os.Getenv("MECABRC_PATH"); path != "" {
	options["rcfile"] = path
}

tagger, err := mecab.New(options)
if err != nil {
	panic(err)
}
defer tagger.Destroy()

// XXX: avoid GC problem with MeCab 0.996 (see https://github.com/taku910/mecab/pull/24)
tagger.Parse("")

node, err := tagger.ParseToNode("こんにちは世界")
if err != nil {
	panic(err)
}

for ; !node.IsZero(); node = node.Next() {
	fmt.Printf("%s\t%s\n", node.Surface(), node.Feature())
}
Output:

	BOS/EOS,*,*,*,*,*,*,*,*
こんにちは	感動詞,*,*,*,*,*,こんにちは,コンニチハ,コンニチワ
世界	名詞,一般,*,*,*,*,世界,セカイ,セカイ
	BOS/EOS,*,*,*,*,*,*,*,*

func (MeCab) ParseToString

func (m MeCab) ParseToString(s string) (string, error)

ParseToString is an alias of [Parse]. ParseToString is not safe for concurrent use by multiple goroutines.

type Model

type Model struct {
	// contains filtered or unexported fields
}

Model is a dictionary model of MeCab.

func NewModel

func NewModel(args map[string]string) (Model, error)

NewModel returns a new model.

func (Model) Destroy

func (m Model) Destroy()

Destroy frees the model.

func (Model) NewLattice

func (m Model) NewLattice() (Lattice, error)

NewLattice returns a new lattice.

func (Model) NewMeCab

func (m Model) NewMeCab() (MeCab, error)

NewMeCab returns a new mecab.

func (Model) Swap

func (m Model) Swap(m2 Model) error

Swap replaces the model by the other model.

type Node

type Node struct {
	// contains filtered or unexported fields
}

Node is a node in a lattice.

func (Node) Alpha

func (node Node) Alpha() float32

Alpha returns the forward accumulative log summation.

func (Node) BNext

func (node Node) BNext() Node

BNext returns a node which begins at the same position.

func (Node) Beta

func (node Node) Beta() float32

Beta returns the backward accumulative log summation.

func (Node) CharType

func (node Node) CharType() int

CharType returns the character type.

func (Node) Cost

func (node Node) Cost() int

Cost returns the best accumulative cost from bos node to this node.

func (Node) ENext

func (node Node) ENext() Node

ENext returns a node which ends at the same position.

func (Node) Feature

func (node Node) Feature() string

Feature returns the feature.

func (Node) ID

func (node Node) ID() int

ID returns the id of Node.

func (Node) IsBest

func (node Node) IsBest() bool

IsBest reports whether the Node is the best solution.

func (Node) IsZero

func (node Node) IsZero() bool

IsZero returns whether the node is zero.

func (Node) LCAttr

func (node Node) LCAttr() int

LCAttr returns the left context attribute.

func (Node) Length

func (node Node) Length() int

Length returns the length of the surface string.

func (Node) Next

func (node Node) Next() Node

Next returns the next Node.

func (Node) PosID added in v0.0.6

func (node Node) PosID() int

PosID returns the part-of-speech id.

func (Node) Prev

func (node Node) Prev() Node

Prev returns the previous Node.

func (Node) Prob

func (node Node) Prob() float32

Prob returns the marginal probability.

func (Node) RCAttr

func (node Node) RCAttr() int

RCAttr returns the right context attribute.

func (Node) RLength

func (node Node) RLength() int

RLength returns the length of the surface string including white space before the morph.

func (Node) Stat

func (node Node) Stat() NodeStat

Stat returns the type of Node.

func (Node) String

func (node Node) String() string

String returns Surface and Feature.

func (Node) Surface

func (node Node) Surface() string

Surface returns the surface string.

func (Node) WCost

func (node Node) WCost() int

WCost returns word cost.

type NodeStat

type NodeStat int

NodeStat is status of a node.

const (
	// NormalNode is status for normal node.
	NormalNode NodeStat = 0

	// UnknownNode is status for unknown node.
	UnknownNode NodeStat = 1

	// BOSNode is status for BOS(Begin Of Sentence) node.
	BOSNode NodeStat = 2

	// EOSNode is status for EOS(End Of Sentence) node.
	EOSNode NodeStat = 3

	// EONNode is status for EON(End Of Node) node.
	EONNode NodeStat = 4
)

func (NodeStat) String

func (stat NodeStat) String() string

type RequestType added in v0.0.8

type RequestType int

RequestType is a request type.

const (
	// RequestTypeOneBest is a request type for one best result.
	RequestTypeOneBest RequestType = 1

	// RequestTypeNBest is a request type for N-best results.
	RequestTypeNBest RequestType = 2

	// RequestTypePartial enables a partial parsing mode.
	// When this flag is set, the input |sentence| needs to be written
	// in partial parsing format.
	RequestTypePartial RequestType = 4

	// RequestTypeMarginalProb is a request type for marginal probability.
	// Set this flag if you want to obtain marginal probabilities.
	// Marginal probability is set in [Node.Prob].
	// The parsing speed will get 3-5 times slower than the default mode.
	RequestTypeMarginalProb RequestType = 8

	// RequestTypeAlternative is a request type for alternative results.
	// Set this flag if you want to obtain alternative results.
	// Not implemented.
	RequestTypeAlternative RequestType = 16

	// RequestTypeAllMorphs is a request type for all morphs.
	RequestTypeAllMorphs RequestType = 32

	// RequestTypeAllocateSentence is a request type for allocating sentence.
	// When this flag is set, tagger internally copies the body of passed
	// sentence into internal buffer.
	RequestTypeAllocateSentence RequestType = 64
)

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL