mmsego

package module
v0.0.0-...-38f37e9 Latest Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: Apr 18, 2012 License: Apache-2.0 Imports: 3 Imported by: 0

README

MMSEGO

This is a Go implementation of MMSEG, which is a Chinese word-splitting algorithm.

TO DO list

  • Documentation/comments
  • Benchmark

Usage

#Input Dictionary Format

Key\tFreq

Each key occupies one line. The file should be UTF-8 encoded; please refer to go-darts for details.

#Code example

package main

import (
    "fmt"
    "time"
    "os"
    "mmsego"
    "bufio"
    "log"
    )

// main segments /tmp/a.txt with the dictionary stored in darts.lib and prints
// how long the whole run took. Complete lines are fed to the segmenter with a
// no-op callback; only the words of the final piece are printed.
func main() {
	// Init has no return value, so there is no error to check after it.
	s := new(mmsego.Segmenter)
	s.Init("darts.lib")

	t := time.Now()
	offset := 0

	unifile, err := os.Open("/tmp/a.txt")
	if err != nil {
		log.Fatal(err)
	}
	defer unifile.Close()

	uniLineReader := bufio.NewReaderSize(unifile, 4000)
	line, bufErr := uniLineReader.ReadString('\n')
	for bufErr == nil {
		// To print every word instead of discarding it, use:
		//takeWord := func(off int, length int){ fmt.Printf("%s ", string(line[off-offset:off-offset+length])) }
		takeWord := func(off, length int) {}
		s.Mmseg(line, offset, takeWord, nil, false)
		// offset is the byte position of the next line within the whole input.
		offset += len(line)
		line, bufErr = uniLineReader.ReadString('\n')
	}

	// ReadString returned an error (normally io.EOF); line holds whatever was
	// read before it, which is passed on as the last piece.
	// NOTE(review): off appears to include the initOffset passed to Mmseg,
	// hence the subtraction of offset before slicing line — confirm.
	takeWord := func(off int, length int) {
		fmt.Printf("%s ", string(line[off-offset:off-offset+length]))
	}
	s.Mmseg(line, offset, takeWord, nil, true)

	fmt.Printf("Duration: %v\n", time.Since(t))
}

LICENSE

Apache License 2.0

Documentation

Index

Constants

This section is empty.

Variables

This section is empty.

Functions

This section is empty.

Types

type Segmenter

type Segmenter struct {
	// contains filtered or unexported fields
}

func (*Segmenter) Init

func (s *Segmenter) Init(dictPath string)

func (*Segmenter) Mmseg

func (s *Segmenter) Mmseg(inString string, initOffset int, takeWord func(int, int), takeThesaurus func(int, int), lastPiece bool) (lstPos int)

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL