simhash

package module
v0.0.0-...-c97885a Latest Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: Oct 22, 2020 License: MIT Imports: 5 Imported by: 0

README

simhash

Go implementation of the simhash algorithm.

Documentation

Index

Constants

This section is empty.

Variables

This section is empty.

Functions

func Compare

func Compare(a, b uint64) int

Compare calculates the Hamming distance between two 64-bit integers.

func Fingerprint

func Fingerprint(v Vector) uint64

func ShingleBytes

func ShingleBytes(a [][]byte, k int) [][]byte

------------------------------------------------------------------------------

func ShingleStrings

func ShingleStrings(a []string, k int) []string

func Simhash

func Simhash(fs Features) uint64

func Words

func Words(text string) []string

Words splits the string into words.

Types

type BytesSlice

type BytesSlice [][]byte

BytesSlice

func (BytesSlice) Bytes

func (p BytesSlice) Bytes(i int) []byte

func (BytesSlice) Len

func (p BytesSlice) Len() int

func (BytesSlice) Weight

func (p BytesSlice) Weight(i int) int

type Feature

type Feature struct {
	Hash   uint64
	Weight int
}

func FeaturesByBytesSlices

func FeaturesByBytesSlices(vs [][]byte) []Feature

func FeaturesByStrings

func FeaturesByStrings(vs []string) []Feature

type Features

type Features interface {
	Len() int           // number of features
	Bytes(i int) []byte // bytes of i-th feature
	Weight(i int) int   // weight of i-th feature
}

type RuneSlice

type RuneSlice []rune

RuneSlice

func (RuneSlice) Bytes

func (p RuneSlice) Bytes(i int) []byte

func (RuneSlice) Len

func (p RuneSlice) Len() int

func (RuneSlice) Weight

func (p RuneSlice) Weight(i int) int

type ShinglingRunes

type ShinglingRunes struct {
	Runes []rune
	K     int
}

------------------------------------------------------------------------------

func (ShinglingRunes) Bytes

func (p ShinglingRunes) Bytes(i int) []byte

func (ShinglingRunes) Len

func (p ShinglingRunes) Len() int

func (ShinglingRunes) Weight

func (p ShinglingRunes) Weight(i int) int

type ShinglingStrings

type ShinglingStrings struct {
	Strings []string
	K       int
}

------------------------------------------------------------------------------

func (ShinglingStrings) Bytes

func (p ShinglingStrings) Bytes(i int) []byte

func (ShinglingStrings) Len

func (p ShinglingStrings) Len() int

func (ShinglingStrings) Weight

func (p ShinglingStrings) Weight(i int) int

type StringSlice

type StringSlice []string

StringSlice

func (StringSlice) Bytes

func (p StringSlice) Bytes(i int) []byte

func (StringSlice) Len

func (p StringSlice) Len() int

func (StringSlice) Weight

func (p StringSlice) Weight(i int) int

type Vector

type Vector [64]int

func Vectorize

func Vectorize(fs Features) Vector

func VectorizeBytes

func VectorizeBytes(bs [][]byte) Vector

func VectorizeFeatures

func VectorizeFeatures(fs []Feature) Vector

Directories

Path Synopsis

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL