paraphrase

package
v0.0.0-...-9825f04 Latest Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: Jul 4, 2017 License: MIT Imports: 25 Imported by: 0

Documentation

Overview

Copyright 2017 Joseph Lewis III <joseph@josephlewis.net> Licensed under the MIT License. See LICENSE file for full details.

Index

Constants

View Source
// Package-level names and limits used by paraphrase databases.
const (
	// DbExt is the ".ppdb" suffix; DbName below ends with it.
	// NOTE(review): presumably the file extension of database files — confirm.
	DbExt                  = ".ppdb"
	// DbName is the default database file name.
	DbName                 = "paraphrasedb.ppdb"
	// DocumentBucket, IndexBucket, SettingsBucket and FileBucket are bucket
	// names. NOTE(review): presumably the storage buckets for documents, the
	// hash index, settings and file bodies respectively — confirm against the
	// code that opens the database.
	DocumentBucket         = "documents"
	IndexBucket            = "index"
	SettingsBucket         = "settings"
	FileBucket             = "files"
	// MinIndex and MaxIndex are 20-character strings of all zeros and all
	// nines. NOTE(review): presumably the lower and upper bounds for range
	// scans over zero-padded decimal keys — confirm.
	MinIndex               = "00000000000000000000"
	MaxIndex               = "99999999999999999999"
	CurrentSettingsVersion = 1 // the version of the settings file, won't match the version of paraphrase

)

Variables

View Source
// Sentinel errors exported by the package. The messages are user-facing and
// refer to the 'paraphrase init' command.
var (
	// SettingsNotDefinedErr reports that no settings were found.
	SettingsNotDefinedErr = errors.New("No settings found. If you meant to create a database run 'paraphrase init'")
	// AlreadyInitializedErr reports that paraphrase appears to have been
	// initialized already.
	AlreadyInitializedErr = errors.New("It looks like paraphrase has already been initialized.")
	// ImportErr reports that errors were encountered while importing documents.
	ImportErr             = errors.New("Errors encountered while importing documents")
	// DatabaseDNEErr reports that the database does not appear to exist.
	DatabaseDNEErr        = errors.New("It looks like the database does not exist, try running paraphrase init to create it")
)

Functions

func FindDbPath

func FindDbPath(directory string) string

func FormatDocuments

func FormatDocuments(w io.Writer, docs []Document, templateFormat string, shortSha bool, db *ParaphraseDb)

Writes the documents in a fashion suitable for displaying on-screen.

func FormatSearchResults

func FormatSearchResults(w io.Writer, docs []SearchResult, templateFormat string, db *ParaphraseDb)

func GlobToRegexStr

func GlobToRegexStr(glob string) string

GlobToRegexStr converts a basic glob string to a regex, e.g. "foo*bar.java" becomes "^foo.*bar\.java$". Everything that isn't a * gets escaped.

func NewDocument

func NewDocument(path, namespace string, body []byte) (*Document, *DocumentData)

func RenderDocument

func RenderDocument(templateFormat string, doc *Document, db *ParaphraseDb, extraFuncs template.FuncMap) error

func WriteDocuments

func WriteDocuments(w io.Writer, docs []Document, shortSha bool)

Writes the documents in a fashion suitable for displaying on-screen.

Types

type BucketSet

// BucketSet maps uint64 keys to float64 values.
// NOTE(review): presumably the keys are fingerprint hashes and the values are
// weights — confirm against the method implementations.
type BucketSet map[uint64]float64

func NewBucketSet

func NewBucketSet() BucketSet

func (BucketSet) AddAll

func (hs BucketSet) AddAll(elements []uint64) BucketSet

func (BucketSet) GetOrDefault

func (hs BucketSet) GetOrDefault(key uint64, defaultVal float64) float64

func (BucketSet) Intersect

func (hs BucketSet) Intersect(other BucketSet) BucketSet

func (BucketSet) Mult

func (hs BucketSet) Mult(other BucketSet) BucketSet

func (BucketSet) OverlapProportion

func (hs BucketSet) OverlapProportion(other BucketSet) float64

func (BucketSet) Sum

func (hs BucketSet) Sum() float64

func (BucketSet) TfIdf

func (hs BucketSet) TfIdf() float64

func (BucketSet) Union

func (hs BucketSet) Union(other BucketSet) BucketSet

type ChangeLogEntry

// ChangeLogEntry is a single change-log record: who made a change, when, and
// a textual description of it.
type ChangeLogEntry struct {
	// Id is tagged as the storm id with the "increment" option.
	Id     int `storm:"id,increment"`
	User   string
	Date   time.Time
	Change string
}

type Document

// Document is the metadata record for a document: its identity, location,
// index time, SHA1 and term-count vector. The raw body is not stored here;
// DocumentData carries a Body field instead.
type Document struct {
	// Id is tagged as the storm id and as unique.
	Id        int64 `storm:"id,unique"`
	Path      string
	Namespace string
	IndexDate time.Time
	// Sha1 is tagged as a storm index.
	// NOTE(review): presumably the hex SHA1 of the document body — confirm.
	Sha1      string `storm:"index"`
	// Hashes is the document's TermCountVector.
	Hashes    TermCountVector
}

func (*Document) NormalizedTermFrequency

func (d *Document) NormalizedTermFrequency() linalg.IFVector

type DocumentData

// DocumentData carries a document's raw body together with the same Id, Path,
// Namespace and IndexDate fields that Document has.
type DocumentData struct {
	// Id is tagged as the storm id and as unique.
	// NOTE(review): presumably equal to the Id of the matching Document — confirm.
	Id        int64 `storm:"id,unique"`
	Path      string
	Namespace string
	IndexDate time.Time
	// Body holds the document's bytes.
	Body      []byte
}

func NewDocumentData

func NewDocumentData(doc *Document, body []byte) *DocumentData

func (*DocumentData) BodySha1

func (dd *DocumentData) BodySha1() string

type Fingerprint

// Fingerprint is a named uint64 type.
// NOTE(review): presumably a single winnowing hash value — confirm against its uses.
type Fingerprint uint64

type IndexEntry

// IndexEntry associates a hash with a document id and a frequency.
// NOTE(review): presumably Frequency is how often Hash occurs in document
// Doc — confirm.
type IndexEntry struct {
	// Hash is tagged as both the storm id and an index.
	Hash      uint64 `storm:"id,index"`
	Doc       int64
	Frequency int16
}

type ParaphraseDb

// ParaphraseDb is the database handle returned by Create and Open. All of its
// fields are unexported, so it is used only through its methods.
type ParaphraseDb struct {
	// contains filtered or unexported fields
}

func Create

func Create(directory string, settings Settings) (*ParaphraseDb, error)

Creates a new database in the given directory with the given settings.

func Open

func Open(directory string) (*ParaphraseDb, error)

Open or create a new paraphrase database in the given directory

func (*ParaphraseDb) AddDocuments

func (p *ParaphraseDb) AddDocuments(producer provider.DocumentProducer) (added []Document, ok bool)

func (*ParaphraseDb) Close

func (p *ParaphraseDb) Close() error

func (*ParaphraseDb) CountDocuments

func (p *ParaphraseDb) CountDocuments() (int, error)

func (*ParaphraseDb) CreateDocument

func (p *ParaphraseDb) CreateDocument(path, namespace string, body []byte) (*Document, error)

func (*ParaphraseDb) FindDocumentById

func (p *ParaphraseDb) FindDocumentById(id int64) (*Document, error)

func (*ParaphraseDb) FindDocumentDataById

func (p *ParaphraseDb) FindDocumentDataById(id int64) (*DocumentData, error)

func (*ParaphraseDb) FindDocumentsBySha1

func (p *ParaphraseDb) FindDocumentsBySha1(sha1 string) (results []Document, err error)

func (*ParaphraseDb) FindDocumentsLike

func (p *ParaphraseDb) FindDocumentsLike(query Document) (results []Document, err error)

FindDocumentsLike finds documents like the one given:
  - Ids are matched exactly
  - SHA1s are matched as a prefix (you can give just the first n characters)
  - Namespaces are searched like globs
  - Paths are searched like globs

func (*ParaphraseDb) GetSettings

func (p *ParaphraseDb) GetSettings() Settings

func (*ParaphraseDb) ImportDocumentsMatching

func (p *ParaphraseDb) ImportDocumentsMatching(from *ParaphraseDb, query Document) error

func (*ParaphraseDb) QueryById

func (p *ParaphraseDb) QueryById(id int64) (results []SearchResult, err error)

func (*ParaphraseDb) QueryByString

func (p *ParaphraseDb) QueryByString(query string) (results []SearchResult, err error)

func (*ParaphraseDb) QueryByVector

func (p *ParaphraseDb) QueryByVector(query TermCountVector) (results []SearchResult, err error)

func (*ParaphraseDb) WinnowData

func (p *ParaphraseDb) WinnowData(bytes []byte) (TermCountVector, error)

func (*ParaphraseDb) WriteChanges

func (p *ParaphraseDb) WriteChanges(writer io.Writer)

func (*ParaphraseDb) WriteStats

func (p *ParaphraseDb) WriteStats(writer io.Writer)

Write information about Paraphrase and the database to an output. Output format _may change without warning_.

type SearchResult

// SearchResult pairs a query vector with a document.
// NOTE(review): presumably Doc is a document that matched Query, scored by the
// Similarity method — confirm.
type SearchResult struct {
	// Query points to the term-count vector of the query.
	Query *TermCountVector
	// Doc points to the document for this result.
	Doc   *Document
	// contains filtered or unexported fields
}

func (*SearchResult) Similarity

func (sr *SearchResult) Similarity() float64

type Settings

// Settings holds a database's configuration. It is passed to Create and
// returned by ParaphraseDb.GetSettings.
type Settings struct {
	// Version is tagged as the storm id and as unique.
	// NOTE(review): presumably compared against CurrentSettingsVersion — confirm.
	Version         int `storm:"id,unique"`
	// NOTE(review): WindowSize, FingerprintSize and RobustHash presumably
	// parameterize the winnowing fingerprinter (see WinnowData) — confirm.
	WindowSize      int
	FingerprintSize int
	RobustHash      bool
	CreatedAt       time.Time
}

func NewDefaultSettings

func NewDefaultSettings() Settings

type TermCountVector

// TermCountVector maps uint64 keys to int16 counts.
// NOTE(review): presumably the keys are fingerprint hashes and the values are
// their occurrence counts in one document — confirm.
type TermCountVector map[uint64]int16

func (TermCountVector) NormalizedTermFrequency

func (vec TermCountVector) NormalizedTermFrequency() linalg.IFVector

Directories

Path Synopsis
Package snappyjson implements snappy compression of JSON objects for StormDB Package snappyjson implements snappy compression of JSON objects for StormDB
Package snappyjson implements snappy compression of JSON objects for StormDB Package snappyjson implements snappy compression of JSON objects for StormDB

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL