corpus

package
v0.1.7 Latest Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: Jun 28, 2023 License: GPL-3.0 Imports: 15 Imported by: 0

Documentation

Index

Constants

This section is empty.

Variables

View Source
// DefaultStoplist is the package's built-in stop word list. It is a pointer to
// a Stoplist in which every listed word maps to true. The entries are English
// function words, contractions (including the bare suffixes "'ll" and "'ve"),
// single letters, and common abbreviations, kept in sorted order.
//
// All keys are plain ASCII. The truncated forms "herse", "himse", "itse" and
// "myse" are retained from the upstream word list; "itse" and "myse" used to
// carry a stray U+201D right double quotation mark, which made them
// inconsistent with their siblings.
var DefaultStoplist = &Stoplist{
	"'ll":             true,
	"'ve":             true,
	"I":               true,
	"a":               true,
	"a's":             true,
	"able":            true,
	"about":           true,
	"above":           true,
	"abroad":          true,
	"abst":            true,
	"accordance":      true,
	"according":       true,
	"accordingly":     true,
	"across":          true,
	"act":             true,
	"actually":        true,
	"added":           true,
	"adj":             true,
	"adopted":         true,
	"affected":        true,
	"affecting":       true,
	"affects":         true,
	"after":           true,
	"afterwards":      true,
	"again":           true,
	"against":         true,
	"ago":             true,
	"ah":              true,
	"ahead":           true,
	"ain't":           true,
	"all":             true,
	"allow":           true,
	"allows":          true,
	"almost":          true,
	"alone":           true,
	"along":           true,
	"alongside":       true,
	"already":         true,
	"also":            true,
	"although":        true,
	"always":          true,
	"am":              true,
	"amid":            true,
	"amidst":          true,
	"among":           true,
	"amongst":         true,
	"amount":          true,
	"an":              true,
	"and":             true,
	"announce":        true,
	"another":         true,
	"any":             true,
	"anybody":         true,
	"anyhow":          true,
	"anymore":         true,
	"anyone":          true,
	"anything":        true,
	"anyway":          true,
	"anyways":         true,
	"anywhere":        true,
	"apart":           true,
	"apparently":      true,
	"appear":          true,
	"appreciate":      true,
	"appropriate":     true,
	"approximately":   true,
	"are":             true,
	"aren":            true,
	"aren't":          true,
	"arent":           true,
	"arise":           true,
	"around":          true,
	"as":              true,
	"aside":           true,
	"ask":             true,
	"asking":          true,
	"associated":      true,
	"at":              true,
	"auth":            true,
	"available":       true,
	"away":            true,
	"awfully":         true,
	"b":               true,
	"back":            true,
	"backward":        true,
	"backwards":       true,
	"be":              true,
	"became":          true,
	"because":         true,
	"become":          true,
	"becomes":         true,
	"becoming":        true,
	"been":            true,
	"before":          true,
	"beforehand":      true,
	"begin":           true,
	"beginning":       true,
	"beginnings":      true,
	"begins":          true,
	"behind":          true,
	"being":           true,
	"believe":         true,
	"below":           true,
	"beside":          true,
	"besides":         true,
	"best":            true,
	"better":          true,
	"between":         true,
	"beyond":          true,
	"bill":            true,
	"biol":            true,
	"both":            true,
	"bottom":          true,
	"brief":           true,
	"briefly":         true,
	"but":             true,
	"by":              true,
	"c":               true,
	"c'mon":           true,
	"c's":             true,
	"ca":              true,
	"call":            true,
	"came":            true,
	"can":             true,
	"can't":           true,
	"cannot":          true,
	"cant":            true,
	"caption":         true,
	"cause":           true,
	"causes":          true,
	"certain":         true,
	"certainly":       true,
	"changes":         true,
	"clearly":         true,
	"co":              true,
	"co.":             true,
	"com":             true,
	"come":            true,
	"comes":           true,
	"computer":        true,
	"con":             true,
	"concerning":      true,
	"consequently":    true,
	"consider":        true,
	"considering":     true,
	"contain":         true,
	"containing":      true,
	"contains":        true,
	"corresponding":   true,
	"could":           true,
	"couldn't":        true,
	"couldnt":         true,
	"course":          true,
	"cry":             true,
	"currently":       true,
	"d":               true,
	"dare":            true,
	"daren't":         true,
	"date":            true,
	"de":              true,
	"definitely":      true,
	"describe":        true,
	"described":       true,
	"despite":         true,
	"detail":          true,
	"did":             true,
	"didn't":          true,
	"different":       true,
	"directly":        true,
	"do":              true,
	"does":            true,
	"doesn't":         true,
	"doing":           true,
	"don't":           true,
	"done":            true,
	"down":            true,
	"downwards":       true,
	"due":             true,
	"during":          true,
	"e":               true,
	"each":            true,
	"ed":              true,
	"edu":             true,
	"effect":          true,
	"eg":              true,
	"eight":           true,
	"eighty":          true,
	"either":          true,
	"eleven":          true,
	"else":            true,
	"elsewhere":       true,
	"empty":           true,
	"end":             true,
	"ending":          true,
	"enough":          true,
	"entirely":        true,
	"especially":      true,
	"et":              true,
	"et-al":           true,
	"etc":             true,
	"even":            true,
	"ever":            true,
	"evermore":        true,
	"every":           true,
	"everybody":       true,
	"everyone":        true,
	"everything":      true,
	"everywhere":      true,
	"ex":              true,
	"exactly":         true,
	"example":         true,
	"except":          true,
	"f":               true,
	"fairly":          true,
	"far":             true,
	"farther":         true,
	"few":             true,
	"fewer":           true,
	"ff":              true,
	"fifteen":         true,
	"fifth":           true,
	"fifty":           true,
	"fill":            true,
	"find":            true,
	"fire":            true,
	"first":           true,
	"five":            true,
	"fix":             true,
	"followed":        true,
	"following":       true,
	"follows":         true,
	"for":             true,
	"forever":         true,
	"former":          true,
	"formerly":        true,
	"forth":           true,
	"forty":           true,
	"forward":         true,
	"found":           true,
	"four":            true,
	"from":            true,
	"front":           true,
	"full":            true,
	"further":         true,
	"furthermore":     true,
	"g":               true,
	"gave":            true,
	"get":             true,
	"gets":            true,
	"getting":         true,
	"give":            true,
	"given":           true,
	"gives":           true,
	"giving":          true,
	"go":              true,
	"goes":            true,
	"going":           true,
	"gone":            true,
	"got":             true,
	"gotten":          true,
	"greetings":       true,
	"h":               true,
	"had":             true,
	"hadn't":          true,
	"half":            true,
	"happens":         true,
	"hardly":          true,
	"has":             true,
	"hasn't":          true,
	"hasnt":           true,
	"have":            true,
	"haven't":         true,
	"having":          true,
	"he":              true,
	"he'd":            true,
	"he'll":           true,
	"he's":            true,
	"hello":           true,
	"help":            true,
	"hence":           true,
	"her":             true,
	"here":            true,
	"here's":          true,
	"hereafter":       true,
	"hereby":          true,
	"herein":          true,
	"heres":           true,
	"hereupon":        true,
	"hers":            true,
	"herse":           true,
	"herself":         true,
	"hes":             true,
	"hi":              true,
	"hid":             true,
	"him":             true,
	"himse":           true,
	"himself":         true,
	"his":             true,
	"hither":          true,
	"home":            true,
	"hopefully":       true,
	"how":             true,
	"how's":           true,
	"howbeit":         true,
	"however":         true,
	"hundred":         true,
	"i":               true,
	"i'd":             true,
	"i'll":            true,
	"i'm":             true,
	"i've":            true,
	"id":              true,
	"ie":              true,
	"if":              true,
	"ignored":         true,
	"im":              true,
	"immediate":       true,
	"immediately":     true,
	"importance":      true,
	"important":       true,
	"in":              true,
	"inasmuch":        true,
	"inc":             true,
	"inc.":            true,
	"indeed":          true,
	"index":           true,
	"indicate":        true,
	"indicated":       true,
	"indicates":       true,
	"information":     true,
	"inner":           true,
	"inside":          true,
	"insofar":         true,
	"instead":         true,
	"interest":        true,
	"into":            true,
	"invention":       true,
	"inward":          true,
	"is":              true,
	"isn't":           true,
	"it":              true,
	"it'd":            true,
	"it'll":           true,
	"it's":            true,
	"itd":             true,
	"its":             true,
	"itse":            true,
	"itself":          true,
	"j":               true,
	"just":            true,
	"k":               true,
	"keep":            true,
	"keeps":           true,
	"kept":            true,
	"keys":            true,
	"kg":              true,
	"km":              true,
	"know":            true,
	"known":           true,
	"knows":           true,
	"l":               true,
	"largely":         true,
	"last":            true,
	"lately":          true,
	"later":           true,
	"latter":          true,
	"latterly":        true,
	"least":           true,
	"less":            true,
	"lest":            true,
	"let":             true,
	"let's":           true,
	"lets":            true,
	"like":            true,
	"liked":           true,
	"likely":          true,
	"likewise":        true,
	"line":            true,
	"little":          true,
	"look":            true,
	"looking":         true,
	"looks":           true,
	"low":             true,
	"lower":           true,
	"ltd":             true,
	"m":               true,
	"made":            true,
	"mainly":          true,
	"make":            true,
	"makes":           true,
	"many":            true,
	"may":             true,
	"maybe":           true,
	"mayn't":          true,
	"me":              true,
	"mean":            true,
	"means":           true,
	"meantime":        true,
	"meanwhile":       true,
	"merely":          true,
	"mg":              true,
	"might":           true,
	"mightn't":        true,
	"mill":            true,
	"million":         true,
	"mine":            true,
	"minus":           true,
	"miss":            true,
	"ml":              true,
	"more":            true,
	"moreover":        true,
	"most":            true,
	"mostly":          true,
	"move":            true,
	"mr":              true,
	"mrs":             true,
	"much":            true,
	"mug":             true,
	"must":            true,
	"mustn't":         true,
	"my":              true,
	"myse":            true,
	"myself":          true,
	"n":               true,
	"na":              true,
	"name":            true,
	"namely":          true,
	"nay":             true,
	"nd":              true,
	"near":            true,
	"nearly":          true,
	"necessarily":     true,
	"necessary":       true,
	"need":            true,
	"needn't":         true,
	"needs":           true,
	"neither":         true,
	"never":           true,
	"neverf":          true,
	"neverless":       true,
	"nevertheless":    true,
	"new":             true,
	"next":            true,
	"nine":            true,
	"ninety":          true,
	"no":              true,
	"no-one":          true,
	"nobody":          true,
	"non":             true,
	"none":            true,
	"nonetheless":     true,
	"noone":           true,
	"nor":             true,
	"normally":        true,
	"nos":             true,
	"not":             true,
	"noted":           true,
	"nothing":         true,
	"notwithstanding": true,
	"novel":           true,
	"now":             true,
	"nowhere":         true,
	"o":               true,
	"obtain":          true,
	"obtained":        true,
	"obviously":       true,
	"of":              true,
	"off":             true,
	"often":           true,
	"oh":              true,
	"ok":              true,
	"okay":            true,
	"old":             true,
	"omitted":         true,
	"on":              true,
	"once":            true,
	"one":             true,
	"one's":           true,
	"ones":            true,
	"only":            true,
	"onto":            true,
	"opposite":        true,
	"or":              true,
	"ord":             true,
	"other":           true,
	"others":          true,
	"otherwise":       true,
	"ought":           true,
	"oughtn't":        true,
	"our":             true,
	"ours":            true,
	"ourselves":       true,
	"out":             true,
	"outside":         true,
	"over":            true,
	"overall":         true,
	"owing":           true,
	"own":             true,
	"p":               true,
	"page":            true,
	"pages":           true,
	"part":            true,
	"particular":      true,
	"particularly":    true,
	"past":            true,
	"per":             true,
	"perhaps":         true,
	"placed":          true,
	"please":          true,
	"plus":            true,
	"poorly":          true,
	"possible":        true,
	"possibly":        true,
	"potentially":     true,
	"pp":              true,
	"predominantly":   true,
	"present":         true,
	"presumably":      true,
	"previously":      true,
	"primarily":       true,
	"probably":        true,
	"promptly":        true,
	"proud":           true,
	"provided":        true,
	"provides":        true,
	"put":             true,
	"q":               true,
	"que":             true,
	"quickly":         true,
	"quite":           true,
	"qv":              true,
	"r":               true,
	"ran":             true,
	"rather":          true,
	"rd":              true,
	"re":              true,
	"readily":         true,
	"really":          true,
	"reasonably":      true,
	"recent":          true,
	"recently":        true,
	"ref":             true,
	"refs":            true,
	"regarding":       true,
	"regardless":      true,
	"regards":         true,
	"related":         true,
	"relatively":      true,
	"research":        true,
	"respectively":    true,
	"resulted":        true,
	"resulting":       true,
	"results":         true,
	"right":           true,
	"round":           true,
	"run":             true,
	"s":               true,
	"said":            true,
	"same":            true,
	"saw":             true,
	"say":             true,
	"saying":          true,
	"says":            true,
	"sec":             true,
	"second":          true,
	"secondly":        true,
	"section":         true,
	"see":             true,
	"seeing":          true,
	"seem":            true,
	"seemed":          true,
	"seeming":         true,
	"seems":           true,
	"seen":            true,
	"self":            true,
	"selves":          true,
	"sensible":        true,
	"sent":            true,
	"serious":         true,
	"seriously":       true,
	"seven":           true,
	"several":         true,
	"shall":           true,
	"shan't":          true,
	"she":             true,
	"she'd":           true,
	"she'll":          true,
	"she's":           true,
	"shed":            true,
	"shes":            true,
	"should":          true,
	"shouldn't":       true,
	"show":            true,
	"showed":          true,
	"shown":           true,
	"showns":          true,
	"shows":           true,
	"side":            true,
	"significant":     true,
	"significantly":   true,
	"similar":         true,
	"similarly":       true,
	"since":           true,
	"sincere":         true,
	"six":             true,
	"sixty":           true,
	"slightly":        true,
	"so":              true,
	"some":            true,
	"somebody":        true,
	"someday":         true,
	"somehow":         true,
	"someone":         true,
	"somethan":        true,
	"something":       true,
	"sometime":        true,
	"sometimes":       true,
	"somewhat":        true,
	"somewhere":       true,
	"soon":            true,
	"sorry":           true,
	"specifically":    true,
	"specified":       true,
	"specify":         true,
	"specifying":      true,
	"state":           true,
	"states":          true,
	"still":           true,
	"stop":            true,
	"strongly":        true,
	"sub":             true,
	"substantially":   true,
	"successfully":    true,
	"such":            true,
	"sufficiently":    true,
	"suggest":         true,
	"sup":             true,
	"sure":            true,
	"system":          true,
	"t":               true,
	"t's":             true,
	"take":            true,
	"taken":           true,
	"taking":          true,
	"tell":            true,
	"ten":             true,
	"tends":           true,
	"th":              true,
	"than":            true,
	"thank":           true,
	"thanks":          true,
	"thanx":           true,
	"that":            true,
	"that'll":         true,
	"that's":          true,
	"that've":         true,
	"thats":           true,
	"the":             true,
	"their":           true,
	"theirs":          true,
	"them":            true,
	"themselves":      true,
	"then":            true,
	"thence":          true,
	"there":           true,
	"there'd":         true,
	"there'll":        true,
	"there're":        true,
	"there's":         true,
	"there've":        true,
	"thereafter":      true,
	"thereby":         true,
	"thered":          true,
	"therefore":       true,
	"therein":         true,
	"thereof":         true,
	"therere":         true,
	"theres":          true,
	"thereto":         true,
	"thereupon":       true,
	"these":           true,
	"they":            true,
	"they'd":          true,
	"they'll":         true,
	"they're":         true,
	"they've":         true,
	"theyd":           true,
	"theyre":          true,
	"thick":           true,
	"thin":            true,
	"thing":           true,
	"things":          true,
	"think":           true,
	"third":           true,
	"thirty":          true,
	"this":            true,
	"thorough":        true,
	"thoroughly":      true,
	"those":           true,
	"thou":            true,
	"though":          true,
	"thoughh":         true,
	"thousand":        true,
	"three":           true,
	"throug":          true,
	"through":         true,
	"throughout":      true,
	"thru":            true,
	"thus":            true,
	"til":             true,
	"till":            true,
	"tip":             true,
	"to":              true,
	"together":        true,
	"too":             true,
	"took":            true,
	"top":             true,
	"toward":          true,
	"towards":         true,
	"tried":           true,
	"tries":           true,
	"truly":           true,
	"try":             true,
	"trying":          true,
	"ts":              true,
	"twelve":          true,
	"twenty":          true,
	"twice":           true,
	"two":             true,
	"u":               true,
	"un":              true,
	"under":           true,
	"underneath":      true,
	"undoing":         true,
	"unfortunately":   true,
	"unless":          true,
	"unlike":          true,
	"unlikely":        true,
	"until":           true,
	"unto":            true,
	"up":              true,
	"upon":            true,
	"ups":             true,
	"upwards":         true,
	"us":              true,
	"use":             true,
	"used":            true,
	"useful":          true,
	"usefully":        true,
	"usefulness":      true,
	"uses":            true,
	"using":           true,
	"usually":         true,
	"uucp":            true,
	"v":               true,
	"value":           true,
	"various":         true,
	"versus":          true,
	"very":            true,
	"via":             true,
	"viz":             true,
	"vol":             true,
	"vols":            true,
	"vs":              true,
	"w":               true,
	"want":            true,
	"wants":           true,
	"was":             true,
	"wasn't":          true,
	"way":             true,
	"we":              true,
	"we'd":            true,
	"we'll":           true,
	"we're":           true,
	"we've":           true,
	"wed":             true,
	"welcome":         true,
	"well":            true,
	"went":            true,
	"were":            true,
	"weren't":         true,
	"what":            true,
	"what'll":         true,
	"what's":          true,
	"what've":         true,
	"whatever":        true,
	"whats":           true,
	"when":            true,
	"when's":          true,
	"whence":          true,
	"whenever":        true,
	"where":           true,
	"where's":         true,
	"whereafter":      true,
	"whereas":         true,
	"whereby":         true,
	"wherein":         true,
	"wheres":          true,
	"whereupon":       true,
	"wherever":        true,
	"whether":         true,
	"which":           true,
	"whichever":       true,
	"while":           true,
	"whilst":          true,
	"whim":            true,
	"whither":         true,
	"who":             true,
	"who'd":           true,
	"who'll":          true,
	"who's":           true,
	"whod":            true,
	"whoever":         true,
	"whole":           true,
	"whom":            true,
	"whomever":        true,
	"whos":            true,
	"whose":           true,
	"why":             true,
	"why's":           true,
	"widely":          true,
	"will":            true,
	"willing":         true,
	"wish":            true,
	"with":            true,
	"within":          true,
	"without":         true,
	"won't":           true,
	"wonder":          true,
	"words":           true,
	"world":           true,
	"would":           true,
	"wouldn't":        true,
	"www":             true,
	"x":               true,
	"y":               true,
	"yes":             true,
	"yet":             true,
	"you":             true,
	"you'd":           true,
	"you'll":          true,
	"you're":          true,
	"you've":          true,
	"youd":            true,
	"your":            true,
	"youre":           true,
	"yours":           true,
	"yourself":        true,
	"yourselves":      true,
	"z":               true,
	"zero":            true,
}

Functions

func PrintResults

func PrintResults(scores []score, config Config)

Types

type Config

// Config groups the options passed to ParseCorpus, NewDocument and
// ParseDocument (as *Config) and to PrintResults (by value).
//
// NOTE(review): the code that reads these fields is not part of this listing,
// so the notes below are inferred from names and types only — confirm against
// the implementation:
//   - BestFirst, Limit, ShowScores: presumably control ordering, maximum count
//     and score display of printed results.
//   - FollowSymlinks: presumably whether symlinks are followed while walking
//     search paths.
//   - NoStemming, NoStoplist: presumably disable stemming and stop word
//     filtering respectively.
//   - Stoplist: a *Stoplist; presumably the stop word list to apply.
//   - Verbose: presumably enables additional diagnostic output.
type Config struct {
	BestFirst      bool
	FollowSymlinks bool
	Limit          int
	NoStemming     bool
	NoStoplist     bool
	ShowScores     bool
	Stoplist       *Stoplist
	Verbose        bool
}

type Corpus

// Corpus is an opaque type: all of its fields are unexported. A *Corpus is
// returned by NewCorpus (given a slice of *Document) and by ParseCorpus (given
// a query document, search paths and a *Config). Its SimilarDocuments method
// takes a query *Document and returns a slice of scores.
type Corpus struct {
	// contains filtered or unexported fields
}

func NewCorpus

func NewCorpus(documents []*Document) *Corpus

func ParseCorpus

func ParseCorpus(query *Document, searchPaths []string, config *Config) *Corpus

func (*Corpus) SimilarDocuments

func (corpus *Corpus) SimilarDocuments(query *Document) []score

type Document

// Document is an opaque type: all of its fields are unexported. A *Document is
// returned by NewDocument (given an io.Reader and a *Config) and by
// ParseDocument (given a path and a *Config); both also return an error.
type Document struct {
	// contains filtered or unexported fields
}

func NewDocument

func NewDocument(rd io.Reader, config *Config) (*Document, error)

func ParseDocument added in v0.1.3

func ParseDocument(path string, config *Config) (*Document, error)

type Stoplist

// Stoplist is a set of words represented as a map keyed by the word itself.
// DefaultStoplist stores true for every word it lists. ParseStoplist returns a
// *Stoplist (or an error) for a given path.
type Stoplist map[string]bool

func ParseStoplist

func ParseStoplist(path string) (*Stoplist, error)

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL