ranking

package

v0.4.6 Latest Latest Go to latest Published: Jun 16, 2022 License: Apache-2.0 Imports: 24 Imported by: 0

Details

Valid go.mod file

The Go module system was introduced in Go 1.11 and is the official dependency management solution for Go.
Redistributable license

Redistributable licenses place minimal restrictions on how software can be used, modified, and redistributed.
Tagged version

Modules with tagged versions give importers more predictable builds.
Stable version

When a project reaches major version v1 it is considered stable.
Learn more about best practices

Repository

e.coding.net/cloudbase-100023652792/pathip/gorse

Documentation ¶

Index ¶

Constants
func Evaluate(estimator MatrixFactorization, testSet, trainSet *DataSet, ...) []float32
func GetModelName(m Model) string
func HR(targetSet *i32set.Set, rankList []int32) float32
func LoadDataFromBuiltIn(dataSetName string) (*DataSet, *DataSet, error)
func MAP(targetSet *i32set.Set, rankList []int32) float32
func MRR(targetSet *i32set.Set, rankList []int32) float32
func MarshalModel(w io.Writer, m Model) error
func NDCG(targetSet *i32set.Set, rankList []int32) float32
func Precision(targetSet *i32set.Set, rankList []int32) float32
func Rank(model MatrixFactorization, userId int32, candidates []int32, topN int) ([]int32, []float32)
func Recall(targetSet *i32set.Set, rankList []int32) float32
type BPR
- func NewBPR(params model.Params) *BPR
- func (bpr *BPR) Clear()
- func (bpr *BPR) Fit(trainSet, valSet *DataSet, config *FitConfig) Score
- func (bpr *BPR) GetItemFactor(itemIndex int32) []float32
- func (bpr *BPR) GetParamsGrid() model.ParamsGrid
- func (bpr *BPR) GetUserFactor(userIndex int32) []float32
- func (bpr *BPR) Init(trainSet *DataSet)
- func (bpr *BPR) InternalPredict(userIndex, itemIndex int32) float32
- func (bpr *BPR) Invalid() bool
- func (bpr *BPR) Marshal(w io.Writer) error
- func (bpr *BPR) Predict(userId, itemId string) float32
- func (bpr *BPR) SetParams(params model.Params)
- func (bpr *BPR) Unmarshal(r io.Reader) error
type BaseMatrixFactorization
- func (baseModel *BaseMatrixFactorization) Bytes() int
- func (baseModel *BaseMatrixFactorization) GetItemIndex() base.Index
- func (baseModel *BaseMatrixFactorization) GetUserIndex() base.Index
- func (baseModel *BaseMatrixFactorization) Init(trainSet *DataSet)
- func (baseModel *BaseMatrixFactorization) IsItemPredictable(itemIndex int32) bool
- func (baseModel *BaseMatrixFactorization) IsUserPredictable(userIndex int32) bool
- func (baseModel *BaseMatrixFactorization) Marshal(w io.Writer) error
- func (baseModel *BaseMatrixFactorization) Unmarshal(r io.Reader) error
type CCD
- func NewCCD(params model.Params) *CCD
- func (ccd *CCD) Clear()
- func (ccd *CCD) Fit(trainSet, valSet *DataSet, config *FitConfig) Score
- func (ccd *CCD) GetItemFactor(itemIndex int32) []float32
- func (ccd *CCD) GetParamsGrid() model.ParamsGrid
- func (ccd *CCD) GetUserFactor(userIndex int32) []float32
- func (ccd *CCD) Init(trainSet *DataSet)
- func (ccd *CCD) InternalPredict(userIndex, itemIndex int32) float32
- func (ccd *CCD) Invalid() bool
- func (ccd *CCD) Marshal(w io.Writer) error
- func (ccd *CCD) Predict(userId, itemId string) float32
- func (ccd *CCD) SetParams(params model.Params)
- func (ccd *CCD) Unmarshal(r io.Reader) error
type DataSet
- func LoadDataFromCSV(fileName, sep string, hasHeader bool) *DataSet
- func NewDirectIndexDataset() *DataSet
- func NewMapIndexDataset() *DataSet
- func (dataset *DataSet) AddFeedback(userId, itemId string, insertUserItem bool)
- func (dataset *DataSet) AddItem(itemId string)
- func (dataset *DataSet) AddUser(userId string)
- func (dataset *DataSet) Bytes() int
- func (dataset *DataSet) Count() int
- func (dataset *DataSet) GetIndex(i int) (int32, int32)
- func (dataset *DataSet) ItemCount() int
- func (dataset *DataSet) NegativeSample(excludeSet *DataSet, numCandidates int) [][]int32
- func (dataset *DataSet) SetNegatives(userId string, negatives []string)
- func (dataset *DataSet) Split(numTestUsers int, seed int64) (*DataSet, *DataSet)
- func (dataset *DataSet) UserCount() int
type FitConfig
- func NewFitConfig() *FitConfig
- func (config *FitConfig) LoadDefaultIfNil() *FitConfig
- func (config *FitConfig) SetJobs(nJobs int) *FitConfig
- func (config *FitConfig) SetTracker(tracker model.Tracker) *FitConfig
- func (config *FitConfig) SetVerbose(verbose int) *FitConfig
type MatrixFactorization
- func Clone(m MatrixFactorization) MatrixFactorization
- func UnmarshalModel(r io.Reader) (MatrixFactorization, error)
type Metric
type Model
type ModelSearcher
- func NewModelSearcher(nEpoch, nTrials, nJobs int) *ModelSearcher
- func (searcher *ModelSearcher) Fit(trainSet, valSet *DataSet, tracker model.Tracker, runner model.Runner) error
- func (searcher *ModelSearcher) GetBestModel() (string, MatrixFactorization, Score)
type ParamsSearchResult
- func GridSearchCV(estimator MatrixFactorization, trainSet *DataSet, testSet *DataSet, ...) ParamsSearchResult
- func RandomSearchCV(estimator MatrixFactorization, trainSet *DataSet, testSet *DataSet, ...) ParamsSearchResult
- func (r *ParamsSearchResult) AddScore(params model.Params, score Score)
type Score
type SnapshotManger
- func (sm *SnapshotManger) AddSnapshot(score Score, weights ...interface{})
- func (sm *SnapshotManger) AddSnapshotNoCopy(score Score, weights ...interface{})

Constants ¶

View Source

const (
	CollaborativeBPR = "bpr"
	CollaborativeCCD = "ccd"
)

Variables ¶

This section is empty.

Functions ¶

func Evaluate ¶

func Evaluate(estimator MatrixFactorization, testSet, trainSet *DataSet, topK, numCandidates, nJobs int, scorers ...Metric) []float32

Evaluate evaluates a model in top-n tasks.

func GetModelName ¶

func GetModelName(m Model) string

func HR ¶

func HR(targetSet *i32set.Set, rankList []int32) float32

HR means Hit Ratio.

func LoadDataFromBuiltIn ¶

func LoadDataFromBuiltIn(dataSetName string) (*DataSet, *DataSet, error)

LoadDataFromBuiltIn loads a built-in data set. Currently supported:

func MAP ¶

func MAP(targetSet *i32set.Set, rankList []int32) float32

MAP means Mean Average Precision. mAP: http://sdsawtelle.github.io/blog/output/mean-average-precision-MAP-for-recommender-systems.html

func MRR ¶

func MRR(targetSet *i32set.Set, rankList []int32) float32

MRR means Mean Reciprocal Rank.

The mean reciprocal rank is a statistical measure for evaluating any process that produces a list of possible responses to a sample of queries, ordered by probability of correctness. The reciprocal rank of a query response is the multiplicative inverse of the rank of the first correct answer: 1 for first place, 1/2 for second place, 1/3 for third place, and so on. The mean reciprocal rank is the average of the reciprocal ranks of results for a sample of queries Q:

MRR = \frac{1}{|Q|} \sum_{i=1}^{|Q|} \frac{1}{rank_i}

func MarshalModel ¶

func MarshalModel(w io.Writer, m Model) error

func NDCG ¶

func NDCG(targetSet *i32set.Set, rankList []int32) float32

NDCG means Normalized Discounted Cumulative Gain.

func Precision ¶

func Precision(targetSet *i32set.Set, rankList []int32) float32

Precision is the fraction of relevant ItemFeedback among the recommended ItemFeedback.

\frac{|\{relevant documents\} \cap \{retrieved documents\}|}{|\{retrieved documents\}|}

func Rank ¶

func Rank(model MatrixFactorization, userId int32, candidates []int32, topN int) ([]int32, []float32)

func Recall ¶

func Recall(targetSet *i32set.Set, rankList []int32) float32

Recall is the fraction of relevant ItemFeedback that have been recommended over the total amount of relevant ItemFeedback.

\frac{|\{relevant documents\} \cap \{retrieved documents\}|}{|\{relevant documents\}|}

Types ¶

type BPR ¶

type BPR struct {
	BaseMatrixFactorization
	// contains filtered or unexported fields
}

BPR (Bayesian Personalized Ranking) is a pairwise learning algorithm for matrix factorization models with implicit feedback. The pairwise ranking between items i and j for user u is estimated by:

p(i >_u j) = \sigma( p_u^T (q_i - q_j) )

Hyper-parameters:

 Reg 		- The regularization parameter of the cost function that is
			  optimized. Default is 0.01.
 Lr 		- The learning rate of SGD. Default is 0.05.
 NFactors	- The number of latent factors. Default is 10.
 NEpochs	- The number of iterations of the SGD procedure. Default is 100.
 InitMean	- The mean of initial random latent factors. Default is 0.
 InitStdDev	- The standard deviation of initial random latent factors. Default is 0.001.

func NewBPR ¶

func NewBPR(params model.Params) *BPR

NewBPR creates a BPR model.

func (*BPR) Clear ¶

func (bpr *BPR) Clear()

func (*BPR) Fit ¶

func (bpr *BPR) Fit(trainSet, valSet *DataSet, config *FitConfig) Score

Fit the BPR model.

func (*BPR) GetItemFactor ¶

func (bpr *BPR) GetItemFactor(itemIndex int32) []float32

GetItemFactor returns the latent factor of an item.

func (*BPR) GetParamsGrid ¶

func (bpr *BPR) GetParamsGrid() model.ParamsGrid

func (*BPR) GetUserFactor ¶

func (bpr *BPR) GetUserFactor(userIndex int32) []float32

GetUserFactor returns the latent factor of a user.

func (*BPR) Init ¶

func (bpr *BPR) Init(trainSet *DataSet)

func (*BPR) InternalPredict ¶

func (bpr *BPR) InternalPredict(userIndex, itemIndex int32) float32

func (*BPR) Invalid ¶

func (bpr *BPR) Invalid() bool

func (*BPR) Marshal ¶

func (bpr *BPR) Marshal(w io.Writer) error

Marshal model into byte stream.

func (*BPR) Predict ¶

func (bpr *BPR) Predict(userId, itemId string) float32

Predict by the BPR model.

func (*BPR) SetParams ¶

func (bpr *BPR) SetParams(params model.Params)

SetParams sets hyper-parameters of the BPR model.

func (*BPR) Unmarshal ¶

func (bpr *BPR) Unmarshal(r io.Reader) error

Unmarshal model from byte stream.

type BaseMatrixFactorization ¶

type BaseMatrixFactorization struct {
	model.BaseModel
	UserIndex       base.Index
	ItemIndex       base.Index
	UserPredictable *bitset.BitSet
	ItemPredictable *bitset.BitSet
	// Model parameters
	UserFactor [][]float32 // p_u
	ItemFactor [][]float32 // q_i
}

func (*BaseMatrixFactorization) Bytes ¶

func (baseModel *BaseMatrixFactorization) Bytes() int

func (*BaseMatrixFactorization) GetItemIndex ¶

func (baseModel *BaseMatrixFactorization) GetItemIndex() base.Index

func (*BaseMatrixFactorization) GetUserIndex ¶

func (baseModel *BaseMatrixFactorization) GetUserIndex() base.Index

func (*BaseMatrixFactorization) Init ¶

func (baseModel *BaseMatrixFactorization) Init(trainSet *DataSet)

func (*BaseMatrixFactorization) IsItemPredictable ¶

func (baseModel *BaseMatrixFactorization) IsItemPredictable(itemIndex int32) bool

IsItemPredictable returns false if the item has no feedback and its embedding vector has never been trained.

func (*BaseMatrixFactorization) IsUserPredictable ¶

func (baseModel *BaseMatrixFactorization) IsUserPredictable(userIndex int32) bool

IsUserPredictable returns false if the user has no feedback and its embedding vector has never been trained.

func (*BaseMatrixFactorization) Marshal ¶

func (baseModel *BaseMatrixFactorization) Marshal(w io.Writer) error

Marshal model into byte stream.

func (*BaseMatrixFactorization) Unmarshal ¶

func (baseModel *BaseMatrixFactorization) Unmarshal(r io.Reader) error

Unmarshal model from byte stream.

type CCD ¶

type CCD struct {
	BaseMatrixFactorization
	// contains filtered or unexported fields
}

func NewCCD ¶

func NewCCD(params model.Params) *CCD

NewCCD creates an eALS model.

func (*CCD) Clear ¶

func (ccd *CCD) Clear()

func (*CCD) Fit ¶

func (ccd *CCD) Fit(trainSet, valSet *DataSet, config *FitConfig) Score

func (*CCD) GetItemFactor ¶

func (ccd *CCD) GetItemFactor(itemIndex int32) []float32

GetItemFactor returns latent factor of an item.

func (*CCD) GetParamsGrid ¶

func (ccd *CCD) GetParamsGrid() model.ParamsGrid

func (*CCD) GetUserFactor ¶

func (ccd *CCD) GetUserFactor(userIndex int32) []float32

GetUserFactor returns latent factor of a user.

func (*CCD) Init ¶

func (ccd *CCD) Init(trainSet *DataSet)

func (*CCD) InternalPredict ¶

func (ccd *CCD) InternalPredict(userIndex, itemIndex int32) float32

func (*CCD) Invalid ¶

func (ccd *CCD) Invalid() bool

func (*CCD) Marshal ¶

func (ccd *CCD) Marshal(w io.Writer) error

Marshal model into byte stream.

func (*CCD) Predict ¶

func (ccd *CCD) Predict(userId, itemId string) float32

Predict by the ALS model.

func (*CCD) SetParams ¶

func (ccd *CCD) SetParams(params model.Params)

SetParams sets hyper-parameters for the ALS model.

func (*CCD) Unmarshal ¶

func (ccd *CCD) Unmarshal(r io.Reader) error

Unmarshal model from byte stream.

type DataSet ¶

type DataSet struct {
	UserIndex      base.Index
	ItemIndex      base.Index
	FeedbackUsers  base.Array[int32]
	FeedbackItems  base.Array[int32]
	UserFeedback   [][]int32
	ItemFeedback   [][]int32
	Negatives      [][]int32
	ItemLabels     [][]int32
	UserLabels     [][]int32
	HiddenItems    []bool
	ItemCategories [][]string
	CategorySet    *strset.Set
	// statistics
	NumItemLabels    int32
	NumUserLabels    int32
	NumItemLabelUsed int
	NumUserLabelUsed int
}

DataSet contains preprocessed data structures for recommendation models.

func LoadDataFromCSV ¶

func LoadDataFromCSV(fileName, sep string, hasHeader bool) *DataSet

LoadDataFromCSV loads Data from a CSV file. The CSV file should be:

[optional header]
<userId 1> <sep> <itemId 1> <sep> <rating 1> <sep> <extras>
<userId 2> <sep> <itemId 2> <sep> <rating 2> <sep> <extras>
<userId 3> <sep> <itemId 3> <sep> <rating 3> <sep> <extras>
...

For example, the `u.Data` from MovieLens 100K is:

196\t242\t3\t881250949
186\t302\t3\t891717742
22\t377\t1\t878887116

func NewDirectIndexDataset ¶

func NewDirectIndexDataset() *DataSet

func NewMapIndexDataset ¶

func NewMapIndexDataset() *DataSet

NewMapIndexDataset creates a data set.

func (*DataSet) AddFeedback ¶

func (dataset *DataSet) AddFeedback(userId, itemId string, insertUserItem bool)

func (*DataSet) AddItem ¶

func (dataset *DataSet) AddItem(itemId string)

func (*DataSet) AddUser ¶

func (dataset *DataSet) AddUser(userId string)

func (*DataSet) Bytes ¶

func (dataset *DataSet) Bytes() int

func (*DataSet) Count ¶

func (dataset *DataSet) Count() int

func (*DataSet) GetIndex ¶

func (dataset *DataSet) GetIndex(i int) (int32, int32)

GetIndex gets the i-th record as <user index, item index>.

func (*DataSet) ItemCount ¶

func (dataset *DataSet) ItemCount() int

ItemCount returns the number of ItemFeedback.

func (*DataSet) NegativeSample ¶

func (dataset *DataSet) NegativeSample(excludeSet *DataSet, numCandidates int) [][]int32

func (*DataSet) SetNegatives ¶

func (dataset *DataSet) SetNegatives(userId string, negatives []string)

func (*DataSet) Split ¶

func (dataset *DataSet) Split(numTestUsers int, seed int64) (*DataSet, *DataSet)

Split splits the dataset by the user-leave-one-out method. The argument `numTestUsers` determines the number of users in the test set. If numTestUsers is equal to or greater than the total number of users, or numTestUsers <= 0, all users are included in the test set.

func (*DataSet) UserCount ¶

func (dataset *DataSet) UserCount() int

UserCount returns the number of UserFeedback.

type FitConfig ¶

type FitConfig struct {
	Jobs       int
	Verbose    int
	Candidates int
	TopK       int
	Tracker    model.Tracker
}

func NewFitConfig ¶

func NewFitConfig() *FitConfig

func (*FitConfig) LoadDefaultIfNil ¶

func (config *FitConfig) LoadDefaultIfNil() *FitConfig

func (*FitConfig) SetJobs ¶

func (config *FitConfig) SetJobs(nJobs int) *FitConfig

func (*FitConfig) SetTracker ¶

func (config *FitConfig) SetTracker(tracker model.Tracker) *FitConfig

func (*FitConfig) SetVerbose ¶

func (config *FitConfig) SetVerbose(verbose int) *FitConfig

type MatrixFactorization ¶

type MatrixFactorization interface {
	Model
	// Predict the rating given by a user (userId) to an item (itemId).
	Predict(userId, itemId string) float32
	// InternalPredict predicts the rating given a user index and an item index.
	InternalPredict(userIndex, itemIndex int32) float32
	// GetUserIndex returns user index.
	GetUserIndex() base.Index
	// GetItemIndex returns item index.
	GetItemIndex() base.Index
	// IsUserPredictable returns false if the user has no feedback and its embedding vector has never been trained.
	IsUserPredictable(userIndex int32) bool
	// IsItemPredictable returns false if the item has no feedback and its embedding vector has never been trained.
	IsItemPredictable(itemIndex int32) bool
	// Marshal model into byte stream.
	Marshal(w io.Writer) error
	// Unmarshal model from byte stream.
	Unmarshal(r io.Reader) error
	// Bytes returns used memory.
	Bytes() int
}

func Clone ¶

func Clone(m MatrixFactorization) MatrixFactorization

Clone a model with deep copy.

func UnmarshalModel ¶

func UnmarshalModel(r io.Reader) (MatrixFactorization, error)

type Metric ¶

type Metric func(targetSet *i32set.Set, rankList []int32) float32

Metric is used by evaluators in personalized ranking tasks.

type Model ¶

type Model interface {
	model.Model
	// Fit a model with a train set and parameters.
	Fit(trainSet *DataSet, validateSet *DataSet, config *FitConfig) Score
	// GetItemIndex returns item index.
	GetItemIndex() base.Index
	// Marshal model into byte stream.
	Marshal(w io.Writer) error
	// Unmarshal model from byte stream.
	Unmarshal(r io.Reader) error
	// GetUserFactor returns latent factor of a user.
	GetUserFactor(userIndex int32) []float32
	// GetItemFactor returns latent factor of an item.
	GetItemFactor(itemIndex int32) []float32
}

type ModelSearcher ¶

type ModelSearcher struct {
	// contains filtered or unexported fields
}

ModelSearcher is a thread-safe personal ranking model searcher.

func NewModelSearcher ¶

func NewModelSearcher(nEpoch, nTrials, nJobs int) *ModelSearcher

NewModelSearcher creates a thread-safe personal ranking model searcher.

func (*ModelSearcher) Fit ¶

func (searcher *ModelSearcher) Fit(trainSet, valSet *DataSet, tracker model.Tracker, runner model.Runner) error

func (*ModelSearcher) GetBestModel ¶

func (searcher *ModelSearcher) GetBestModel() (string, MatrixFactorization, Score)

GetBestModel returns the optimal personal ranking model.

type ParamsSearchResult ¶

type ParamsSearchResult struct {
	BestModel  MatrixFactorization
	BestScore  Score
	BestParams model.Params
	BestIndex  int
	Scores     []Score
	Params     []model.Params
}

ParamsSearchResult contains the return of grid search.

func GridSearchCV ¶

func GridSearchCV(estimator MatrixFactorization, trainSet *DataSet, testSet *DataSet, paramGrid model.ParamsGrid,
	_ int64, fitConfig *FitConfig, runner model.Runner) ParamsSearchResult

GridSearchCV finds the best parameters for a model.

func RandomSearchCV ¶

func RandomSearchCV(estimator MatrixFactorization, trainSet *DataSet, testSet *DataSet, paramGrid model.ParamsGrid,
	numTrials int, seed int64, fitConfig *FitConfig, runner model.Runner) ParamsSearchResult

RandomSearchCV searches hyper-parameters by random search.

func (*ParamsSearchResult) AddScore ¶

func (r *ParamsSearchResult) AddScore(params model.Params, score Score)

type Score ¶

type Score struct {
	NDCG      float32
	Precision float32
	Recall    float32
}

type SnapshotManger ¶

type SnapshotManger struct {
	BestWeights []interface{}
	BestScore   Score
}

SnapshotManger manages the best snapshot.

func (*SnapshotManger) AddSnapshot ¶

func (sm *SnapshotManger) AddSnapshot(score Score, weights ...interface{})

AddSnapshot adds a copied snapshot.

func (*SnapshotManger) AddSnapshotNoCopy ¶

func (sm *SnapshotManger) AddSnapshotNoCopy(score Score, weights ...interface{})

AddSnapshotNoCopy adds a snapshot without copy.

Source Files ¶

View all Source files

?	: This menu
/	: Search site
f or F	: Jump to
y or Y	: Canonical URL