movielens

package
v0.5.0 Latest Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: Nov 6, 2022 License: AGPL-3.0 Imports: 18 Imported by: 0

README

MovieLens Example

Original Data: MovieLens 100k

SQLite DB file: movielens.db.zip

To run the tests, you need to download the SQLite DB file and put it in the current directory.

# download and unzip the SQLite DB file
wget https://github.com/auxten/edgeRec/files/9895974/movielens.db.zip && unzip movielens.db.zip

SQL that splits the users into a training set (80%) and a test set (20%):


-- import data from csv, do it with any tool

select count(distinct userId) from ratings; -- 610 users

create table user as select distinct userId, 0 as is_train  from ratings;

-- randomly choose 80% of the users as train users
update user
set is_train = 1
where userId in
      (SELECT userId
       FROM (select distinct userId from ratings)
       ORDER BY RANDOM()
       LIMIT 488);

select count(*) from user where is_train != 1;

-- split train and test set of movielens ratings
create table ratings_train as
select r.userId, movieId, rating, timestamp
from ratings r
         left join user u on r.userId = u.userId
where is_train = 1;
create table ratings_test as
select r.userId, movieId, rating, timestamp
from ratings r
         left join user u on r.userId = u.userId
where is_train = 0;

select count(*) from ratings_train;
select count(*) from ratings_test;
select count(*) from ratings;

select count(distinct movieId) from movies

The DIN way to split dataset

There is another way to split the MovieLens-20m dataset by userId, as described in the Deep Interest Network paper.

MovieLens 20m

Related SQL:

create table movies
(
    movieId INTEGER,
    title   TEXT,
    genres  TEXT
);

create table ratings
(
    userId INTEGER,
    movieId INTEGER,
    rating FLOAT,
    timestamp INTEGER
);

create table tags
(
    userId    INTEGER,
    movieId   INTEGER,
    tag       TEXT,
    timestamp INTEGER
);

-- import data from csv, do it with any tool

select count(distinct userId) from ratings; -- 138,493 users

create table user as select distinct userId, 0 as is_train  from ratings;

-- randomly choose 100000 users as train users
update user
set is_train = 1
where userId in
      (SELECT userId
       FROM (select distinct userId from ratings)
       ORDER BY RANDOM()
       LIMIT 100000);

select count(*) from user where is_train != 1; -- 38,493 test users

-- split train and test set of movielens-20m ratings
create table ratings_train as
select r.userId, movieId, rating, timestamp
from ratings r
         left join user u on r.userId = u.userId
where is_train = 1;
create table ratings_test as
select r.userId, movieId, rating, timestamp
from ratings r
         left join user u on r.userId = u.userId
where is_train = 0;

select count(*) from ratings_train; --14,393,526
select count(*) from ratings_test;  --5,606,737
select count(*) from ratings;       --20,000,263

Documentation

Index

Constants

This section is empty.

Variables

This section is empty.

Functions

func BinarizeLabel

func BinarizeLabel(rating float64) float64

func BinarizeLabel32 added in v0.3.0

func BinarizeLabel32(rating float32) float32

func PreFillUbCache added in v0.2.0

func PreFillUbCache(ubc *ubcache.UserBehaviorCache, table string) (err error)

PreFillUbCache prefills ubcache with data from the db table `ub_test` or `ub_train`. The ub_train table is generated by SQL like:

	```sql
	create table ratings_train_desc as
		select r.userId, movieId, rating, timestamp
			from ratings_train r order by r.userId, timestamp desc;

 create table ub_train as
		select userId, group_concat(movieId) movieIds ,group_concat(timestamp) timestamps
			from ratings_train_desc group by userId order by timestamp;
	```

A sample row from ub_train looks like:

31699, "246,247,252,260,265", "825638410,825638407,825638403,825638401,825638400"

Types

type MovielensRec added in v0.2.0

type MovielensRec struct {
	DataPath  string
	SampleCnt int
	// contains filtered or unexported fields
}

func (*MovielensRec) GetDashboardOverview added in v0.2.0

func (recSys *MovielensRec) GetDashboardOverview(ctx context.Context) (res rcmd.DashboardOverviewResult, err error)

func (*MovielensRec) GetItemFeature added in v0.2.0

func (recSys *MovielensRec) GetItemFeature(ctx context.Context, itemId int) (tensor rcmd.Tensor, err error)

func (*MovielensRec) GetItemsFeatureOverview added in v0.2.0

func (recSys *MovielensRec) GetItemsFeatureOverview(ctx context.Context, offset, size int, _ map[string][]string) (res rcmd.ItemOverviewResult, err error)

func (*MovielensRec) GetUserBehavior added in v0.2.0

func (recSys *MovielensRec) GetUserBehavior(ctx context.Context, userId int,
	maxLen int64, maxPk int64, maxTs int64) (itemSeq []int, err error)

func (*MovielensRec) GetUserFeature added in v0.2.0

func (recSys *MovielensRec) GetUserFeature(ctx context.Context, userId int) (tensor rcmd.Tensor, err error)

func (*MovielensRec) GetUsersFeatureOverview added in v0.2.0

func (recSys *MovielensRec) GetUsersFeatureOverview(ctx context.Context, offset, size int, _ map[string][]string) (res rcmd.UserItemOverviewResult, err error)

func (*MovielensRec) ItemSeqGenerator added in v0.2.0

func (recSys *MovielensRec) ItemSeqGenerator(ctx context.Context) (ret <-chan string, err error)

func (*MovielensRec) PreRank added in v0.2.0

func (recSys *MovielensRec) PreRank(ctx context.Context) (err error)

PreRank is called before rank; it can be used to prefill the ub cache.

func (*MovielensRec) PreTrain added in v0.2.0

func (recSys *MovielensRec) PreTrain(ctx context.Context) (err error)

func (*MovielensRec) SampleGenerator added in v0.2.0

func (recSys *MovielensRec) SampleGenerator(_ context.Context) (ret <-chan rcmd.Sample, err error)

type YoutubeDnnImpl added in v0.4.0

type YoutubeDnnImpl struct {
	// contains filtered or unexported fields
}

func (*YoutubeDnnImpl) Fit added in v0.4.0

func (d *YoutubeDnnImpl) Fit(trainSample *rcmd.TrainSample) (pred rcmd.PredictAbstract, err error)

func (*YoutubeDnnImpl) Predict added in v0.4.0

func (d *YoutubeDnnImpl) Predict(X tensor.Tensor) tensor.Tensor

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL