The highest tagged major version is v8.

sharding

package module

v6.6.4+incompatible Latest Latest Go to latest Published: May 30, 2019 License: BSD-2-Clause Imports: 13 Imported by: 0

Details

Valid go.mod file

The Go module system was introduced in Go 1.11 and is the official dependency management solution for Go.
Redistributable license

Redistributable licenses place minimal restrictions on how software can be used, modified, and redistributed.
Tagged version

Modules with tagged versions give importers more predictable builds.
Stable version

When a project reaches major version v1 it is considered stable.
Learn more about best practices

Repository

github.com/go-pg/sharding

Links

Open Source Insights

README ¶

PostgreSQL sharding for go-pg and Golang

This package uses a go-pg PostgreSQL client to help shard your data across a set of PostgreSQL servers, as described in Sharding & IDs at Instagram. In short, it maps many (2048-8192) logical shards, implemented using PostgreSQL schemas, to far fewer physical PostgreSQL servers.

API docs: http://godoc.org/github.com/go-pg/sharding. Examples: http://godoc.org/github.com/go-pg/sharding#pkg-examples.

Installation

To install:

go get github.com/go-pg/sharding

Quickstart

package sharding_test

import (
	"fmt"

	"github.com/go-pg/sharding"
	"github.com/go-pg/pg"
)

// User is the example model stored in the per-shard users table.
//
// Users are sharded by AccountId, i.e. users with same account id are
// placed on the same shard.
type User struct {
	// tableName only carries the table name tag. The ?shard placeholder is
	// presumably expanded by the shard connection to that shard's schema
	// name; confirm against the go-pg/sharding docs.
	tableName string `sql:"?shard.users"`

	Id        int64 // filled by the ?shard.next_id() column default declared in createShard
	AccountId int64 // sharding key: CreateUser and GetUsers pick the shard from it
	Name      string
	Emails    []string // stored in the jsonb emails column declared in createShard
}

// String returns the user's name, so printing a User with the fmt package
// shows just the name.
func (u User) String() string {
	return u.Name
}

// CreateUser inserts user into the shard selected by the user's account id.
func CreateUser(cluster *sharding.Cluster, user *User) error {
	shard := cluster.Shard(user.AccountId)
	return shard.Insert(user)
}

// GetUser resolves the shard encoded in the user id and loads the user with
// that id from it. The returned pointer is always non-nil; check the error
// before using it.
func GetUser(cluster *sharding.Cluster, id int64) (*User, error) {
	user := new(User)
	shard := cluster.SplitShard(id)
	err := shard.Model(user).Where("id = ?", id).Select()
	return user, err
}

// GetUsers picks shard by account id and fetches the account's users from
// that shard.
//
// Rows are ordered by id so the result order is stable. Without an ORDER BY
// clause PostgreSQL returns rows in an unspecified order, which makes
// indexing into the result (users[0], users[1]) nondeterministic. Ids
// produced by next_id are sortable, so ascending id order follows creation
// order.
func GetUsers(cluster *sharding.Cluster, accountId int64) ([]User, error) {
	var users []User
	err := cluster.Shard(accountId).
		Model(&users).
		Where("account_id = ?", accountId).
		Order("id ASC").
		Select()
	return users, err
}

// createShard creates database schema for a given shard.
//
// It drops any existing schema for the shard, recreates it, installs the
// id-generation objects from sqlFuncs and creates the users table. The
// statements run in order and the first error is returned unchanged.
//
// account_id is declared bigint so the column can hold every value of the
// int64 User.AccountId field; a plain int column is only 32 bits wide and
// rejects larger account ids.
func createShard(shard *pg.DB) error {
	queries := []string{
		`DROP SCHEMA IF EXISTS ?shard CASCADE`,
		`CREATE SCHEMA ?shard`,
		sqlFuncs,
		`CREATE TABLE ?shard.users (id bigint DEFAULT ?shard.next_id(), account_id bigint, name text, emails jsonb)`,
	}

	for _, q := range queries {
		if _, err := shard.Exec(q); err != nil {
			return err
		}
	}

	return nil
}

// ExampleCluster builds a cluster of one physical server and two logical
// shards, recreates the schema of every shard, inserts three users and
// reads them back by user id and by account id. Any error aborts the
// example with a panic.
func ExampleCluster() {
	db := pg.Connect(&pg.Options{
		User: "postgres",
	})

	dbs := []*pg.DB{db} // list of physical PostgreSQL servers
	nshards := 2        // 2 logical shards
	// Create cluster with 1 physical server and 2 logical shards.
	cluster := sharding.NewCluster(dbs, nshards)

	// Create database schema for our logical shards.
	for i := 0; i < nshards; i++ {
		if err := createShard(cluster.Shard(int64(i))); err != nil {
			panic(err)
		}
	}

	// user1 will be created in shard1 because AccountId % nshards = shard1.
	user1 := &User{
		Name:      "user1",
		AccountId: 1,
		Emails:    []string{"user1@domain"},
	}
	err := CreateUser(cluster, user1)
	if err != nil {
		panic(err)
	}

	// user2 will be created in shard1 too because AccountId is the same.
	user2 := &User{
		Name:      "user2",
		AccountId: 1,
		Emails:    []string{"user2@domain"},
	}
	err = CreateUser(cluster, user2)
	if err != nil {
		panic(err)
	}

	// user3 will be created in shard0 because AccountId % nshards = shard0.
	user3 := &User{
		Name:      "user3",
		AccountId: 2,
		Emails:    []string{"user3@domain"},
	}
	err = CreateUser(cluster, user3)
	if err != nil {
		panic(err)
	}

	// Look user1 up by id only: GetUser derives the shard from the id itself.
	user, err := GetUser(cluster, user1.Id)
	if err != nil {
		panic(err)
	}

	// Fetch every user of account 1 (user1 and user2) from that account's shard.
	users, err := GetUsers(cluster, 1)
	if err != nil {
		panic(err)
	}

	fmt.Println(user)
	fmt.Println(users[0], users[1])
	// Output: user1
	// user1 user2
}

// sqlFuncs is the SQL executed by createShard to install id generation in a
// shard's schema. It creates:
//
//   - the ?shard.id_seq sequence;
//   - ?shard._next_id, which packs an id as the millisecond timestamp minus
//     ?epoch shifted left by 23 bits, OR-ed with the shard id (modulo 2048)
//     shifted left by 12 bits, OR-ed with the sequence value (modulo 4096);
//   - ?shard.next_id, which calls _next_id with clock_timestamp(), ?shard_id
//     and the next value of the sequence.
//
// ?shard, ?shard_id and ?epoch are placeholders; presumably they are filled
// in by the shard connection the query runs on. Confirm against the
// go-pg/sharding docs.
const sqlFuncs = `
CREATE SEQUENCE ?shard.id_seq;

-- _next_id returns unique sortable id.
CREATE FUNCTION ?shard._next_id(tm timestamptz, shard_id int, seq_id bigint)
RETURNS bigint AS $$
DECLARE
  max_shard_id CONSTANT bigint := 2048;
  max_seq_id CONSTANT bigint := 4096;
  id bigint;
BEGIN
  shard_id := shard_id % max_shard_id;
  seq_id := seq_id % max_seq_id;
  id := (floor(extract(epoch FROM tm) * 1000)::bigint - ?epoch) << 23;
  id := id | (shard_id << 12);
  id := id | seq_id;
  RETURN id;
END;
$$
LANGUAGE plpgsql
IMMUTABLE;

CREATE FUNCTION ?shard.next_id()
RETURNS bigint AS $$
BEGIN
   RETURN ?shard._next_id(clock_timestamp(), ?shard_id, nextval('?shard.id_seq'));
END;
$$
LANGUAGE plpgsql;
`

Howto

Please use the Golang PostgreSQL client docs to get an idea of how to use this package.

Documentation ¶

Index ¶

Variables
func MaxId(tm time.Time) int64
func MinId(tm time.Time) int64
func SplitId(id int64) (tm time.Time, shardId int64, seqId int64)
type Cluster
- func NewCluster(dbs []*pg.DB, nshards int) *Cluster
- func NewClusterWithGen(dbs []*pg.DB, nshards int, gen *IdGen) *Cluster
type IdGen
- func NewIdGen(timeBits, shardBits, seqBits uint, epoch time.Time) *IdGen
type ShardIdGen
- func NewShardIdGen(shard int64, gen *IdGen) *ShardIdGen
type SubCluster
type UUID
- func NewUUID(shardId int64, tm time.Time) UUID
- func ParseUUID(b []byte) (UUID, error)

Examples ¶

Cluster

Constants ¶

This section is empty.

Variables ¶

View Source

var (
	DefaultIdGen = NewIdGen(41, 11, 12, epoch)
)

Functions ¶

func MaxId ¶

func MaxId(tm time.Time) int64

MaxId returns max id for the time.

func MinId ¶

func MinId(tm time.Time) int64

MinId returns min id for the time.

func SplitId ¶

func SplitId(id int64) (tm time.Time, shardId int64, seqId int64)

SplitId splits id into time, shard id, and sequence id.

Types ¶

type Cluster ¶

type Cluster struct {
	// contains filtered or unexported fields
}

Cluster maps many (up to 2048) logical database shards implemented using PostgreSQL schemas to far fewer physical PostgreSQL servers.

Example ¶

package main

import (
	"fmt"

	"github.com/go-pg/sharding"

	"github.com/go-pg/pg"
)

// User is the example model kept in each shard's users table.
//
// Users are sharded by AccountId, i.e. users with the same account id are
// placed on the same shard.
type User struct {
	// tableName exists only for its tag, which names the table. The ?shard
	// placeholder is presumably replaced with the schema of the shard the
	// query runs on; confirm against the go-pg/sharding docs.
	tableName string `sql:"?shard.users"`

	Id        int64 // set from the ?shard.next_id() default of the id column (see createShard)
	AccountId int64 // sharding key used by CreateUser and GetUsers to select the shard
	Name      string
	Emails    []string // kept in the jsonb emails column (see createShard)
}

// String returns the user's name; the example relies on it when printing
// users with fmt.Println.
func (u User) String() string {
	return u.Name
}

// CreateUser looks up the shard for the user's account id and inserts the
// user there.
func CreateUser(cluster *sharding.Cluster, user *User) error {
	target := cluster.Shard(user.AccountId)
	return target.Insert(user)
}

// GetUser extracts the shard from the user id and selects the user with
// that id from it. The returned pointer is never nil, so callers must
// check the error before trusting its contents.
func GetUser(cluster *sharding.Cluster, id int64) (*User, error) {
	found := new(User)
	source := cluster.SplitShard(id)
	err := source.Model(found).Where("id = ?", id).Select()
	return found, err
}

// GetUsers picks shard by account id and fetches the account's users from
// that shard.
//
// The query orders rows by id. PostgreSQL does not guarantee any row order
// without ORDER BY, so code that indexes into the result (users[0],
// users[1]) would otherwise see users in an arbitrary order. Ids generated
// by next_id are sortable, so ascending id order matches creation order.
func GetUsers(cluster *sharding.Cluster, accountId int64) ([]User, error) {
	var users []User
	err := cluster.Shard(accountId).
		Model(&users).
		Where("account_id = ?", accountId).
		Order("id ASC").
		Select()
	return users, err
}

// createShard creates database schema for a given shard.
//
// The statements run in order: drop the shard's schema if it exists,
// create it again, install the id-generation objects from sqlFuncs, then
// create the users table. Execution stops at the first error, which is
// returned as is.
//
// account_id uses bigint to match the int64 User.AccountId field; the
// 32-bit int type cannot store account ids above 2147483647.
func createShard(shard *pg.DB) error {
	queries := []string{
		`DROP SCHEMA IF EXISTS ?shard CASCADE`,
		`CREATE SCHEMA ?shard`,
		sqlFuncs,
		`CREATE TABLE ?shard.users (id bigint DEFAULT ?shard.next_id(), account_id bigint, name text, emails jsonb)`,
	}

	for _, q := range queries {
		if _, err := shard.Exec(q); err != nil {
			return err
		}
	}

	return nil
}

// main builds a cluster of one physical server and two logical shards,
// recreates the schema of every shard, inserts three users and prints the
// users read back by user id and by account id. Any error aborts the
// program with a panic.
func main() {
	db := pg.Connect(&pg.Options{
		User: "postgres",
	})

	dbs := []*pg.DB{db} // list of physical PostgreSQL servers
	nshards := 2        // 2 logical shards
	// Create cluster with 1 physical server and 2 logical shards.
	cluster := sharding.NewCluster(dbs, nshards)

	// Create database schema for our logical shards.
	for i := 0; i < nshards; i++ {
		if err := createShard(cluster.Shard(int64(i))); err != nil {
			panic(err)
		}
	}

	// user1 will be created in shard1 because AccountId % nshards = shard1.
	user1 := &User{
		Name:      "user1",
		AccountId: 1,
		Emails:    []string{"user1@domain"},
	}
	err := CreateUser(cluster, user1)
	if err != nil {
		panic(err)
	}

	// user2 will be created in shard1 too because AccountId is the same.
	user2 := &User{
		Name:      "user2",
		AccountId: 1,
		Emails:    []string{"user2@domain"},
	}
	err = CreateUser(cluster, user2)
	if err != nil {
		panic(err)
	}

	// user3 will be created in shard0 because AccountId % nshards = shard0.
	user3 := &User{
		Name:      "user3",
		AccountId: 2,
		Emails:    []string{"user3@domain"},
	}
	err = CreateUser(cluster, user3)
	if err != nil {
		panic(err)
	}

	// Look user1 up by id only: GetUser derives the shard from the id itself.
	user, err := GetUser(cluster, user1.Id)
	if err != nil {
		panic(err)
	}

	// Fetch every user of account 1 (user1 and user2) from that account's shard.
	users, err := GetUsers(cluster, 1)
	if err != nil {
		panic(err)
	}

	fmt.Println(user)
	fmt.Println(users[0], users[1])
}

// sqlFuncs is the SQL executed by createShard to install id generation in a
// shard's schema. It creates:
//
//   - the ?shard.id_seq sequence;
//   - ?shard._next_id, which packs an id as the millisecond timestamp minus
//     our_epoch shifted left by 23 bits, OR-ed with the shard id (modulo
//     2048) shifted left by 12 bits, OR-ed with the sequence value (modulo
//     4096). our_epoch is 1262304000000 ms after the Unix epoch, i.e.
//     2010-01-01 00:00:00 UTC;
//   - ?shard.next_id, which calls _next_id with clock_timestamp(), ?shard_id
//     and the next value of the sequence.
//
// ?shard and ?shard_id are placeholders; presumably they are filled in by
// the shard connection the query runs on. Confirm against the
// go-pg/sharding docs.
const sqlFuncs = `
CREATE SEQUENCE ?shard.id_seq;

-- _next_id returns unique sortable id.
CREATE FUNCTION ?shard._next_id(tm timestamptz, shard_id int, seq_id bigint)
RETURNS bigint AS $$
DECLARE
  our_epoch CONSTANT bigint := 1262304000000;
  max_shard_id CONSTANT bigint := 2048;
  max_seq_id CONSTANT bigint := 4096;
  id bigint;
BEGIN
  shard_id := shard_id % max_shard_id;
  seq_id := seq_id % max_seq_id;
  id := (floor(extract(epoch FROM tm) * 1000)::bigint - our_epoch) << 23;
  id := id | (shard_id << 12);
  id := id | seq_id;
  RETURN id;
END;
$$
LANGUAGE plpgsql
IMMUTABLE;

CREATE FUNCTION ?shard.next_id()
RETURNS bigint AS $$
BEGIN
   RETURN ?shard._next_id(clock_timestamp(), ?shard_id, nextval('?shard.id_seq'));
END;
$$
LANGUAGE plpgsql;
`

Output:

user1
user1 user2

func NewCluster ¶

func NewCluster(dbs []*pg.DB, nshards int) *Cluster

func NewClusterWithGen ¶

func NewClusterWithGen(dbs []*pg.DB, nshards int, gen *IdGen) *Cluster

NewClusterWithGen returns new PostgreSQL cluster consisting of physical dbs and running nshards logical shards.

func (*Cluster) Close ¶

func (cl *Cluster) Close() error

func (*Cluster) DB ¶

func (cl *Cluster) DB(number int64) *pg.DB

DB maps the number to the corresponding database server.

func (*Cluster) DBs ¶

func (cl *Cluster) DBs() []*pg.DB

DBs returns list of database servers in the cluster.

func (*Cluster) ForEachDB ¶

func (cl *Cluster) ForEachDB(fn func(db *pg.DB) error) error

ForEachDB concurrently calls the fn on each database in the cluster.

func (*Cluster) ForEachNShards ¶

func (cl *Cluster) ForEachNShards(n int, fn func(shard *pg.DB) error) error

ForEachNShards concurrently calls the fn on each N shards in the cluster.

func (*Cluster) ForEachShard ¶

func (cl *Cluster) ForEachShard(fn func(shard *pg.DB) error) error

ForEachShard concurrently calls the fn on each shard in the cluster. It is the same as ForEachNShards(1, fn).

func (*Cluster) Shard ¶

func (cl *Cluster) Shard(number int64) *pg.DB

Shard maps the number to the corresponding shard in the cluster.

func (*Cluster) Shards ¶

func (cl *Cluster) Shards(db *pg.DB) []*pg.DB

Shards returns list of shards running in the db. If db is nil all shards are returned.

func (*Cluster) SplitShard ¶

func (cl *Cluster) SplitShard(id int64) *pg.DB

SplitShard uses SplitId to extract shard id from the id and then returns corresponding Shard in the cluster.

func (*Cluster) SubCluster ¶

func (cl *Cluster) SubCluster(number int64, size int) *SubCluster

SubCluster returns a subset of the cluster of the given size.

type IdGen ¶

type IdGen struct {
	// contains filtered or unexported fields
}

func NewIdGen ¶

func NewIdGen(timeBits, shardBits, seqBits uint, epoch time.Time) *IdGen

func (*IdGen) MaxId ¶

func (g *IdGen) MaxId(tm time.Time, shard int64) int64

MaxId returns max id for the time.

func (*IdGen) NextId ¶

func (g *IdGen) NextId(tm time.Time, shard, seq int64) int64

NextId returns incremental id for the time. Note that you can only generate 4096 unique numbers per millisecond.

func (*IdGen) NumShards ¶

func (g *IdGen) NumShards() int

func (*IdGen) SplitId ¶

func (g *IdGen) SplitId(id int64) (tm time.Time, shardId int64, seqId int64)

SplitId splits id into time, shard id, and sequence id.

type ShardIdGen ¶

type ShardIdGen struct {
	// contains filtered or unexported fields
}

IdGen generates sortable unique int64 numbers that consist of: - 41 bits for time in milliseconds. - 11 bits for shard id. - 12 bits for auto-incrementing sequence.

As a result we can generate 4096 ids per millisecond for each of 2048 shards. Minimum supported time is 1975-02-28, maximum is 2044-12-31.

func NewShardIdGen ¶

func NewShardIdGen(shard int64, gen *IdGen) *ShardIdGen

NewShardIdGen returns id generator for the shard.

func (*ShardIdGen) MaxId ¶

func (g *ShardIdGen) MaxId(tm time.Time) int64

MaxId returns max id for the time.

func (*ShardIdGen) NextId ¶

func (g *ShardIdGen) NextId(tm time.Time) int64

NextId returns incremental id for the time. Note that you can only generate 4096 unique numbers per millisecond.

func (*ShardIdGen) SplitId ¶

func (g *ShardIdGen) SplitId(id int64) (tm time.Time, shardId int64, seqId int64)

SplitId splits id into time, shard id, and sequence id.

type SubCluster ¶

type SubCluster struct {
	// contains filtered or unexported fields
}

SubCluster is a subset of the cluster.

func (*SubCluster) ForEachNShards ¶

func (cl *SubCluster) ForEachNShards(n int, fn func(shard *pg.DB) error) error

ForEachNShards concurrently calls the fn on each N shards in the subcluster.

func (*SubCluster) ForEachShard ¶

func (cl *SubCluster) ForEachShard(fn func(shard *pg.DB) error) error

ForEachShard concurrently calls the fn on each shard in the subcluster. It is the same as ForEachNShards(1, fn).

func (*SubCluster) Shard ¶

func (cl *SubCluster) Shard(number int64) *pg.DB

Shard maps the number to the corresponding shard in the subcluster.

func (*SubCluster) SplitShard ¶

func (cl *SubCluster) SplitShard(id int64) *pg.DB

SplitShard uses SplitId to extract shard id from the id and then returns corresponding Shard in the subcluster.

type UUID ¶

type UUID [uuidLen]byte

func NewUUID ¶

func NewUUID(shardId int64, tm time.Time) UUID

func ParseUUID ¶

func ParseUUID(b []byte) (UUID, error)

func (UUID) AppendValue ¶

func (u UUID) AppendValue(b []byte, quote int) ([]byte, error)

func (*UUID) IsZero ¶

func (u *UUID) IsZero() bool

func (*UUID) Scan ¶

func (u *UUID) Scan(b interface{}) error

func (*UUID) ShardId ¶

func (u *UUID) ShardId() int64

func (*UUID) Split ¶

func (u *UUID) Split() (shardId int64, tm time.Time)

func (UUID) String ¶

func (u UUID) String() string

func (*UUID) Time ¶

func (u *UUID) Time() time.Time

func (UUID) Value ¶

func (u UUID) Value() (driver.Value, error)

Source Files ¶

View all Source files

?	: This menu
/	: Search site
f or F	: Jump to
y or Y	: Canonical URL