storage

package
v0.0.0-...-feafc9d Latest Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: Oct 23, 2015 License: Apache-2.0 Imports: 33 Imported by: 0

Documentation

Overview

Package storage provides access to the Store and Range abstractions. Each Cockroach node handles one or more stores, each of which multiplexes to one or more ranges, identified by [start, end) keys. Ranges are contiguous regions of the keyspace. Each range implements an instance of the Raft consensus algorithm to synchronize participating range replicas.

Each store is represented by a single engine.Engine instance. The ranges hosted by a store all have access to the same engine, but write to only a range-limited keyspace within it. Ranges access the underlying engine via the MVCC interface, which provides historical versioned values.

Package storage is a generated protocol buffer package.

It is generated from these files:
	cockroach/storage/status.proto

It has these top-level messages:
	StoreStatus
Example (Rebalancing)
// Model a set of stores in a cluster,
// randomly adding / removing stores and adding bytes.
g := gossip.New(nil, 0, nil)
stopper := stop.NewStopper()
defer stopper.Stop()
sp := NewStorePool(g, TestTimeUntilStoreDeadOff, stopper)
alloc := MakeAllocator(sp, RebalancingOptions{AllowRebalance: true, Deterministic: true})
alloc.randGen = rand.New(rand.NewSource(0))

var wg sync.WaitGroup
g.RegisterCallback(gossip.MakePrefixPattern(gossip.KeyStorePrefix), func(_ string, _ []byte) { wg.Done() })

const generations = 100
const nodes = 20

// Initialize testStores.
var testStores [nodes]testStore
for i := 0; i < len(testStores); i++ {
	testStores[i].StoreID = roachpb.StoreID(i)
	testStores[i].Node = roachpb.NodeDescriptor{NodeID: roachpb.NodeID(i)}
	testStores[i].Capacity = roachpb.StoreCapacity{Capacity: 1 << 30, Available: 1 << 30}
}
// Initialize the cluster with a single range.
testStores[0].add(alloc.randGen.Int63n(1 << 20))

for i := 0; i < generations; i++ {
	// First loop through test stores and add data.
	wg.Add(len(testStores))
	for j := 0; j < len(testStores); j++ {
		// Add a pretend range to the testStore if there's already one.
		if testStores[j].Capacity.RangeCount > 0 {
			testStores[j].add(alloc.randGen.Int63n(1 << 20))
		}
		key := gossip.MakeStoreKey(roachpb.StoreID(j))
		if err := g.AddInfoProto(key, &testStores[j].StoreDescriptor, 0); err != nil {
			panic(err)
		}
	}
	wg.Wait()

	// Next loop through test stores and maybe rebalance.
	for j := 0; j < len(testStores); j++ {
		ts := &testStores[j]
		if alloc.ShouldRebalance(ts.StoreID) {
			target := alloc.RebalanceTarget(ts.StoreID, roachpb.Attributes{}, []roachpb.ReplicaDescriptor{{NodeID: ts.Node.NodeID, StoreID: ts.StoreID}})
			if target != nil {
				testStores[j].rebalance(&testStores[int(target.StoreID)], alloc.randGen.Int63n(1<<20))
			}
		}
	}

	// Output store capacities as hexidecimal 2-character values.
	if i%(generations/50) == 0 {
		var maxBytes int64
		for j := 0; j < len(testStores); j++ {
			bytes := testStores[j].Capacity.Capacity - testStores[j].Capacity.Available
			if bytes > maxBytes {
				maxBytes = bytes
			}
		}
		if maxBytes > 0 {
			for j := 0; j < len(testStores); j++ {
				endStr := " "
				if j == len(testStores)-1 {
					endStr = ""
				}
				bytes := testStores[j].Capacity.Capacity - testStores[j].Capacity.Available
				fmt.Printf("%03d%s", (999*bytes)/maxBytes, endStr)
			}
			fmt.Printf("\n")
		}
	}
}

var totBytes int64
var totRanges int32
for i := 0; i < len(testStores); i++ {
	totBytes += testStores[i].Capacity.Capacity - testStores[i].Capacity.Available
	totRanges += testStores[i].Capacity.RangeCount
}
fmt.Printf("Total bytes=%d, ranges=%d\n", totBytes, totRanges)
Output:

138 000 000 000 000 000 000 999 000 000 000 000 000 000 000 000 000 000 000 000
922 319 000 000 000 239 000 999 000 000 000 000 000 214 073 000 000 000 190 000
999 505 480 000 634 352 421 644 212 331 396 144 000 242 419 275 000 000 727 028
999 678 908 705 350 558 549 714 651 824 895 694 000 373 610 490 372 106 492 796
932 701 763 999 660 706 571 702 787 945 848 678 062 692 762 413 603 252 513 882
937 656 875 984 734 717 676 685 910 895 847 841 349 754 864 463 722 377 655 999
885 701 805 999 647 744 802 659 778 834 830 725 569 761 922 587 684 458 693 935
813 650 709 931 583 733 843 619 793 881 768 658 565 713 956 598 733 594 656 999
873 727 721 999 544 812 848 666 817 943 831 658 556 769 927 554 799 733 670 869
937 765 827 999 543 875 907 670 929 997 913 768 621 853 922 618 878 832 733 937
902 819 744 988 547 904 922 688 879 999 812 710 554 789 890 591 808 865 658 932
870 873 846 997 596 937 899 765 864 969 855 751 577 824 951 579 858 908 653 999
880 833 856 999 640 918 932 774 920 930 869 739 686 784 853 553 885 941 685 986
874 797 808 999 645 925 928 781 920 956 859 762 678 761 819 627 899 941 725 959
886 801 835 999 638 984 927 825 968 958 860 760 813 716 800 638 908 908 798 945
860 840 836 973 634 999 944 834 977 923 848 769 846 728 836 605 865 915 781 896
859 864 891 993 633 961 999 863 951 885 857 791 864 731 871 656 868 920 790 879
866 845 896 999 688 966 998 904 942 864 861 815 867 756 879 704 919 940 804 888
825 850 876 983 712 945 999 885 943 870 854 838 848 771 825 701 939 940 809 885
821 872 915 999 711 927 968 928 963 898 846 865 863 814 858 719 935 951 818 877
829 868 940 999 729 919 938 911 957 905 846 872 860 844 872 724 920 941 844 832
825 848 901 999 736 882 911 926 937 935 876 901 824 870 892 714 902 927 844 846
837 861 921 999 872 890 875 911 894 939 868 921 871 894 887 740 905 948 881 879
876 879 956 999 893 889 875 910 910 953 880 898 900 895 905 736 920 965 918 916
921 897 909 999 924 907 895 936 955 974 901 902 933 937 929 763 909 997 914 944
930 882 892 995 925 910 907 942 911 952 915 922 936 911 944 819 891 999 906 953
904 867 889 989 913 890 877 932 931 937 936 927 939 915 936 843 901 999 937 915
936 916 872 937 920 901 900 917 928 972 949 936 917 923 934 897 896 999 913 907
979 963 909 954 923 950 953 942 969 999 975 970 925 942 971 948 933 993 958 952
978 950 903 942 905 949 937 927 948 991 980 948 921 929 932 946 920 999 942 937
962 958 937 947 910 967 949 934 945 983 966 951 917 918 957 965 923 999 961 957
972 953 942 921 891 955 949 921 964 977 969 951 927 930 953 953 928 999 950 936
965 978 953 928 896 963 964 928 983 986 948 972 963 921 953 970 928 999 937 941
961 964 936 930 930 969 964 930 964 973 965 983 974 928 958 969 949 999 941 961
977 963 931 955 959 979 970 942 941 965 963 980 951 948 966 971 954 999 962 950
988 966 932 960 951 973 978 942 953 964 967 989 941 959 986 956 950 999 962 954
989 950 955 951 940 985 964 958 941 982 970 981 946 968 999 965 959 984 979 939
976 959 962 956 959 975 955 974 966 988 980 991 956 967 992 953 961 999 959 925
983 979 962 973 970 983 962 970 997 999 987 997 970 969 997 973 972 996 981 940
980 959 958 968 954 983 952 966 973 974 978 981 975 947 995 977 956 999 972 932
960 973 955 982 956 970 944 947 979 978 985 977 962 939 999 986 945 995 965 922
956 983 941 982 957 958 945 950 985 973 987 986 944 947 984 999 948 959 961 914
970 972 946 968 968 978 951 945 980 973 988 981 956 957 967 999 955 945 966 913
967 976 943 974 973 969 951 948 994 963 986 984 949 958 984 999 951 945 980 926
960 955 933 954 973 964 943 929 973 943 988 991 948 959 981 999 950 951 978 927
954 948 912 953 960 968 937 931 977 936 979 978 945 950 970 999 945 949 971 925
953 965 922 947 963 966 933 919 992 943 973 991 933 953 975 999 949 947 970 907
951 983 923 958 979 972 945 936 999 941 982 987 946 960 973 975 959 940 970 925
954 982 909 948 973 967 945 938 999 950 994 976 957 960 970 968 964 952 966 932
956 964 896 944 963 953 924 940 999 936 981 961 939 956 965 951 937 937 959 908
Total bytes=1042395713, ranges=1919

Index

Examples

Constants

View Source
const (
	// TestTimeUntilStoreDead is the the test value for TimeUntilStoreDead to
	// quickly mark stores as dead.
	TestTimeUntilStoreDead = 5 * time.Millisecond

	// TestTimeUntilStoreDeadOff is the test value for TimeUntilStoreDead that
	// prevents the store pool from marking stores as dead.
	TestTimeUntilStoreDeadOff = 24 * time.Hour
)
View Source
const (
	// DefaultHeartbeatInterval is how often heartbeats are sent from the
	// transaction coordinator to a live transaction. These keep it from
	// being preempted by other transactions writing the same keys. If a
	// transaction fails to be heartbeat within 2x the heartbeat interval,
	// it may be aborted by conflicting txns.
	DefaultHeartbeatInterval = 5 * time.Second
)
View Source
const (

	// DefaultLeaderLeaseDuration is the default duration of the leader lease.
	DefaultLeaderLeaseDuration = time.Second
)

raftInitialLogIndex is the starting point for the raft log. We bootstrap the raft membership by synthesizing a snapshot as if there were some discarded prefix to the log, so we must begin the log at an arbitrary index greater than 1.

View Source
const (
	// GCResponseCacheExpiration is the expiration duration for response
	// cache entries.
	GCResponseCacheExpiration = 1 * time.Hour
)
View Source
const (
	// MinTSCacheWindow specifies the minimum duration to hold entries in
	// the cache before allowing eviction. After this window expires,
	// transactions writing to this node with timestamps lagging by more
	// than minCacheWindow will necessarily have to advance their commit
	// timestamp.
	MinTSCacheWindow = 10 * time.Second
)
View Source
const (

	// ReplicaGCQueueInactivityThreshold is the inactivity duration after which
	// a range will be considered for garbage collection. Exported for testing.
	ReplicaGCQueueInactivityThreshold = 10 * 24 * time.Hour // 10 days
)

Variables

View Source
var (
	ErrInvalidLengthStatus = fmt.Errorf("proto: negative length found during unmarshaling")
	ErrIntOverflowStatus   = fmt.Errorf("proto: integer overflow")
)
View Source
var (

	// TestStoreContext has some fields initialized with values relevant
	// in tests.
	TestStoreContext = StoreContext{
		RaftTickInterval:           100 * time.Millisecond,
		RaftHeartbeatIntervalTicks: 1,
		RaftElectionTimeoutTicks:   2,
		ScanInterval:               10 * time.Minute,
	}
)
View Source
var TestingCommandFilter func(roachpb.Request, roachpb.Header) error

TestingCommandFilter may be set in tests to intercept the handling of commands and artificially generate errors. Return nil to continue with regular processing or non-nil to terminate processing with the returned error. Note that in a multi-replica test this filter will be run once for each replica and must produce consistent results each time. Should only be used in tests in the storage and storage_test packages.

Functions

func DeleteRange

func DeleteRange(txn *client.Txn, b *client.Batch, key roachpb.RKey) error

DeleteRange removes a range from the RangeTree. This should only be called from operations that remove ranges, such as AdminMerge.

func InsertRange

func InsertRange(txn *client.Txn, b *client.Batch, key roachpb.RKey) error

InsertRange adds a new range to the RangeTree. This should only be called from operations that create new ranges, such as AdminSplit.

func ProcessStoreEvent

func ProcessStoreEvent(l StoreEventListener, event interface{})

ProcessStoreEvent dispatches an event on the StoreEventListener.

func SetupRangeTree

func SetupRangeTree(batch engine.Engine, ms *engine.MVCCStats, timestamp roachpb.Timestamp, startKey roachpb.RKey) error

SetupRangeTree creates a new RangeTree. This should only be called as part of store.BootstrapRange.

Types

type Allocator

type Allocator struct {
	// contains filtered or unexported fields
}

Allocator makes allocation decisions based on available capacity in other stores which match the required attributes for a desired range replica.

When choosing a new allocation target, three candidates from available stores meeting a max fraction of bytes used threshold (maxFractionUsedThreshold) are chosen at random and the least loaded of the three is selected in order to bias loading towards a more balanced cluster, while still spreading load over all available servers. "Load" is defined according to fraction of bytes used, if greater than minFractionUsedThreshold; otherwise it's defined according to range count.

When choosing a rebalance target, a random store is selected from amongst the set of stores with fraction of bytes within rebalanceFromMean from the mean.

func MakeAllocator

func MakeAllocator(storePool *StorePool, options RebalancingOptions) Allocator

MakeAllocator creates a new allocator using the specified StorePool.

func (*Allocator) AllocateTarget

func (a *Allocator) AllocateTarget(required roachpb.Attributes, existing []roachpb.ReplicaDescriptor, relaxConstraints bool,
	filter func(storeDesc *roachpb.StoreDescriptor, count, used *stat) bool) (*roachpb.StoreDescriptor, error)

AllocateTarget returns a suitable store for a new allocation with the required attributes. Nodes already accommodating existing replicas are ruled out as targets. If relaxConstraints is true, then the required attributes will be relaxed as necessary, from least specific to most specific, in order to allocate a target. If needed, a filter function can be added that further filter the results. The function will be passed the storeDesc and the used and new counts. It returns a bool indicating inclusion or exclusion from the set of stores being considered.

func (*Allocator) ComputeAction

func (a *Allocator) ComputeAction(zone config.ZoneConfig, desc *roachpb.RangeDescriptor) (
	AllocatorAction, float64)

ComputeAction determines the exact operation needed to repair the supplied range, as governed by the supplied zone configuration. It returns the required action that should be taken and a replica on which the action should be performed.

func (Allocator) RebalanceTarget

func (a Allocator) RebalanceTarget(storeID roachpb.StoreID, required roachpb.Attributes, existing []roachpb.ReplicaDescriptor) *roachpb.StoreDescriptor

RebalanceTarget returns a suitable store for a rebalance target with required attributes. Rebalance targets are selected via the same mechanism as AllocateTarget(), except the chosen target must follow some additional criteria. Namely, if chosen, it must further the goal of balancing the cluster.

The supplied parameters are the StoreID of the replica being rebalanced, the required attributes for the replica being rebalanced, and a list of the existing replicas of the range (which must include the replica being rebalanced).

Simply ignoring a rebalance opportunity in the event that the target chosen by AllocateTarget() doesn't fit balancing criteria is perfectly fine, as other stores in the cluster will also be doing their probabilistic best to rebalance. This helps prevent a stampeding herd targeting an abnormally under-utilized store.

func (Allocator) RemoveTarget

func (a Allocator) RemoveTarget(existing []roachpb.ReplicaDescriptor) (roachpb.ReplicaDescriptor, error)

RemoveTarget returns a suitable replica to remove from the provided replica set. It attempts to consider which of the provided replicas would be the best candidate for removal.

TODO(mrtracy): removeTarget eventually needs to accept the attributes from the zone config associated with the provided replicas. This will allow it to make correct decisions in the case of ranges with heterogeneous replica requirements (i.e. multiple data centers).

func (Allocator) ShouldRebalance

func (a Allocator) ShouldRebalance(storeID roachpb.StoreID) bool

ShouldRebalance returns whether the specified store should attempt to rebalance a replica to another store.

type AllocatorAction

type AllocatorAction int

AllocatorAction enumerates the various replication adjustments that may be recommended by the allocator.

const (
	AllocatorNoop AllocatorAction
	AllocatorRemove
	AllocatorAdd
	AllocatorRemoveDead
)

These are the possible allocator actions.

type BeginScanRangesEvent

type BeginScanRangesEvent struct {
	StoreID roachpb.StoreID
}

BeginScanRangesEvent occurs when the store is about to scan over all ranges. During such a scan, each existing range will be published to the feed as a RegisterRangeEvent with the Scan flag set. This is used because downstream consumers may be tracking statistics via the Deltas in UpdateRangeEvent; this event informs subscribers to clear currently cached values.

type CommandQueue

type CommandQueue struct {
	// contains filtered or unexported fields
}

A CommandQueue maintains an interval tree of keys or key ranges for executing commands. New commands affecting keys or key ranges must wait on already-executing commands which overlap their key range.

Before executing, a command invokes GetWait() to initialize a WaitGroup with the number of overlapping commands which are already running. The wait group is waited on by the caller for confirmation that all overlapping, pending commands have completed and the pending command can proceed.

After waiting, a command is added to the queue's already-executing set via Add(). Add accepts a parameter indicating whether the command is read-only. Read-only commands don't need to wait on other read-only commands, so the wait group returned via GetWait() doesn't include read-only on read-only overlapping commands as an optimization.

Once commands complete, Remove() is invoked to remove the executing command and decrement the counts on any pending WaitGroups, possibly signaling waiting commands who were gated by the executing command's affected key(s).

CommandQueue is not thread safe.

func NewCommandQueue

func NewCommandQueue() *CommandQueue

NewCommandQueue returns a new command queue.

func (*CommandQueue) Add

func (cq *CommandQueue) Add(readOnly bool, spans ...roachpb.Span) []interface{}

Add adds commands to the queue which affect the specified key ranges. Ranges without an end key affect only the start key. The returned interface is the key for the command queue and must be re-supplied on subsequent invocation of Remove().

Add should be invoked after waiting on already-executing, overlapping commands via the WaitGroup initialized through GetWait().

func (*CommandQueue) Clear

func (cq *CommandQueue) Clear()

Clear removes all executing commands, signaling any waiting commands.

func (*CommandQueue) GetWait

func (cq *CommandQueue) GetWait(readOnly bool, wg *sync.WaitGroup, spans ...roachpb.Span)

GetWait initializes the supplied wait group with the number of executing commands which overlap the specified key ranges. If an end key is empty, it only affects the start key. The caller should call wg.Wait() to wait for confirmation that all gating commands have completed or failed, and then call Add() to add the keys to the command queue. readOnly is true if the requester is a read-only command; false for read-write.

func (*CommandQueue) Remove

func (cq *CommandQueue) Remove(keys []interface{})

Remove is invoked to signal that the command associated with the specified key has completed and should be removed. Any pending commands waiting on this command will be signaled if this is the only command upon which they are still waiting.

Remove is invoked after a mutating command has been committed to the Raft log and applied to the underlying state machine. Similarly, Remove is invoked after a read-only command has been executed against the underlying state machine.

type EndScanRangesEvent

type EndScanRangesEvent struct {
	StoreID roachpb.StoreID
}

EndScanRangesEvent occurs when the store has finished scanning all ranges. Every BeginScanRangeEvent will eventually be followed by an EndScanRangeEvent.

type MergeRangeEvent

type MergeRangeEvent struct {
	StoreID roachpb.StoreID
	Merged  UpdateRangeEvent
	Removed RemoveRangeEvent
}

MergeRangeEvent occurs whenever a range is merged into another. This Event contains two component events: an UpdateRangeEvent for the range which subsumed the other, and a RemoveRangeEvent for the range that was subsumed.

type NotBootstrappedError

type NotBootstrappedError struct{}

A NotBootstrappedError indicates that an engine has not yet been bootstrapped due to a store identifier not being present.

func (*NotBootstrappedError) Error

func (e *NotBootstrappedError) Error() string

Error formats error.

type RangeManager

type RangeManager interface {
	// Accessors for shared state.
	ClusterID() string
	StoreID() roachpb.StoreID
	Clock() *hlc.Clock
	Engine() engine.Engine
	DB() *client.DB

	Gossip() *gossip.Gossip

	Stopper() *stop.Stopper
	EventFeed() StoreEventFeed
	Context(context.Context) context.Context

	// Range and replica manipulation methods.
	LookupReplica(start, end roachpb.RKey) *Replica
	GetReplica(rangeID roachpb.RangeID) (*Replica, error)
	MergeRange(subsumingRng *Replica, updatedEndKey roachpb.RKey, subsumedRangeID roachpb.RangeID) error
	NewRangeDescriptor(start, end roachpb.RKey, replicas []roachpb.ReplicaDescriptor) (*roachpb.RangeDescriptor, error)
	NewSnapshot() engine.Engine
	ProposeRaftCommand(cmdIDKey, roachpb.RaftCommand) <-chan error
	RemoveReplica(rng *Replica) error
	Tracer() *tracer.Tracer
	SplitRange(origRng, newRng *Replica) error
	// contains filtered or unexported methods
}

A RangeManager is an interface satisfied by Store through which ranges contained in the store can access the methods required for splitting. TODO(tschottdorf): consider moving LocalSender to storage, in which case this can be unexported.

type RebalancingOptions

type RebalancingOptions struct {
	// AllowRebalance allows this store to attempt to rebalance its own
	// replicas to other stores.
	AllowRebalance bool

	// Deterministic makes rebalance decisions deterministic, based on
	// current cluster statistics. If this flag is not set, rebalance operations
	// will have random behavior. This flag is intended to be set for testing
	// purposes only.
	Deterministic bool
}

RebalancingOptions are configurable options which effect the way that the replicate queue will handle rebalancing opportunities.

type RegisterRangeEvent

type RegisterRangeEvent struct {
	StoreID roachpb.StoreID
	Desc    *roachpb.RangeDescriptor
	Stats   engine.MVCCStats
	Scan    bool
}

RegisterRangeEvent occurs in two scenarios. Firstly, while a store broadcasts its list of ranges to initialize one or more new accumulators (with Scan set to true), or secondly, when a new range is initialized on the store (for example through replication), with Scan set to false. This event includes the Range's RangeDescriptor and current MVCCStats.

type RemoveRangeEvent

type RemoveRangeEvent struct {
	StoreID roachpb.StoreID
	Desc    *roachpb.RangeDescriptor
	Stats   engine.MVCCStats
}

RemoveRangeEvent occurs whenever a Range is removed from a store. This structure includes the Range's RangeDescriptor and the Range's previous MVCCStats before it was removed.

type Replica

type Replica struct {
	sync.RWMutex // Protects the following fields:
	// contains filtered or unexported fields
}

A Replica is a contiguous keyspace with writes managed via an instance of the Raft consensus algorithm. Many ranges may exist in a store and they are unlikely to be contiguous. Ranges are independent units and are responsible for maintaining their own integrity by replacing failed replicas, splitting and merging as appropriate.

func NewReplica

func NewReplica(desc *roachpb.RangeDescriptor, rm RangeManager) (*Replica, error)

NewReplica initializes the replica using the given metadata.

func (*Replica) AdminMerge

AdminMerge extends this range to subsume the range that comes next in the key space. The merge is performed inside of a distributed transaction which writes the updated range descriptor for the subsuming range and deletes the range descriptor for the subsumed one. It also updates the range addressing metadata. The handover of responsibility for the reassigned key range is carried out seamlessly through a merge trigger carried out as part of the commit of that transaction. A merge requires that the two ranges are collocated on the same set of replicas.

The supplied RangeDescriptor is used as a form of optimistic lock. See the comment of "AdminSplit" for more information on this pattern.

func (*Replica) AdminSplit

AdminSplit divides the range into into two ranges, using either args.SplitKey (if provided) or an internally computed key that aims to roughly equipartition the range by size. The split is done inside of a distributed txn which writes updated and new range descriptors, and updates the range addressing metadata. The handover of responsibility for the reassigned key range is carried out seamlessly through a split trigger carried out as part of the commit of that transaction.

The supplied RangeDescriptor is used as a form of optimistic lock. An operation which might split a range should obtain a copy of the range's current descriptor before making the decision to split. If the decision is affirmative the descriptor is passed to AdminSplit, which performs a Conditional Put on the RangeDescriptor to ensure that no other operation has modified the range in the time the decision was being made. TODO(tschottdorf): should assert that split key is not a local key.

func (*Replica) Append

func (r *Replica) Append(entries []raftpb.Entry) error

Append implements the multiraft.WriteableGroupStorage interface.

func (*Replica) ApplySnapshot

func (r *Replica) ApplySnapshot(snap raftpb.Snapshot) error

ApplySnapshot implements the multiraft.WriteableGroupStorage interface.

func (*Replica) ChangeReplicas

func (r *Replica) ChangeReplicas(changeType roachpb.ReplicaChangeType, replica roachpb.ReplicaDescriptor, desc *roachpb.RangeDescriptor) error

ChangeReplicas adds or removes a replica of a range. The change is performed in a distributed transaction and takes effect when that transaction is committed. When removing a replica, only the NodeID and StoreID fields of the Replica are used.

The supplied RangeDescriptor is used as a form of optimistic lock. See the comment of "AdminSplit" for more information on this pattern.

func (*Replica) ConditionalPut

ConditionalPut sets the value for a specified key only if the expected value matches. If not, the return value contains the actual value.

func (*Replica) ContainsKey

func (r *Replica) ContainsKey(key roachpb.Key) bool

ContainsKey returns whether this range contains the specified key.

func (*Replica) ContainsKeyRange

func (r *Replica) ContainsKeyRange(start, end roachpb.Key) bool

ContainsKeyRange returns whether this range contains the specified key range from start to end.

func (*Replica) Delete

Delete deletes the key and value specified by key.

func (*Replica) DeleteRange

DeleteRange deletes the range of key/value pairs specified by start and end keys.

func (*Replica) Desc

func (r *Replica) Desc() *roachpb.RangeDescriptor

Desc atomically returns the range's descriptor.

func (*Replica) Destroy

func (r *Replica) Destroy() error

Destroy cleans up all data associated with this range, leaving a tombstone.

func (*Replica) EndTransaction

EndTransaction either commits or aborts (rolls back) an extant transaction according to the args.Commit parameter. TODO(tschottdorf): return nil reply on any error. The error itself must be the authoritative source of information.

func (*Replica) Entries

func (r *Replica) Entries(lo, hi, maxBytes uint64) ([]raftpb.Entry, error)

Entries implements the raft.Storage interface. Note that maxBytes is advisory and this method will always return at least one entry even if it exceeds maxBytes. Passing maxBytes equal to zero disables size checking. TODO(bdarnell): consider caching for recent entries, if rocksdb's builtin caching is insufficient.

func (*Replica) FirstIndex

func (r *Replica) FirstIndex() (uint64, error)

FirstIndex implements the raft.Storage interface.

func (*Replica) GC

GC iterates through the list of keys to garbage collect specified in the arguments. MVCCGarbageCollect is invoked on each listed key along with the expiration timestamp. The GC metadata specified in the args is persisted after GC.

func (*Replica) Get

Get returns the value for a specified key.

func (*Replica) GetGCMetadata

func (r *Replica) GetGCMetadata() (*roachpb.GCMetadata, error)

GetGCMetadata reads the latest GC metadata for this range.

func (*Replica) GetLastVerificationTimestamp

func (r *Replica) GetLastVerificationTimestamp() (roachpb.Timestamp, error)

GetLastVerificationTimestamp reads the timestamp at which the range's data was last verified.

func (*Replica) GetMVCCStats

func (r *Replica) GetMVCCStats() engine.MVCCStats

GetMVCCStats returns a copy of the MVCC stats object for this range.

func (*Replica) GetMaxBytes

func (r *Replica) GetMaxBytes() int64

GetMaxBytes atomically gets the range maximum byte limit.

func (*Replica) GetReplica

func (r *Replica) GetReplica() *roachpb.ReplicaDescriptor

GetReplica returns the replica for this range from the range descriptor. Returns nil if the replica is not found.

func (*Replica) HeartbeatTxn

HeartbeatTxn updates the transaction status and heartbeat timestamp after receiving transaction heartbeat messages from coordinator. Returns the updated transaction.

func (*Replica) Increment

Increment increments the value (interpreted as varint64 encoded) and returns the newly incremented value (encoded as varint64). If no value exists for the key, zero is incremented.

func (*Replica) InitialState

func (r *Replica) InitialState() (raftpb.HardState, raftpb.ConfState, error)

InitialState implements the raft.Storage interface.

func (*Replica) IsFirstRange

func (r *Replica) IsFirstRange() bool

IsFirstRange returns true if this is the first range.

func (*Replica) LastIndex

func (r *Replica) LastIndex() (uint64, error)

LastIndex implements the raft.Storage interface.

func (*Replica) LeaderLease

LeaderLease sets the leader lease for this range. The command fails only if the desired start timestamp collides with a previous lease. Otherwise, the start timestamp is wound back to right after the expiration of the previous lease (or zero). If this range replica is already the lease holder, the expiration will be extended or shortened as indicated. For a new lease, all duties required of the range leader are commenced, including clearing the command queue and timestamp cache.

func (*Replica) Less

func (r *Replica) Less(i btree.Item) bool

Less returns true if the range's end key is less than the given item's key.

func (*Replica) Merge

Merge is used to merge a value into an existing key. Merge is an efficient accumulation operation which is exposed by RocksDB, used by Cockroach for the efficient accumulation of certain values. Due to the difficulty of making these operations transactional, merges are not currently exposed directly to clients. Merged values are explicitly not MVCC data.

func (*Replica) PushTxn

PushTxn resolves conflicts between concurrent txns (or between a non-transactional reader or writer and a txn) in several ways depending on the statuses and priorities of the conflicting transactions. The PushTxn operation is invoked by a "pusher" (the writer trying to abort a conflicting txn or the reader trying to push a conflicting txn's commit timestamp forward), who attempts to resolve a conflict with a "pushee" (args.PushTxn -- the pushee txn whose intent(s) caused the conflict). A pusher is either transactional, in which case PushTxn is completely initialized, or not, in which case the PushTxn has only the priority set.

Txn already committed/aborted: If pushee txn is committed or aborted return success.

Txn Timeout: If pushee txn entry isn't present or its LastHeartbeat timestamp isn't set, use its as LastHeartbeat. If current time - LastHeartbeat > 2 * DefaultHeartbeatInterval, then the pushee txn should be either pushed forward, aborted, or confirmed not pending, depending on value of Request.PushType.

Old Txn Epoch: If persisted pushee txn entry has a newer Epoch than PushTxn.Epoch, return success, as older epoch may be removed.

Lower Txn Priority: If pushee txn has a lower priority than pusher, adjust pushee's persisted txn depending on value of args.PushType. If args.PushType is ABORT_TXN, set txn.Status to ABORTED, and priority to one less than the pusher's priority and return success. If args.PushType is PUSH_TIMESTAMP, set txn.Timestamp to just after PushTo.

Higher Txn Priority: If pushee txn has a higher priority than pusher, return TransactionPushError. Transaction will be retried with priority one less than the pushee's higher priority.

func (*Replica) Put

Put sets the value for a specified key.

func (*Replica) RangeLookup

RangeLookup is used to look up RangeDescriptors - a RangeDescriptor is a metadata structure which describes the key range and replica locations of a distinct range in the cluster.

RangeDescriptors are stored as values in the cockroach cluster's key-value store. However, they are always stored using special "Range Metadata keys", which are "ordinary" keys with a special prefix prepended. The Range Metadata Key for an ordinary key can be generated with the `keys.RangeMetaKey(key)` function. The RangeDescriptor for the range which contains a given key can be retrieved by generating its Range Metadata Key and dispatching it to RangeLookup.

Note that the Range Metadata Key sent to RangeLookup is NOT the key at which the desired RangeDescriptor is stored. Instead, this method returns the RangeDescriptor stored at the _lowest_ existing key which is _greater_ than the given key. The returned RangeDescriptor will thus contain the ordinary key which was originally used to generate the Range Metadata Key sent to RangeLookup.

The "Range Metadata Key" for a range is built by appending the end key of the range to the respective meta prefix.

Lookups for range metadata keys usually want to read inconsistently, but some callers need a consistent result; both are supported.

This method has an important optimization in the inconsistent case: instead of just returning the request RangeDescriptor, it also returns a slice of additional range descriptors immediately consecutive to the desired RangeDescriptor. This is intended to serve as a sort of caching pre-fetch, so that the requesting nodes can aggressively cache RangeDescriptors which are likely to be desired by their current workload. The Reverse flag specifies whether descriptors are prefetched in descending or ascending order.

func (*Replica) ReplicaDescriptor

func (r *Replica) ReplicaDescriptor(replicaID roachpb.ReplicaID) (roachpb.ReplicaDescriptor, error)

ReplicaDescriptor returns information about the given member of this replica's range.

func (*Replica) ResolveIntent

ResolveIntent resolves a write intent from the specified key according to the status of the transaction which created it.

func (*Replica) ResolveIntentRange

ResolveIntentRange resolves write intents in the specified key range according to the status of the transaction which created it.

func (*Replica) ReverseScan

ReverseScan scans the key range specified by start key through end key in descending order up to some maximum number of results.

func (*Replica) Scan

Scan scans the key range specified by start key through end key in ascending order up to some maximum number of results.

func (*Replica) Send

Send adds a command for execution on this range. The command's affected keys are verified to be contained within the range and the range's leadership is confirmed. The command is then dispatched either along the read-only execution path or the read-write Raft command queue. TODO(tschottdorf): use BatchRequest w/o pointer receiver.

func (*Replica) SetHardState

func (r *Replica) SetHardState(st raftpb.HardState) error

SetHardState implements the multiraft.WriteableGroupStorage interface.

func (*Replica) SetLastVerificationTimestamp

func (r *Replica) SetLastVerificationTimestamp(timestamp roachpb.Timestamp) error

SetLastVerificationTimestamp writes the timestamp at which the range's data was last verified.

func (*Replica) SetMaxBytes

func (r *Replica) SetMaxBytes(maxBytes int64)

SetMaxBytes atomically sets the maximum byte limit before split. This value is cached by the range for efficiency.

func (*Replica) Snapshot

func (r *Replica) Snapshot() (raftpb.Snapshot, error)

Snapshot implements the raft.Storage interface.

func (*Replica) String

func (r *Replica) String() string

String returns a string representation of the range.

func (*Replica) Term

func (r *Replica) Term(i uint64) (uint64, error)

Term implements the raft.Storage interface.

func (*Replica) TruncateLog

TruncateLog discards a prefix of the raft log.

type ReplicationStatusEvent

type ReplicationStatusEvent struct {
	StoreID roachpb.StoreID

	// Per-range availability information, which is currently computed by
	// periodically polling the ranges of each store.
	// TODO(mrtracy): See if this information could be computed incrementally
	// from other events.
	LeaderRangeCount     int32
	ReplicatedRangeCount int32
	AvailableRangeCount  int32
}

ReplicationStatusEvent contains statistics on the replication status of the ranges in the store.

Because these statistics cannot currently be computed from other events, this event should be periodically broadcast by the store independently of other operations.

type ResponseCache

type ResponseCache struct {
	// contains filtered or unexported fields
}

A ResponseCache provides idempotence for request retries. Each request to a range specifies a ClientCmdID in the request header which uniquely identifies a client command. After commands have been replicated via Raft, they are executed against the state machine and the results are stored in the ResponseCache.

The ResponseCache stores responses in the underlying engine, using keys derived from the Range ID and the ClientCmdID.

A ResponseCache is not thread safe. Access to it is serialized through Raft.

func NewResponseCache

func NewResponseCache(rangeID roachpb.RangeID) *ResponseCache

NewResponseCache returns a new response cache. Every range replica maintains a response cache, not just the leader. However, when a replica loses or gains leadership of the Raft consensus group, the inflight map should be cleared.

func (*ResponseCache) ClearData

func (rc *ResponseCache) ClearData(e engine.Engine) error

ClearData removes all items stored in the persistent cache. It does not alter the inflight map.

func (*ResponseCache) CopyFrom

func (rc *ResponseCache) CopyFrom(e engine.Engine, originRangeID roachpb.RangeID) error

CopyFrom copies all the cached results from the originRangeID response cache into this one. Note that the cache will not be locked while copying is in progress. Failures decoding individual cache entries return an error. The copy is done directly using the engine instead of interpreting values through MVCC for efficiency.

func (*ResponseCache) CopyInto

func (rc *ResponseCache) CopyInto(e engine.Engine, destRangeID roachpb.RangeID) error

CopyInto copies all the cached results from this response cache into the destRangeID response cache. Failures decoding individual cache entries return an error.

func (*ResponseCache) GetResponse

GetResponse looks up a response matching the specified cmdID. If the response is found, it is returned along with its associated error. If the response is not found, nil is returned for both the response and its error. In all cases, the third return value is the error returned from the engine when reading the on-disk cache.

func (*ResponseCache) PutResponse

func (rc *ResponseCache) PutResponse(e engine.Engine, cmdID roachpb.ClientCmdID, replyWithErr roachpb.ResponseWithError) error

PutResponse writes a response and an error associated with it to the cache for the specified cmdID.

type SplitRangeEvent

type SplitRangeEvent struct {
	StoreID  roachpb.StoreID
	Original UpdateRangeEvent
	New      RegisterRangeEvent
}

SplitRangeEvent occurs whenever a range is split in two. This Event actually contains two other events: an UpdateRangeEvent for the Range which originally existed, and a RegisterRangeEvent for the range created via the split.

type StartStoreEvent

type StartStoreEvent struct {
	StoreID   roachpb.StoreID
	StartedAt int64
}

StartStoreEvent occurs whenever a store is initially started.

type Store

type Store struct {
	Ident roachpb.StoreIdent
	// contains filtered or unexported fields
}

A Store maintains a map of ranges by start key. A Store corresponds to one physical device.

func NewStore

func NewStore(ctx StoreContext, eng engine.Engine, nodeDesc *roachpb.NodeDescriptor) *Store

NewStore returns a new instance of a store.

func (*Store) AddReplicaTest

func (s *Store) AddReplicaTest(rng *Replica) error

AddReplicaTest adds the replica to the store's replica map and to the sorted replicasByKey slice. To be used only by unittests.

func (*Store) AppliedIndex

func (s *Store) AppliedIndex(groupID roachpb.RangeID) (uint64, error)

AppliedIndex implements the multiraft.StateMachine interface.

func (*Store) Attrs

func (s *Store) Attrs() roachpb.Attributes

Attrs returns the attributes of the underlying store.

func (*Store) Bootstrap

func (s *Store) Bootstrap(ident roachpb.StoreIdent, stopper *stop.Stopper) error

Bootstrap writes a new store ident to the underlying engine. To ensure that no crufty data already exists in the engine, it scans the engine contents before writing the new store ident. The engine should be completely empty. It returns an error if called on a non-empty engine.

func (*Store) BootstrapRange

func (s *Store) BootstrapRange(initialValues []roachpb.KeyValue) error

BootstrapRange creates the first range in the cluster and manually writes it to the store. Default range addressing records are created for meta1 and meta2. Default configurations for zones are created. All configs are specified for the empty key prefix, meaning they apply to the entire database. The zone requires three replicas with no other specifications. It also adds the range tree and the root node, the first range, to it. The 'initialValues' are written as well after each value's checksum is initalized.

func (*Store) Capacity

func (s *Store) Capacity() (roachpb.StoreCapacity, error)

Capacity returns the capacity of the underlying storage engine.

func (*Store) Clock

func (s *Store) Clock() *hlc.Clock

Clock accessor.

func (*Store) ClusterID

func (s *Store) ClusterID() string

ClusterID accessor.

func (*Store) Context

func (s *Store) Context(ctx context.Context) context.Context

Context returns a base context to pass along with commands being executed, derived from the supplied context (which is allowed to be nil).

func (*Store) DB

func (s *Store) DB() *client.DB

DB accessor.

func (*Store) Descriptor

func (s *Store) Descriptor() (*roachpb.StoreDescriptor, error)

Descriptor returns a StoreDescriptor including current store capacity information.

func (*Store) DisableReplicaGCQueue

func (s *Store) DisableReplicaGCQueue(disabled bool)

DisableReplicaGCQueue disables or enables the replica GC queue. Exposed only for testing.

func (*Store) Engine

func (s *Store) Engine() engine.Engine

Engine accessor.

func (*Store) EventFeed

func (s *Store) EventFeed() StoreEventFeed

EventFeed accessor.

func (*Store) ForceReplicaGCScan

func (s *Store) ForceReplicaGCScan(t util.Tester)

ForceReplicaGCScan iterates over all ranges and enqueues any that may need to be GC'd. Exposed only for testing.

func (*Store) ForceReplicationScan

func (s *Store) ForceReplicationScan(t util.Tester)

ForceReplicationScan iterates over all ranges and enqueues any that need to be replicated. Exposed only for testing.

func (*Store) GetReplica

func (s *Store) GetReplica(rangeID roachpb.RangeID) (*Replica, error)

GetReplica fetches a replica by Range ID. Returns an error if no replica is found.

func (*Store) GetStatus

func (s *Store) GetStatus() (*StoreStatus, error)

GetStatus fetches the latest store status from the stored value on the cluster. Returns nil if the scanner has not yet run. The scanner runs once every ctx.ScanInterval.

func (*Store) Gossip

func (s *Store) Gossip() *gossip.Gossip

Gossip accessor.

func (*Store) GossipStore

func (s *Store) GossipStore()

GossipStore broadcasts the store on the gossip network.

func (*Store) GroupLocker

func (s *Store) GroupLocker() sync.Locker

GroupLocker implements the multiraft.Storage interface.

func (*Store) GroupStorage

func (s *Store) GroupStorage(groupID roachpb.RangeID, replicaID roachpb.ReplicaID) (multiraft.WriteableGroupStorage, error)

GroupStorage implements the multiraft.Storage interface.

func (*Store) IsStarted

func (s *Store) IsStarted() bool

IsStarted returns true if the Store has been started.

func (*Store) LookupReplica

func (s *Store) LookupReplica(start, end roachpb.RKey) *Replica

LookupReplica looks up a replica via binary search over the "replicasByKey" btree. Returns nil if no replica is found for specified key range. Note that the specified keys are transformed using Key.Address() to ensure we lookup replicas correctly for local keys. When end is nil, a replica that contains start is looked up.

func (*Store) MergeRange

func (s *Store) MergeRange(subsumingRng *Replica, updatedEndKey roachpb.RKey, subsumedRangeID roachpb.RangeID) error

MergeRange expands the subsuming range to absorb the subsumed range. This merge operation will fail if the two ranges are not collocated on the same store. Must be called from the processRaft goroutine.

func (*Store) NewRangeDescriptor

func (s *Store) NewRangeDescriptor(start, end roachpb.RKey, replicas []roachpb.ReplicaDescriptor) (*roachpb.RangeDescriptor, error)

NewRangeDescriptor creates a new descriptor based on start and end keys and the supplied roachpb.Replicas slice. It allocates new replica IDs to fill out the supplied replicas.

func (*Store) NewSnapshot

func (s *Store) NewSnapshot() engine.Engine

NewSnapshot creates a new snapshot engine.

func (*Store) ProposeRaftCommand

func (s *Store) ProposeRaftCommand(idKey cmdIDKey, cmd roachpb.RaftCommand) <-chan error

ProposeRaftCommand submits a command to raft. The command is processed asynchronously and an error or nil will be written to the returned channel when it is committed or aborted (but note that committed does mean that it has been applied to the range yet).

func (*Store) PublishStatus

func (s *Store) PublishStatus() error

PublishStatus publishes periodically computed status events to the store's events feed. This method itself should be periodically called by some external mechanism.

func (*Store) RaftStatus

func (s *Store) RaftStatus(rangeID roachpb.RangeID) *raft.Status

RaftStatus returns the current raft status of the given range.

func (*Store) RemoveReplica

func (s *Store) RemoveReplica(rep *Replica) error

RemoveReplica removes the replica from the store's replica map and from the sorted replicasByKey btree.

func (*Store) ReplicaCount

func (s *Store) ReplicaCount() int

ReplicaCount returns the number of replicas contained by this store.

func (*Store) ReplicaDescriptor

func (s *Store) ReplicaDescriptor(groupID roachpb.RangeID, replicaID roachpb.ReplicaID) (roachpb.ReplicaDescriptor, error)

ReplicaDescriptor implements the multiraft.Storage interface.

func (*Store) ReplicaIDForStore

func (s *Store) ReplicaIDForStore(groupID roachpb.RangeID, storeID roachpb.StoreID) (roachpb.ReplicaID, error)

ReplicaIDForStore implements the multiraft.Storage interface.

func (*Store) ReplicasFromSnapshot

func (s *Store) ReplicasFromSnapshot(snap raftpb.Snapshot) ([]roachpb.ReplicaDescriptor, error)

ReplicasFromSnapshot implements the multiraft.Storage interface.

func (*Store) Send

Send fetches a range based on the header's replica, assembles method, args & reply into a Raft Cmd struct and executes the command using the fetched range.

func (*Store) SetRangeRetryOptions

func (s *Store) SetRangeRetryOptions(ro retry.Options)

SetRangeRetryOptions sets the retry options used for this store. For unittests only.

func (*Store) SplitRange

func (s *Store) SplitRange(origRng, newRng *Replica) error

SplitRange shortens the original range to accommodate the new range. The new range is added to the ranges map and the rangesByKey btree.

func (*Store) Start

func (s *Store) Start(stopper *stop.Stopper) error

Start the engine, set the GC and read the StoreIdent.

func (*Store) StartedAt

func (s *Store) StartedAt() int64

StartedAt returns the timestamp at which the store was most recently started.

func (*Store) Stopper

func (s *Store) Stopper() *stop.Stopper

Stopper accessor.

func (*Store) StoreID

func (s *Store) StoreID() roachpb.StoreID

StoreID accessor.

func (*Store) String

func (s *Store) String() string

String formats a store for debug output.

func (*Store) Tracer

func (s *Store) Tracer() *tracer.Tracer

Tracer accessor.

func (*Store) WaitForInit

func (s *Store) WaitForInit()

WaitForInit waits for any asynchronous processes begun in Start() to complete their initialization. In particular, this includes gossiping. In some cases this may block until the range GC queue has completed its scan. Only for testing.

type StoreContext

type StoreContext struct {
	Clock     *hlc.Clock
	DB        *client.DB
	Gossip    *gossip.Gossip
	StorePool *StorePool
	Transport multiraft.Transport

	// RangeRetryOptions are the retry options when retryable errors are
	// encountered sending commands to ranges.
	RangeRetryOptions retry.Options

	// RaftTickInterval is the resolution of the Raft timer; other raft timeouts
	// are defined in terms of multiples of this value.
	RaftTickInterval time.Duration

	// RaftHeartbeatIntervalTicks is the number of ticks that pass between heartbeats.
	RaftHeartbeatIntervalTicks int

	// RaftElectionTimeoutTicks is the number of ticks that must pass before a follower
	// considers a leader to have failed and calls a new election. Should be significantly
	// higher than RaftHeartbeatIntervalTicks. The raft paper recommends a value of 150ms
	// for local networks.
	RaftElectionTimeoutTicks int

	// ScanInterval is the default value for the scan interval
	ScanInterval time.Duration

	// ScanMaxIdleTime is the maximum time the scanner will be idle between ranges.
	// If enabled (> 0), the scanner may complete in less than ScanInterval for small
	// stores.
	ScanMaxIdleTime time.Duration

	// TimeUntilStoreDead is the time after which if there is no new gossiped
	// information about a store, it can be considered dead.
	TimeUntilStoreDead time.Duration

	// RebalancingOptions configures how the store will attempt to rebalance its
	// replicas to other stores.
	RebalancingOptions RebalancingOptions

	// EventFeed is a feed to which this store will publish events.
	EventFeed *util.Feed

	// Tracer is a request tracer.
	Tracer *tracer.Tracer
}

A StoreContext encompasses the auxiliary objects and configuration required to create a store. All fields holding a pointer or an interface are required to create a store; the rest will have sane defaults set if omitted.

func (*StoreContext) Valid

func (sc *StoreContext) Valid() bool

Valid returns true if the StoreContext is populated correctly. We don't check for Gossip and DB since some of our tests pass that as nil.

type StoreEventFeed

type StoreEventFeed struct {
	// contains filtered or unexported fields
}

StoreEventFeed is a helper structure which publishes store-specific events to a util.Feed. The target feed may be shared by multiple StoreEventFeeds. If the target feed is nil, event methods become no-ops.

func NewStoreEventFeed

func NewStoreEventFeed(id roachpb.StoreID, feed *util.Feed) StoreEventFeed

NewStoreEventFeed creates a new StoreEventFeed which publishes events for a specific store to the supplied feed.

type StoreEventListener

type StoreEventListener interface {
	OnRegisterRange(event *RegisterRangeEvent)
	OnUpdateRange(event *UpdateRangeEvent)
	OnRemoveRange(event *RemoveRangeEvent)
	OnSplitRange(event *SplitRangeEvent)
	OnMergeRange(event *MergeRangeEvent)
	OnStartStore(event *StartStoreEvent)
	OnBeginScanRanges(event *BeginScanRangesEvent)
	OnEndScanRanges(event *EndScanRangesEvent)
	OnStoreStatus(event *StoreStatusEvent)
	OnReplicationStatus(event *ReplicationStatusEvent)
}

StoreEventListener is an interface that can be implemented by objects which listen for events published by stores.

type StoreList

type StoreList struct {
	// contains filtered or unexported fields
}

StoreList holds a list of store descriptors and associated count and used stats for those stores.

type StorePool

type StorePool struct {
	// contains filtered or unexported fields
}

StorePool maintains a list of all known stores in the cluster and information on their health.

func NewStorePool

func NewStorePool(g *gossip.Gossip, timeUntilStoreDead time.Duration, stopper *stop.Stopper) *StorePool

NewStorePool creates a StorePool and registers the store updating callback with gossip.

type StoreStatus

type StoreStatus struct {
	Desc                 cockroach_roachpb.StoreDescriptor               `protobuf:"bytes,1,opt,name=desc" json:"desc"`
	NodeID               github_com_cockroachdb_cockroach_roachpb.NodeID `protobuf:"varint,2,opt,name=node_id,casttype=github.com/cockroachdb/cockroach/roachpb.NodeID" json:"node_id"`
	RangeCount           int32                                           `protobuf:"varint,3,opt,name=range_count" json:"range_count"`
	StartedAt            int64                                           `protobuf:"varint,4,opt,name=started_at" json:"started_at"`
	UpdatedAt            int64                                           `protobuf:"varint,5,opt,name=updated_at" json:"updated_at"`
	Stats                cockroach_storage_engine.MVCCStats              `protobuf:"bytes,6,opt,name=stats" json:"stats"`
	LeaderRangeCount     int32                                           `protobuf:"varint,7,opt,name=leader_range_count" json:"leader_range_count"`
	ReplicatedRangeCount int32                                           `protobuf:"varint,8,opt,name=replicated_range_count" json:"replicated_range_count"`
	AvailableRangeCount  int32                                           `protobuf:"varint,9,opt,name=available_range_count" json:"available_range_count"`
}

StoreStatus contains the stats needed to calculate the current status of a store.

func (*StoreStatus) GetAvailableRangeCount

func (m *StoreStatus) GetAvailableRangeCount() int32

func (*StoreStatus) GetDesc

func (*StoreStatus) GetLeaderRangeCount

func (m *StoreStatus) GetLeaderRangeCount() int32

func (*StoreStatus) GetNodeID

func (*StoreStatus) GetRangeCount

func (m *StoreStatus) GetRangeCount() int32

func (*StoreStatus) GetReplicatedRangeCount

func (m *StoreStatus) GetReplicatedRangeCount() int32

func (*StoreStatus) GetStartedAt

func (m *StoreStatus) GetStartedAt() int64

func (*StoreStatus) GetStats

func (*StoreStatus) GetUpdatedAt

func (m *StoreStatus) GetUpdatedAt() int64

func (*StoreStatus) Marshal

func (m *StoreStatus) Marshal() (data []byte, err error)

func (*StoreStatus) MarshalTo

func (m *StoreStatus) MarshalTo(data []byte) (int, error)

func (*StoreStatus) ProtoMessage

func (*StoreStatus) ProtoMessage()

func (*StoreStatus) Reset

func (m *StoreStatus) Reset()

func (*StoreStatus) Size

func (m *StoreStatus) Size() (n int)

func (*StoreStatus) String

func (m *StoreStatus) String() string

func (*StoreStatus) Unmarshal

func (m *StoreStatus) Unmarshal(data []byte) error

type StoreStatusEvent

type StoreStatusEvent struct {
	Desc *roachpb.StoreDescriptor
}

StoreStatusEvent contains the current descriptor for the given store.

Because the descriptor contains information that cannot currently be computed from other events, this event should be periodically broadcast by the store independently of other operations.

type TimestampCache

type TimestampCache struct {
	// contains filtered or unexported fields
}

A TimestampCache maintains an interval tree FIFO cache of keys or key ranges and the timestamps at which they were most recently read or written. If a timestamp was read or written by a transaction, the txn ID is stored with the timestamp to avoid advancing timestamps on successive requests from the same transaction.

The cache also maintains a low-water mark which is the most recently evicted entry's timestamp. This value always ratchets with monotonic increases. The low water mark is initialized to the current system time plus the maximum clock offset.

func NewTimestampCache

func NewTimestampCache(clock *hlc.Clock) *TimestampCache

NewTimestampCache returns a new timestamp cache with supplied hybrid clock.

func (*TimestampCache) Add

func (tc *TimestampCache) Add(start, end roachpb.Key, timestamp roachpb.Timestamp, txnID []byte, readOnly bool)

Add the specified timestamp to the cache as covering the range of keys from start to end. If end is nil, the range covers the start key only. txnID is nil for no transaction. readOnly specifies whether the command adding this timestamp was read-only or not.

func (*TimestampCache) Clear

func (tc *TimestampCache) Clear(clock *hlc.Clock)

Clear clears the cache and resets the low water mark to the current time plus the maximum clock offset.

func (*TimestampCache) GetMax

func (tc *TimestampCache) GetMax(start, end roachpb.Key, txnID []byte) (roachpb.Timestamp, roachpb.Timestamp)

GetMax returns the maximum read and write timestamps which overlap the interval spanning from start to end. Cached timestamps matching the specified txnID are not considered. If no part of the specified range is overlapped by timestamps in the cache, the low water timestamp is returned for both read and write timestamps.

The txn ID prevents restarts with a pattern like: read("a"), write("a"). The read adds a timestamp for "a". Then the write (for the same transaction) would get that as the max timestamp and be forced to increment it. This allows timestamps from the same txn to be ignored.

func (*TimestampCache) MergeInto

func (tc *TimestampCache) MergeInto(dest *TimestampCache, clear bool)

MergeInto merges all entries from this timestamp cache into the dest timestamp cache. The clear parameter, if true, copies the values of lowWater and latest and clears the destination cache before merging in the source.

func (*TimestampCache) SetLowWater

func (tc *TimestampCache) SetLowWater(lowWater roachpb.Timestamp)

SetLowWater sets the cache's low water mark, which is the minimum value the cache will return from calls to GetMax().

type UpdateRangeEvent

type UpdateRangeEvent struct {
	StoreID roachpb.StoreID
	Desc    *roachpb.RangeDescriptor
	Stats   engine.MVCCStats
	Method  roachpb.Method
	Delta   engine.MVCCStats
}

UpdateRangeEvent occurs whenever a Range is modified. This structure includes the basic range information, but also includes a second set of MVCCStats containing the delta from the Range's previous stats. If the update did not modify any statistics, this delta may be nil.

Directories

Path Synopsis
Package engine provides low-level storage.
Package engine provides low-level storage.

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL