local

package
v0.0.0-...-503c688
Published: Nov 8, 2023 License: Apache-2.0 Imports: 84 Imported by: 0

Documentation

Index

Constants

const (
	CompactionLowerThreshold = 512 * units.MiB
	CompactionUpperThreshold = 32 * units.GiB
)

Compaction thresholds.

Variables

var (
	MinRowID  = common.EncodeIntRowID(math.MinInt64)
	ZeroRowID = common.EncodeIntRowID(0)
)

Static variables for rowID.

var (
	// RunInTest indicates whether the current process is running in test.
	RunInTest bool
	// LastAlloc is the last ID allocator.
	LastAlloc manual.Allocator
)
var BuildDuplicateTaskForTest = func(m *DupeDetector) ([]dupTask, error) {
	return m.buildDupTasks()
}

BuildDuplicateTaskForTest is only used for tests.

var CheckTiFlashVersionForTest = checkTiFlashVersion

CheckTiFlashVersionForTest is only used for tests.

var TiFlashReplicaQueryForTest = tiFlashReplicaQuery

TiFlashReplicaQueryForTest is only used for tests.

Functions

func CheckDiskQuota

func CheckDiskQuota(mgr DiskUsage, quota int64) (
	largeEngines []uuid.UUID,
	inProgressLargeEngines int,
	totalDiskSize int64,
	totalMemSize int64,
)

CheckDiskQuota verifies whether the total engine file size is below the given quota. If the quota is exceeded, this method returns a list of engines which, once imported, can bring the total size back below the quota.
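
A minimal usage sketch (not part of this package's API): enforceDiskQuota is a hypothetical helper that flushes and force-imports the returned engines to reclaim local disk space, relying on *Backend satisfying DiskUsage via EngineFileSizes.

func enforceDiskQuota(ctx context.Context, local *Backend, quota, regionSplitSize, regionSplitKeys int64) error {
	largeEngines, inProgress, diskSize, memSize := CheckDiskQuota(local, quota)
	if diskSize+memSize <= quota {
		return nil // still under quota, nothing to do
	}
	for _, engineUUID := range largeEngines {
		// UnsafeImportAndReset requires the engine to be flushed first.
		if err := local.FlushEngine(ctx, engineUUID); err != nil {
			return err
		}
		if err := local.UnsafeImportAndReset(ctx, engineUUID, regionSplitSize, regionSplitKeys); err != nil {
			return err
		}
	}
	_ = inProgress // engines already being imported will free space on their own
	return nil
}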

func EstimateCompactionThreshold

func EstimateCompactionThreshold(files []mydump.FileInfo, cp *checkpoints.TableCheckpoint, factor int64) int64

EstimateCompactionThreshold estimates the SST file compaction threshold from the total row file size. With a higher compaction threshold, compaction takes longer but iteration gets faster. We try to keep the total number of SST files under 500; however, compacting 32 GiB of SST files takes about 20 minutes, so the upper bound is set to 32 GiB to avoid overly long compactions. factor is the non-clustered factor (1 for a data engine, and the number of non-clustered indexes for an index engine).
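
A simplified sketch of the clamping described above. The divisor of 500 follows the stated file-count target; treating a result below the lower bound as "no extra compaction needed" is an assumption, not a statement of the actual implementation.

func clampCompactionThreshold(totalRowFileSize, factor int64) int64 {
	// Aim to keep the total number of SST files under ~500.
	threshold := totalRowFileSize * factor / 500
	switch {
	case threshold < CompactionLowerThreshold:
		// Assumed behavior: such small engines skip extra compaction.
		threshold = 0
	case threshold > CompactionUpperThreshold:
		// Cap at 32 GiB so a single compaction doesn't run for ~20 minutes.
		threshold = CompactionUpperThreshold
	}
	return threshold
}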

func NewEncodingBuilder

func NewEncodingBuilder(ctx context.Context) encode.EncodingBuilder

NewEncodingBuilder creates a KVEncodingBuilder with the local backend implementation.

func NewTargetInfoGetter

func NewTargetInfoGetter(tls *common.TLS, db *sql.DB, pdAddr string) backend.TargetInfoGetter

NewTargetInfoGetter creates a TargetInfoGetter with the local backend implementation.

func VerifyRLimit

func VerifyRLimit(estimateMaxFiles RlimT) error

VerifyRLimit checks whether the open-file limit is large enough. The local backend needs to read and write a lot of L0 SST files, so the system's max open files limit must be checked.
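
A hedged usage sketch combining GetSystemRLimit and VerifyRLimit; checkOpenFileLimit is a hypothetical helper and estimateMaxFiles is a caller-supplied estimate of how many SST files may be open at once.

func checkOpenFileLimit(estimateMaxFiles RlimT) (RlimT, error) {
	cur, err := GetSystemRLimit()
	if err != nil {
		return 0, err
	}
	// Fail early if the system limit cannot accommodate the estimate.
	if err := VerifyRLimit(estimateMaxFiles); err != nil {
		return cur, err
	}
	return cur, nil
}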

Types

type Backend

type Backend struct {
	BackendConfig
	// contains filtered or unexported fields
}

Backend is a local backend.

func NewBackend

func NewBackend(
	ctx context.Context,
	tls *common.TLS,
	config BackendConfig,
	regionSizeGetter TableRegionSizeGetter,
) (*Backend, error)

NewBackend creates new connections to TiKV.
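
A hedged end-to-end sketch of the typical engine lifecycle (open, write, close, import, cleanup). The configs, column names, rows value, and the region-split parameters are all assumptions supplied by the caller, and the EngineWriter.Close signature is assumed, not documented here.

func importOneEngine(
	ctx context.Context,
	tls *common.TLS,
	cfg BackendConfig,
	sizeGetter TableRegionSizeGetter,
	engineCfg *backend.EngineConfig,
	writerCfg *backend.LocalWriterConfig,
	columnNames []string,
	rows encode.Rows,
) error {
	local, err := NewBackend(ctx, tls, cfg, sizeGetter)
	if err != nil {
		return err
	}
	defer local.Close()

	engineUUID := uuid.New()
	if err := local.OpenEngine(ctx, engineCfg, engineUUID); err != nil {
		return err
	}
	w, err := local.LocalWriter(ctx, writerCfg, engineUUID)
	if err != nil {
		return err
	}
	if err := w.AppendRows(ctx, columnNames, rows); err != nil {
		return err
	}
	if _, err := w.Close(ctx); err != nil { // assumed EngineWriter.Close signature
		return err
	}
	if err := local.CloseEngine(ctx, engineCfg, engineUUID); err != nil {
		return err
	}
	// 96 MiB / 960000 keys are illustrative region-split parameters only.
	if err := local.ImportEngine(ctx, engineUUID, 96*units.MiB, 960000); err != nil {
		return err
	}
	return local.CleanupEngine(ctx, engineUUID)
}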

func (*Backend) BatchSplitRegions

func (local *Backend) BatchSplitRegions(
	ctx context.Context,
	region *split.RegionInfo,
	keys [][]byte,
) (*split.RegionInfo, []*split.RegionInfo, error)

BatchSplitRegions splits regions by the given split keys and tries to scatter the new regions. If split/scatter fails because a new region is not ready, this function will not return an error.

func (*Backend) CleanupEngine

func (local *Backend) CleanupEngine(ctx context.Context, engineUUID uuid.UUID) error

CleanupEngine cleans up the engine and reclaims the space.

func (*Backend) Close

func (local *Backend) Close()

Close the local backend.

func (*Backend) CloseEngine

func (local *Backend) CloseEngine(ctx context.Context, cfg *backend.EngineConfig, engineUUID uuid.UUID) error

CloseEngine closes the backend engine by uuid.

func (*Backend) EngineFileSizes

func (local *Backend) EngineFileSizes() (res []backend.EngineFileSize)

EngineFileSizes implements DiskUsage interface.

func (*Backend) FlushAllEngines

func (local *Backend) FlushAllEngines(parentCtx context.Context) (err error)

FlushAllEngines flushes all engines.

func (*Backend) FlushEngine

func (local *Backend) FlushEngine(ctx context.Context, engineID uuid.UUID) error

FlushEngine ensures the written data is saved successfully, so that no data is lost after a restart.

func (*Backend) GetDupeController

func (local *Backend) GetDupeController(dupeConcurrency int, errorMgr *errormanager.ErrorManager) *DupeController

GetDupeController returns a new dupe controller.

func (*Backend) GetImportedKVCount

func (local *Backend) GetImportedKVCount(engineUUID uuid.UUID) int64

GetImportedKVCount returns the number of imported KV pairs of some engine.

func (*Backend) GetPDClient

func (local *Backend) GetPDClient() pd.Client

GetPDClient returns the PD client.

func (*Backend) ImportEngine

func (local *Backend) ImportEngine(ctx context.Context, engineUUID uuid.UUID, regionSplitSize, regionSplitKeys int64) error

ImportEngine imports an engine to TiKV.

func (*Backend) LocalWriter

func (local *Backend) LocalWriter(_ context.Context, cfg *backend.LocalWriterConfig, engineUUID uuid.UUID) (backend.EngineWriter, error)

LocalWriter returns a new local writer.

func (*Backend) OpenEngine

func (local *Backend) OpenEngine(ctx context.Context, cfg *backend.EngineConfig, engineUUID uuid.UUID) error

OpenEngine must be called while holding the mutex of the Engine.

func (*Backend) ResetEngine

func (local *Backend) ResetEngine(ctx context.Context, engineUUID uuid.UUID) error

ResetEngine resets the engine and reclaims the space.

func (*Backend) RetryImportDelay

func (*Backend) RetryImportDelay() time.Duration

RetryImportDelay returns the delay time before retrying to import a file.

func (*Backend) ShouldPostProcess

func (*Backend) ShouldPostProcess() bool

ShouldPostProcess returns true if the backend should post process the data.

func (*Backend) SplitAndScatterRegionByRanges

func (local *Backend) SplitAndScatterRegionByRanges(
	ctx context.Context,
	ranges []Range,
	tableInfo *checkpoints.TidbTableInfo,
	needSplit bool,
	regionSplitSize int64,
) (err error)

SplitAndScatterRegionByRanges includes region split & scatter operations, just like br. We could simply call the br functions, but we would need to change some br function signatures. When the total size of the ranges is small, we can skip the split to avoid generating empty regions. TODO: remove this file and use br internal functions.

func (*Backend) SplitAndScatterRegionInBatches

func (local *Backend) SplitAndScatterRegionInBatches(
	ctx context.Context,
	ranges []Range,
	tableInfo *checkpoints.TidbTableInfo,
	needSplit bool,
	regionSplitSize int64,
	batchCnt int,
) error

SplitAndScatterRegionInBatches splits & scatters regions in batches. Too many simultaneous split & scatter requests may put a lot of pressure on TiKV and PD.
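
A plausible shape for the batching (a sketch, not the package's actual code): chunk the ranges into groups of at most batchCnt and delegate each group to SplitAndScatterRegionByRanges.

func splitInBatches(
	ctx context.Context,
	local *Backend,
	ranges []Range,
	tableInfo *checkpoints.TidbTableInfo,
	needSplit bool,
	regionSplitSize int64,
	batchCnt int,
) error {
	for i := 0; i < len(ranges); i += batchCnt {
		end := i + batchCnt
		if end > len(ranges) {
			end = len(ranges)
		}
		if err := local.SplitAndScatterRegionByRanges(ctx, ranges[i:end], tableInfo, needSplit, regionSplitSize); err != nil {
			return err
		}
	}
	return nil
}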

func (*Backend) TotalMemoryConsume

func (local *Backend) TotalMemoryConsume() int64

TotalMemoryConsume returns the total memory usage of the local backend.

func (*Backend) UnsafeImportAndReset

func (local *Backend) UnsafeImportAndReset(ctx context.Context, engineUUID uuid.UUID, regionSplitSize, regionSplitKeys int64) error

UnsafeImportAndReset forces the backend to import the content of an engine into the target and then reset the engine to empty. This method will not close the engine. Make sure the engine is flushed manually before calling this method.

type BackendConfig

type BackendConfig struct {
	// comma separated list of PD endpoints.
	PDAddr        string
	LocalStoreDir string
	// max number of cached grpc.ClientConn to a store.
	// note: this is not the limit on actual connections; each grpc.ClientConn can have one or more underlying connections.
	MaxConnPerStore int
	// compression type used when writing to or ingesting into TiKV.
	ConnCompressType config.CompressionType
	// concurrency of generateJobForRange and import (write & ingest) workers.
	WorkerConcurrency      int
	KVWriteBatchSize       int
	RegionSplitBatchSize   int
	RegionSplitConcurrency int
	CheckpointEnabled      bool
	// memory table size of pebble. since pebble can have multiple mem tables, the max memory used is
	// MemTableSize * MemTableStopWritesThreshold; see pebble.Options for more details.
	MemTableSize            int
	LocalWriterMemCacheSize int64
	// whether to check TiKV capacity before write & ingest.
	ShouldCheckTiKV    bool
	DupeDetectEnabled  bool
	DuplicateDetectOpt DupDetectOpt
	// max write speed in bytes per second to each store (burst is allowed); 0 means no limit.
	StoreWriteBWLimit int
	// When TiKV is in normal mode, ingesting too many SSTs will cause TiKV write stall.
	// To avoid this, we should check write stall before ingesting SSTs. Note that we
	// must check both the leader node and the followers on the client side, because followers will
	// not check write stall as long as the ingest command is accepted by the leader.
	ShouldCheckWriteStall bool
	// soft limit on the number of open files that can be used by pebble DB.
	// the minimum value is 128.
	MaxOpenFiles int
	KeyspaceName string
	// the scope used when pausing PD schedulers.
	PausePDSchedulerScope config.PausePDSchedulerScope
}

BackendConfig is the config for local backend.

func NewBackendConfig

func NewBackendConfig(cfg *config.Config, maxOpenFiles int, keyspaceName string) BackendConfig

NewBackendConfig creates a new BackendConfig.
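
For illustration, a hand-built config with assumed values (NewBackendConfig derives these from *config.Config instead; none of these numbers are recommendations):

cfg := BackendConfig{
	PDAddr:                 "127.0.0.1:2379",
	LocalStoreDir:          "/tmp/lightning-sorted-kv",
	MaxConnPerStore:        1,
	WorkerConcurrency:      16,
	KVWriteBatchSize:       32768,
	RegionSplitBatchSize:   4096,
	RegionSplitConcurrency: 8,
	CheckpointEnabled:      true,
	MemTableSize:           512 * units.MiB,
	ShouldCheckWriteStall:  true,
	MaxOpenFiles:           1024, // the documented minimum is 128
}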

type ChecksumManager

type ChecksumManager interface {
	Checksum(ctx context.Context, tableInfo *checkpoints.TidbTableInfo) (*RemoteChecksum, error)
}

ChecksumManager is an interface for computing table checksums.

func NewTiDBChecksumExecutor

func NewTiDBChecksumExecutor(db *sql.DB) ChecksumManager

NewTiDBChecksumExecutor creates a new TiDB checksum executor.
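
A hedged usage sketch tying ChecksumManager to RemoteChecksum.IsEqual; verifyChecksum is a hypothetical helper, and localChecksum is assumed to have been computed elsewhere during the import.

func verifyChecksum(ctx context.Context, db *sql.DB, tableInfo *checkpoints.TidbTableInfo, localChecksum *verification.KVChecksum) error {
	var mgr ChecksumManager = NewTiDBChecksumExecutor(db)
	remote, err := mgr.Checksum(ctx, tableInfo)
	if err != nil {
		return err
	}
	if !remote.IsEqual(localChecksum) {
		return fmt.Errorf("checksum mismatch on %s.%s", remote.Schema, remote.Table)
	}
	return nil
}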

type DiskUsage

type DiskUsage interface {
	// EngineFileSizes obtains the size occupied locally of all engines managed
	// by this backend. This method is used to compute disk quota.
	// It can return nil if the contents are all stored remotely.
	EngineFileSizes() (res []backend.EngineFileSize)
}

DiskUsage is an interface for obtaining the local size occupied by all engines.

type DupDetectOpt

type DupDetectOpt struct {
	ReportErrOnDup bool
}

DupDetectOpt is the option for duplicate detection.

type DupKVStream

type DupKVStream interface {
	// Next returns the next key-value pair or any error it encountered.
	// At the end of the stream, the error is io.EOF.
	Next() (key, val []byte, err error)
	// Close closes the stream.
	Close() error
}

DupKVStream is a streaming interface for collecting duplicate key-value pairs.
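
The io.EOF convention gives the canonical drain loop; this sketch just counts the pairs.

func drainDupKVStream(stream DupKVStream) (int, error) {
	defer stream.Close()
	n := 0
	for {
		key, val, err := stream.Next()
		if err == io.EOF {
			return n, nil // end of stream
		}
		if err != nil {
			return n, err
		}
		// a real caller would record or resolve the duplicate pair here
		_, _ = key, val
		n++
	}
}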

type DupKVStreamImpl

type DupKVStreamImpl struct {
	// contains filtered or unexported fields
}

DupKVStreamImpl implements the interface of DupKVStream. It collects duplicate key-value pairs from a pebble.DB.

func NewLocalDupKVStream

func NewLocalDupKVStream(dupDB *pebble.DB, keyAdapter KeyAdapter, keyRange tidbkv.KeyRange) *DupKVStreamImpl

NewLocalDupKVStream creates a new DupKVStreamImpl with the given duplicate db and key range.

func (*DupKVStreamImpl) Close

func (s *DupKVStreamImpl) Close() error

Close implements the interface of DupKVStream.

func (*DupKVStreamImpl) Next

func (s *DupKVStreamImpl) Next() (key, val []byte, err error)

Next implements the interface of DupKVStream.

type DupeController

type DupeController struct {
	// contains filtered or unexported fields
}

DupeController is used to collect duplicate keys from local and remote data source and resolve duplication.

func (*DupeController) CollectLocalDuplicateRows

func (local *DupeController) CollectLocalDuplicateRows(ctx context.Context, tbl table.Table, tableName string, opts *encode.SessionOptions) (hasDupe bool, err error)

CollectLocalDuplicateRows collects duplicate keys from the local db. We store the duplicate keys, which may collide with other keys in the local data source.

func (*DupeController) CollectRemoteDuplicateRows

func (local *DupeController) CollectRemoteDuplicateRows(ctx context.Context, tbl table.Table, tableName string, opts *encode.SessionOptions) (hasDupe bool, err error)

CollectRemoteDuplicateRows collects duplicate keys from remote TiKV storage. These keys may be duplicates of data imported by another Lightning instance.

func (*DupeController) ResolveDuplicateRows

func (local *DupeController) ResolveDuplicateRows(ctx context.Context, tbl table.Table, tableName string, algorithm config.DuplicateResolutionAlgorithm) (err error)

ResolveDuplicateRows resolves duplicated rows by deleting/inserting data according to the required algorithm.
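
A hedged sketch of the full duplicate-handling flow through DupeController; detectAndResolve is a hypothetical helper whose parameters are assumed to be prepared by the caller.

func detectAndResolve(
	ctx context.Context,
	local *Backend,
	dupeConcurrency int,
	errorMgr *errormanager.ErrorManager,
	tbl table.Table,
	tableName string,
	opts *encode.SessionOptions,
	algorithm config.DuplicateResolutionAlgorithm,
) error {
	controller := local.GetDupeController(dupeConcurrency, errorMgr)
	hasLocal, err := controller.CollectLocalDuplicateRows(ctx, tbl, tableName, opts)
	if err != nil {
		return err
	}
	hasRemote, err := controller.CollectRemoteDuplicateRows(ctx, tbl, tableName, opts)
	if err != nil {
		return err
	}
	if hasLocal || hasRemote {
		return controller.ResolveDuplicateRows(ctx, tbl, tableName, algorithm)
	}
	return nil
}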

type DupeDetector

type DupeDetector struct {
	// contains filtered or unexported fields
}

DupeDetector provides methods to collect duplicated KV pairs and decode them into row data. The results are stored in the errorMgr. This object can only be used once, either for local or remote deduplication.

func NewDupeDetector

func NewDupeDetector(
	tbl table.Table,
	tableName string,
	splitCli split.SplitClient,
	tikvCli *tikv.KVStore,
	tikvCodec tikv.Codec,
	errMgr *errormanager.ErrorManager,
	sessOpts *encode.SessionOptions,
	concurrency int,
	logger log.Logger,
) (*DupeDetector, error)

NewDupeDetector creates a new DupeDetector.

func (*DupeDetector) CollectDuplicateRowsFromDupDB

func (m *DupeDetector) CollectDuplicateRowsFromDupDB(ctx context.Context, dupDB *pebble.DB, keyAdapter KeyAdapter) error

CollectDuplicateRowsFromDupDB collects duplicates from the duplicate DB and records all duplicate row info into errorMgr.

func (*DupeDetector) CollectDuplicateRowsFromTiKV

func (m *DupeDetector) CollectDuplicateRowsFromTiKV(ctx context.Context, importClientFactory ImportClientFactory) error

CollectDuplicateRowsFromTiKV collects duplicates from the remote TiKV and records all duplicate row info into errorMgr.

func (*DupeDetector) HasDuplicate

func (m *DupeDetector) HasDuplicate() bool

HasDuplicate returns true if there are duplicated KV pairs.

func (*DupeDetector) RecordDataConflictError

func (m *DupeDetector) RecordDataConflictError(ctx context.Context, stream DupKVStream) error

RecordDataConflictError records data conflicts to errorMgr. The key received from stream must be a row key.

func (*DupeDetector) RecordIndexConflictError

func (m *DupeDetector) RecordIndexConflictError(ctx context.Context, stream DupKVStream, tableID int64, indexInfo *model.IndexInfo) error

RecordIndexConflictError records index conflicts to errorMgr. The key received from stream must be an index key.

type Engine

type Engine struct {
	UUID uuid.UUID
	// contains filtered or unexported fields
}

Engine is a local engine.

func (*Engine) Cleanup

func (e *Engine) Cleanup(dataDir string) error

Cleanup removes the meta and db files.

func (*Engine) Close

func (e *Engine) Close() error

Close closes the engine and releases all resources.

func (*Engine) Exist

func (e *Engine) Exist(dataDir string) error

Exist checks whether the db folder exists (meta sometimes won't be flushed before Lightning exits).

func (*Engine) TotalMemorySize

func (e *Engine) TotalMemorySize() int64

TotalMemorySize returns the total memory size of the engine.

type ImportClientFactory

type ImportClientFactory interface {
	Create(ctx context.Context, storeID uint64) (sst.ImportSSTClient, error)
	Close()
}

ImportClientFactory is a factory that creates new import clients for specific stores.

type Iter

type Iter interface {
	// Seek seeks to the specified position.
	// If the key is not found, it seeks to the next key in the iter.
	Seek(key []byte) bool
	// Error returns the current error on this iter.
	Error() error
	// First moves this iter to the first key.
	First() bool
	// Last moves this iter to the last key.
	Last() bool
	// Valid checks whether this iter has reached the end.
	Valid() bool
	// Next moves this iter forward.
	Next() bool
	// Key returns the key of the current position.
	Key() []byte
	// Value returns the value of the current position.
	Value() []byte
	// Close closes this iter.
	Close() error
	// OpType returns the operation type of the current pair.
	// Currently there are two types:
	// 1. Put
	// 2. Delete
	OpType() sst.Pair_OP
}

Iter abstracts the iterator methods used by the Ingester.
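
A standard traversal shape for an Iter (a sketch; the Pair_OP constant name follows the kvproto sst package):

func scanAll(iter Iter) error {
	defer iter.Close()
	for ok := iter.First(); ok; ok = iter.Next() {
		if iter.OpType() == sst.Pair_Put {
			_, _ = iter.Key(), iter.Value() // handle a written pair here
		}
	}
	return iter.Error()
}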

type KeyAdapter

type KeyAdapter interface {
	// Encode encodes the key with its corresponding rowID. It appends the encoded key to dst and returns the
	// resulting slice. The encoded key is guaranteed to be in ascending order for comparison.
	Encode(dst []byte, key []byte, rowID []byte) []byte

	// Decode decodes the original key to dst. It appends the decoded key to dst and returns the resulting slice.
	Decode(dst []byte, data []byte) ([]byte, error)

	// EncodedLen returns the encoded key length.
	EncodedLen(key []byte, rowID []byte) int
}

KeyAdapter is used to encode and decode keys.
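
A minimal, hypothetical KeyAdapter implementation (not one of the package's own adapters) that ignores rowID; it preserves key ordering trivially but cannot disambiguate duplicate keys.

type noopKeyAdapter struct{}

// Encode appends the key unchanged, discarding rowID (hypothetical behavior).
func (noopKeyAdapter) Encode(dst, key, _ []byte) []byte { return append(dst, key...) }

// Decode appends the data unchanged, since nothing extra was encoded into it.
func (noopKeyAdapter) Decode(dst, data []byte) ([]byte, error) { return append(dst, data...), nil }

// EncodedLen is just the key length for this pass-through adapter.
func (noopKeyAdapter) EncodedLen(key, _ []byte) int { return len(key) }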

type Range

type Range struct {
	// contains filtered or unexported fields
}

Range records the start and end keys for localStoreDir.DB so we can write it to TiKV in a streaming manner.

type RangePropertiesCollector

type RangePropertiesCollector struct {
	// contains filtered or unexported fields
}

RangePropertiesCollector collects range properties for each range.

func (*RangePropertiesCollector) Add

func (c *RangePropertiesCollector) Add(key pebble.InternalKey, value []byte) error

Add implements `pebble.TablePropertyCollector`.

func (*RangePropertiesCollector) Finish

func (c *RangePropertiesCollector) Finish(userProps map[string]string) error

Finish implements `pebble.TablePropertyCollector`.

func (*RangePropertiesCollector) Name

func (c *RangePropertiesCollector) Name() string

Name implements `pebble.TablePropertyCollector`.

type RemoteChecksum

type RemoteChecksum struct {
	Schema     string
	Table      string
	Checksum   uint64
	TotalKVs   uint64
	TotalBytes uint64
}

RemoteChecksum represents a checksum result obtained from TiDB.

func (*RemoteChecksum) IsEqual

func (rc *RemoteChecksum) IsEqual(other *verification.KVChecksum) bool

IsEqual checks whether the checksum is equal to the other.
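
Given the exported fields, the comparison plausibly reduces to a field-by-field check; this sketch assumes verification.KVChecksum exposes Sum, SumKVS, and SumSize accessors, and the real method may differ.

func isEqual(rc *RemoteChecksum, other *verification.KVChecksum) bool {
	return rc.Checksum == other.Sum() &&
		rc.TotalKVs == other.SumKVS() &&
		rc.TotalBytes == other.SumSize()
}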

type RemoteDupKVStream

type RemoteDupKVStream struct {
	// contains filtered or unexported fields
}

RemoteDupKVStream implements the interface of DupKVStream. It collects duplicate key-value pairs from a TiKV region.

func NewRemoteDupKVStream

func NewRemoteDupKVStream(
	ctx context.Context,
	region *split.RegionInfo,
	keyRange tidbkv.KeyRange,
	importClientFactory ImportClientFactory,
) (*RemoteDupKVStream, error)

NewRemoteDupKVStream creates a new RemoteDupKVStream.

func (*RemoteDupKVStream) Close

func (s *RemoteDupKVStream) Close() error

Close implements the interface of DupKVStream.

func (*RemoteDupKVStream) Next

func (s *RemoteDupKVStream) Next() (key, val []byte, err error)

Next implements the interface of DupKVStream.

type RlimT

type RlimT = uint64

RlimT is the type of rlimit values.

func GetSystemRLimit

func GetSystemRLimit() (RlimT, error)

GetSystemRLimit returns the current open-file limit.

type StoreWriteLimiter

type StoreWriteLimiter interface {
	WaitN(ctx context.Context, storeID uint64, n int) error
	Limit() int
}

StoreWriteLimiter is used to limit the write rate of a store.
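
A hedged sketch of a StoreWriteLimiter built on golang.org/x/time/rate, with per-store limiters created lazily; the package's real implementation may differ.

type simpleStoreWriteLimiter struct {
	mu       sync.Mutex
	limiters map[uint64]*rate.Limiter
	limit    int
}

func newSimpleStoreWriteLimiter(bytesPerSec int) *simpleStoreWriteLimiter {
	return &simpleStoreWriteLimiter{
		limiters: make(map[uint64]*rate.Limiter),
		limit:    bytesPerSec,
	}
}

func (l *simpleStoreWriteLimiter) WaitN(ctx context.Context, storeID uint64, n int) error {
	l.mu.Lock()
	lim, ok := l.limiters[storeID]
	if !ok {
		// A burst equal to the limit permits short spikes, matching the
		// "burst is allowed" note on BackendConfig.StoreWriteBWLimit.
		lim = rate.NewLimiter(rate.Limit(l.limit), l.limit)
		l.limiters[storeID] = lim
	}
	l.mu.Unlock()
	// Note: rate.Limiter.WaitN errors if n exceeds the burst size.
	return lim.WaitN(ctx, n)
}

func (l *simpleStoreWriteLimiter) Limit() int { return l.limit }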

type TableRegionSizeGetter

type TableRegionSizeGetter interface {
	GetTableRegionSize(ctx context.Context, tableID int64) (map[uint64]int64, error)
}

TableRegionSizeGetter gets the table region size.

type TableRegionSizeGetterImpl

type TableRegionSizeGetterImpl struct {
	DB *sql.DB
}

TableRegionSizeGetterImpl implements TableRegionSizeGetter.

func (*TableRegionSizeGetterImpl) GetTableRegionSize

func (g *TableRegionSizeGetterImpl) GetTableRegionSize(ctx context.Context, tableID int64) (map[uint64]int64, error)

GetTableRegionSize implements TableRegionSizeGetter.

type TiKVChecksumManager

type TiKVChecksumManager struct {
	// contains filtered or unexported fields
}

TiKVChecksumManager is a manager that can compute checksum of a table using TiKV.

func NewTiKVChecksumManager

func NewTiKVChecksumManager(client kv.Client, pdClient pd.Client, distSQLScanConcurrency uint) *TiKVChecksumManager

NewTiKVChecksumManager returns a new TiKV checksum manager.

func (*TiKVChecksumManager) Checksum

func (e *TiKVChecksumManager) Checksum(ctx context.Context, tableInfo *checkpoints.TidbTableInfo) (*RemoteChecksum, error)

Checksum implements the ChecksumManager interface.

type Writer

type Writer struct {
	sync.Mutex
	// contains filtered or unexported fields
}

Writer is used to write data into an SST file.

func (*Writer) AppendRows

func (w *Writer) AppendRows(ctx context.Context, columnNames []string, rows encode.Rows) error

AppendRows appends rows to the SST file.

func (*Writer) Close

func (w *Writer) Close(ctx context.Context) (backend.ChunkFlushStatus, error)

Close implements backend.ChunkFlushStatus.

func (*Writer) IsSynced

func (w *Writer) IsSynced() bool

IsSynced implements backend.ChunkFlushStatus.
