driver

package
v0.0.0-...-b4f3a91 Latest Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: Jul 26, 2022 License: Apache-2.0 Imports: 19 Imported by: 0

Documentation

Index

Constants

View Source
// Identifiers and option keys for the JuiceFS driver.
const (
	// JuiceFSDriver is the v1alpha1.DriverName value for JuiceFS.
	JuiceFSDriver          v1alpha1.DriverName = "juicefs"
	// JuiceFSCacheDirOption is the "cache-dir" option key.
	// NOTE(review): presumably a JuiceFS mount option name — confirm.
	JuiceFSCacheDirOption                      = "cache-dir"
	// JuiceFSCacheSizeOption is the "cache-size" option key.
	// NOTE(review): presumably a JuiceFS mount option name — confirm.
	JuiceFSCacheSizeOption                     = "cache-size"
	// JuiceFSCSIDriverName is the name "csi.juicefs.com".
	// NOTE(review): presumably the CSI driver name set on persistent volumes — confirm.
	JuiceFSCSIDriverName                       = "csi.juicefs.com"
)
View Source
// String keys named after the fields of a JuiceFS secret.
// NOTE(review): presumably these are the keys looked up in the secret's data
// map — confirm against the code that reads the secret.
const (
	// JuiceFSSecretName is the key "name".
	JuiceFSSecretName    string = "name"
	// JuiceFSSecretStorage is the key "storage".
	JuiceFSSecretStorage string = "storage"
	// JuiceFSSecretMetaURL is the key "metaurl".
	JuiceFSSecretMetaURL string = "metaurl"
	// JuiceFSSecretBucket is the key "bucket".
	JuiceFSSecretBucket  string = "bucket"
	// JuiceFSSecretSK is the key "secret-key".
	JuiceFSSecretSK      string = "secret-key"
	// JuiceFSSecretAK is the key "access-key".
	JuiceFSSecretAK      string = "access-key"
)
View Source
const (
	// DefaultDriver is an alias for JuiceFSDriver.
	// NOTE(review): presumably the driver chosen when none is specified — confirm.
	DefaultDriver = JuiceFSDriver
)

Variables

View Source
// Package-level JuiceFS variables. Their initial values are not shown in
// this listing.
var (
	// JuiceFSSecretDataKeys is a list of strings.
	// NOTE(review): presumably the JuiceFSSecret* keys — confirm.
	JuiceFSSecretDataKeys      []string
	// JuiceFSSupportStorage is a list of strings.
	// NOTE(review): presumably the storage types the driver supports — confirm.
	JuiceFSSupportStorage      []string
	// JuiceFSDefaultMountOptions points to a v1alpha1.JuiceFSMountOptions value.
	// NOTE(review): presumably the mount options applied by default — confirm.
	JuiceFSDefaultMountOptions *v1alpha1.JuiceFSMountOptions
)
View Source
var (
	// StorageClassName is initialized to "paddle-operator".
	// NOTE(review): presumably the storage class name set on the PVs and PVCs
	// created by the drivers — confirm.
	StorageClassName = "paddle-operator"
)

Functions

This section is empty.

Types

type BaseDriver

// BaseDriver holds the driver name and carries the methods declared on
// *BaseDriver (CreatePVC, CreateService, GetLabel, GetServiceName,
// GetRuntimeName, CreateRmrJobOptions, CreateClearJobOptions,
// CreateCacheStatus and DoClearJob). JuiceFS embeds it.
type BaseDriver struct {
	// Name is the driver's v1alpha1.DriverName.
	Name v1alpha1.DriverName
}

func (*BaseDriver) CreateCacheStatus

func (d *BaseDriver) CreateCacheStatus(opt *common.ServerOptions, status *v1alpha1.CacheStatus) error

func (*BaseDriver) CreateClearJobOptions

func (d *BaseDriver) CreateClearJobOptions(opt *v1alpha1.ClearJobOptions, ctx *common.RequestContext) error

func (*BaseDriver) CreatePVC

func (d *BaseDriver) CreatePVC(pvc *v1.PersistentVolumeClaim, ctx *common.RequestContext) error

CreatePVC creates a persistent volume claim, which will be used by the runtime server and PaddleJob worker pods.

func (*BaseDriver) CreateRmrJobOptions

func (d *BaseDriver) CreateRmrJobOptions(opt *v1alpha1.RmrJobOptions, ctx *common.RequestContext) error

func (*BaseDriver) CreateService

func (d *BaseDriver) CreateService(service *v1.Service, ctx *common.RequestContext) error

CreateService creates a service for the runtime StatefulSet server.

func (*BaseDriver) DoClearJob

func (d *BaseDriver) DoClearJob(ctx context.Context, opt *v1alpha1.ClearJobOptions, log logr.Logger) error

DoClearJob clears the cached data in the folders specified by the options.

func (*BaseDriver) GetLabel

func (d *BaseDriver) GetLabel(sampleSetName string) string

GetLabel returns the label formed by concatenating PaddleLabel, the driver name, and the SampleSet name.

func (*BaseDriver) GetRuntimeName

func (d *BaseDriver) GetRuntimeName(sampleSetName string) string

func (*BaseDriver) GetServiceName

func (d *BaseDriver) GetServiceName(sampleSetName string) string

type Driver

// Driver is the set of operations a cache driver provides: creating the
// Kubernetes objects (PV, PVC, Service, runtime StatefulSet), building the
// options for sync/warmup/rmr/clear jobs, and running those jobs.
type Driver interface {
	// CreatePV creates a persistent volume with the specified driver.
	CreatePV(pv *v1.PersistentVolume, ctx *common.RequestContext) error

	// CreatePVC creates a persistent volume claim for the PaddleJob.
	CreatePVC(pvc *v1.PersistentVolumeClaim, ctx *common.RequestContext) error

	// GetLabel gets the label used to mark the PV, the PVC, and the nodes that have cached data.
	GetLabel(sampleSetName string) string

	// CreateService creates a service for the runtime StatefulSet.
	CreateService(service *v1.Service, ctx *common.RequestContext) error

	// GetServiceName gets the name of the runtime StatefulSet service.
	GetServiceName(sampleSetName string) string

	// CreateRuntime creates the runtime StatefulSet that manages cached data.
	CreateRuntime(ds *appv1.StatefulSet, ctx *common.RequestContext) error

	// GetRuntimeName gets the runtime StatefulSet name.
	GetRuntimeName(sampleSetName string) string

	// CreateSyncJobOptions creates the options of a sync job; the controller posts them to the runtime server.
	CreateSyncJobOptions(opt *v1alpha1.SyncJobOptions, ctx *common.RequestContext) error

	// CreateWarmupJobOptions creates the options of a warmup job; this method is currently only used by the SampleJob controller.
	CreateWarmupJobOptions(opt *v1alpha1.WarmupJobOptions, ctx *common.RequestContext) error

	// CreateRmrJobOptions creates the options of an rmr job; used by the SampleJob controller.
	CreateRmrJobOptions(opt *v1alpha1.RmrJobOptions, ctx *common.RequestContext) error

	// CreateClearJobOptions creates the options of a clear job; used by the SampleJob controller.
	CreateClearJobOptions(opt *v1alpha1.ClearJobOptions, ctx *common.RequestContext) error

	// CreateCacheStatus gets the data status in the mount and cache paths.
	CreateCacheStatus(opt *common.ServerOptions, status *v1alpha1.CacheStatus) error

	// DoSyncJob is called by the runtime server; it syncs data from remote storage to the cache engine.
	DoSyncJob(ctx context.Context, opt *v1alpha1.SyncJobOptions, log logr.Logger) error

	// DoClearJob is called by the runtime server; it clears the cached data.
	DoClearJob(ctx context.Context, opt *v1alpha1.ClearJobOptions, log logr.Logger) error

	// DoWarmupJob is called by the runtime server; it warms up data to local storage on each node respectively.
	DoWarmupJob(ctx context.Context, opt *v1alpha1.WarmupJobOptions, log logr.Logger) error

	// DoRmrJob is called by the runtime server; it removes the data under the specified path from the cache engine.
	DoRmrJob(ctx context.Context, opt *v1alpha1.RmrJobOptions, log logr.Logger) error
}

func GetDriver

func GetDriver(name v1alpha1.DriverName) (Driver, error)

GetDriver gets the CSI driver by name and returns an error if it is not found.

type JuiceFS

// JuiceFS is the JuiceFS driver. It embeds BaseDriver and declares CreatePV,
// CreateRuntime, CreateSyncJobOptions, CreateWarmupJobOptions, DoSyncJob,
// DoWarmupJob and DoRmrJob on *JuiceFS; together with the methods promoted
// from BaseDriver these cover every method of the Driver interface.
type JuiceFS struct {
	// BaseDriver supplies the driver name and the shared methods.
	BaseDriver
}

func NewJuiceFSDriver

func NewJuiceFSDriver() *JuiceFS

func (*JuiceFS) CreatePV

func (j *JuiceFS) CreatePV(pv *v1.PersistentVolume, ctx *common.RequestContext) error

CreatePV creates a JuiceFS persistent volume with mount options. For how to set the parameters of the PV, refer to https://github.com/juicedata/juicefs-csi-driver/tree/master/examples/static-provisioning-mount-options

func (*JuiceFS) CreateRuntime

func (j *JuiceFS) CreateRuntime(ds *appv1.StatefulSet, ctx *common.RequestContext) error

func (*JuiceFS) CreateSyncJobOptions

func (j *JuiceFS) CreateSyncJobOptions(opt *v1alpha1.SyncJobOptions, ctx *common.RequestContext) error

CreateSyncJobOptions creates the sync job options from the information in the request context; the controller uses these options to ask the runtime server to run the data sync task asynchronously. TODO: Support the different URI formats of all storage types in JuiceFSSupportStorage; some data storage may need additional secret settings in v1alpha1.Source.SecretRef. More info: https://github.com/juicedata/juicesync

func (*JuiceFS) CreateWarmupJobOptions

func (j *JuiceFS) CreateWarmupJobOptions(opt *v1alpha1.WarmupJobOptions, ctx *common.RequestContext) error

func (*JuiceFS) DoRmrJob

func (j *JuiceFS) DoRmrJob(ctx context.Context, opt *v1alpha1.RmrJobOptions, log logr.Logger) error

DoRmrJob deletes the data of the JuiceFS storage backend under the specified paths. TODO: there are some bugs in the JuiceFS rmr command: after running rmr on paths, the sync command can't work correctly in the container, whereas POSIX rm works well with the JuiceFS sync command.

func (*JuiceFS) DoSyncJob

func (j *JuiceFS) DoSyncJob(ctx context.Context, opt *v1alpha1.SyncJobOptions, log logr.Logger) error

DoSyncJob syncs data from the source databases to the JuiceFS backend object storage; this job only runs in the first runtime server. According to the design concept of containers, it is not good practice to specify the --worker option when doing a sync job. When the sync command is executed in the first runtime server, the data is automatically warmed up on that node, which may cause duplicated cached data in the Kubernetes cluster. TODO: clean the cached data after the sync command is done, or is there a better way?

func (*JuiceFS) DoWarmupJob

func (j *JuiceFS) DoWarmupJob(ctx context.Context, opt *v1alpha1.WarmupJobOptions, log logr.Logger) error

DoWarmupJob warms up data from remote object storage to the cache nodes, which can speed up the model training process in a Kubernetes cluster. TODO: different cache nodes should warm up different data; the warmup strategy should match the sampler API defined in the paddle.io submodule, such as RandomSampler/SequenceSampler/DistributedBatchSampler, etc. More information: https://www.paddlepaddle.org.cn/documentation/docs/zh/api/paddle/io/Overview_cn.html

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL