base

package
v0.3.0 Latest Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: Apr 3, 2024 License: Apache-2.0 Imports: 5 Imported by: 0

Documentation

Index

Constants

View Source
const (
	ContainersReady string = "ContainersReady"
	PodInitialized  string = "Initialized"
	PodReady        string = "Ready"
	PodScheduled    string = "PodScheduled"
)
View Source
const (
	ConditionTrue    string = "True"
	ConditionFalse   string = "False"
	ConditionUnknown string = "Unknown"
)
View Source
const (
	ConditionUnavailable  string = "Unavailable"
	ConditionInitializing string = "Initializing"
	ConditionScheduling   string = "Scheduling"
	ConditionRunning      string = "Running"
)
View Source
const (
	DefaultSchedulerName     = v1.DefaultSchedulerName
	GPUSchedulerName         = "gpu-scheduler"
	VGPUSchedulerName        = "volcano" // TODO change to volcano when in production
	CPUPriorityClass         = "jeeves-cpu-compute"
	GPUPriorityClass         = "jeeves-gpu-compute"
	HPCPriorityClass         = "jeeves-hpc-compute"
	VGPUPriorityClass        = "jeeves-gpu-compute" // TODO change to jeeves-vgpu-compute when in production
	ResourceNvidiaGPU        = "nvidia.com/gpu"
	ResourceVendorVGPU       = "baidu.com/cgpu"
	ResourceVendorGPUPercent = "baidu.com/cgpu_core"
	ResourceVendorGPUMem     = "baidu.com/cgpu_memory"
	ResourceRdma             = "rdma/hca"
	ResourceInfinityBand     = "mellanox.com/InfiniBand"
	A800PriorityClass        = "jeeves-a800-compute"

	GPUSeriesA100     = "a100"
	GPUSeriesA800     = "a800"
	A100TolerationKey = "gpu-series"
)

Variables

This section is empty.

Functions

func ConvertConditionDetailStructToString

func ConvertConditionDetailStructToString(conditionDetails []*ConditionDetail) (string, error)

Types

type CPUSchedulingStrategy

type CPUSchedulingStrategy struct {
	Raw Resource
}

func (CPUSchedulingStrategy) Annotations

func (r CPUSchedulingStrategy) Annotations() map[string]string

func (CPUSchedulingStrategy) Labels

func (r CPUSchedulingStrategy) Labels() map[string]string

func (CPUSchedulingStrategy) NodeSelectorTerms

func (r CPUSchedulingStrategy) NodeSelectorTerms() []v1.NodeSelectorTerm

func (CPUSchedulingStrategy) PreferredSchedulingTerms

func (r CPUSchedulingStrategy) PreferredSchedulingTerms() []v1.PreferredSchedulingTerm

func (CPUSchedulingStrategy) PriorityClassName

func (r CPUSchedulingStrategy) PriorityClassName() string

func (CPUSchedulingStrategy) SchedulerName

func (r CPUSchedulingStrategy) SchedulerName() string

func (CPUSchedulingStrategy) Tolerations

func (r CPUSchedulingStrategy) Tolerations() []v1.Toleration

type ConditionDetail

type ConditionDetail struct {
	Type            string `json:"type"`
	Reason          string `json:"reason"`
	Message         string `json:"message"`
	Status          string `json:"status"`
	ConditionStatus string `json:"condition_status"`
}

func ConvertConditionDetailStringToStruct

func ConvertConditionDetailStringToStruct(str string) ([]*ConditionDetail, error)

type GPUSchedulingStrategy

type GPUSchedulingStrategy struct {
	Raw       Resource
	CanBorrow bool
}

func (GPUSchedulingStrategy) Annotations

func (r GPUSchedulingStrategy) Annotations() map[string]string

func (GPUSchedulingStrategy) Labels

func (r GPUSchedulingStrategy) Labels() map[string]string

func (GPUSchedulingStrategy) NodeSelectorTerms

func (r GPUSchedulingStrategy) NodeSelectorTerms() []v1.NodeSelectorTerm

func (GPUSchedulingStrategy) PreferredSchedulingTerms

func (r GPUSchedulingStrategy) PreferredSchedulingTerms() []v1.PreferredSchedulingTerm

func (GPUSchedulingStrategy) PriorityClassName

func (r GPUSchedulingStrategy) PriorityClassName() string

func (GPUSchedulingStrategy) SchedulerName

func (r GPUSchedulingStrategy) SchedulerName() string

func (GPUSchedulingStrategy) Tolerations

func (r GPUSchedulingStrategy) Tolerations() []v1.Toleration

type HETStrategy

type HETStrategy struct {
	Raw    Resource
	Region string
}

Heterogeneous Architecture Strategy. Gives guaranteed resources in pure-CPU computation, but halves the host resource requests when a GPU is used. In pure-CPU computation, node selection is done in a tiling manner, while in GPU computation the scheduler selects already-used nodes with available resources first, leaving large blocks of free resources for large jobs such as multi-GPU training.

func (HETStrategy) GetRegion

func (r HETStrategy) GetRegion() string

func (HETStrategy) Limits

func (r HETStrategy) Limits() Resource

func (HETStrategy) Merge

func (r HETStrategy) Merge(resource Resource)

func (HETStrategy) Requests

func (r HETStrategy) Requests() Resource

func (HETStrategy) Resource

func (r HETStrategy) Resource() Resource

func (HETStrategy) SchedulingStrategy

func (r HETStrategy) SchedulingStrategy() SchedulingStrategy

type HalfMemoryResourceStrategy

type HalfMemoryResourceStrategy struct {
	Raw             Resource
	Region          string
	UseGPUScheduler bool
	CanBorrow       bool
}

func (HalfMemoryResourceStrategy) GetRegion

func (r HalfMemoryResourceStrategy) GetRegion() string

func (HalfMemoryResourceStrategy) Limits

func (HalfMemoryResourceStrategy) Merge

func (r HalfMemoryResourceStrategy) Merge(resource Resource)

func (HalfMemoryResourceStrategy) Requests

func (r HalfMemoryResourceStrategy) Requests() Resource

func (HalfMemoryResourceStrategy) Resource

func (r HalfMemoryResourceStrategy) Resource() Resource

func (HalfMemoryResourceStrategy) SchedulingStrategy

func (r HalfMemoryResourceStrategy) SchedulingStrategy() SchedulingStrategy

type JobPodsStatus

type JobPodsStatus struct {
	Name       string             `json:"name"`
	Conditions []*ConditionDetail `json:"conditions"`
}

type Resource

type Resource struct {
	CPUNum           uint   `json:"cpu_num" gorm:"not null;default:1"`
	GPUNum           uint   `json:"gpu_num" gorm:"not null;default:0"`
	GPUPercent       uint   `json:"gpu_percent" gorm:"not null;default:100"`
	GPUMem           uint   `json:"gpu_mem" gorm:"not null;default:12"`
	MemorySize       uint   `json:"memory_size" gorm:"not null;default:4"`
	EphemeralStorage uint   `json:"-" gorm:"not null;default:800"`
	GPUSeries        string `json:"gpu_series" gorm:"type:varchar(32);default:'pascal'"`
}

func (Resource) ResourceList

func (r Resource) ResourceList(region string) v1.ResourceList

type ResourceStrategy

type ResourceStrategy interface {
	Requests() Resource
	Limits() Resource
	Resource() Resource
	Merge(Resource)
	SchedulingStrategy() SchedulingStrategy
	GetRegion() string
}

type SchedulingStrategy

type SchedulingStrategy interface {
	Labels() map[string]string
	Annotations() map[string]string
	SchedulerName() string
	PriorityClassName() string
	Tolerations() []v1.Toleration
	PreferredSchedulingTerms() []v1.PreferredSchedulingTerm
	NodeSelectorTerms() []v1.NodeSelectorTerm
}

type VGPUSchedulingStrategy

type VGPUSchedulingStrategy struct {
	Raw       Resource
	CanBorrow bool
}

func (VGPUSchedulingStrategy) Annotations

func (r VGPUSchedulingStrategy) Annotations() map[string]string

func (VGPUSchedulingStrategy) Labels

func (r VGPUSchedulingStrategy) Labels() map[string]string

func (VGPUSchedulingStrategy) NodeSelectorTerms

func (r VGPUSchedulingStrategy) NodeSelectorTerms() []v1.NodeSelectorTerm

func (VGPUSchedulingStrategy) PreferredSchedulingTerms

func (r VGPUSchedulingStrategy) PreferredSchedulingTerms() []v1.PreferredSchedulingTerm

func (VGPUSchedulingStrategy) PriorityClassName

func (r VGPUSchedulingStrategy) PriorityClassName() string

func (VGPUSchedulingStrategy) SchedulerName

func (r VGPUSchedulingStrategy) SchedulerName() string

func (VGPUSchedulingStrategy) Tolerations

func (r VGPUSchedulingStrategy) Tolerations() []v1.Toleration

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL