scheduler

package
v0.1.23 Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: Sep 12, 2022 License: MIT Imports: 29 Imported by: 0

Documentation

Index

Constants

View Source
const (
	// SuccessSynced is used as part of the Event 'reason' when a SharePod is
	// synced.
	SuccessSynced = "Synced"
	// ErrResourceExists is used as part of the Event 'reason' when a SharePod
	// fails to sync because a resource of the same name already exists.
	ErrResourceExists = "ErrResourceExists"

	// MessageResourceExists is the format string for the Event message used
	// when a resource fails to sync because one with the same name already
	// exists and is not managed by SharePod. The %q verb is the placeholder
	// for the conflicting resource.
	MessageResourceExists = "Resource %q already exists and is not managed by SharePod"
	// MessageResourceSynced is the message used for the Event fired when a
	// SharePod is scheduled successfully.
	MessageResourceSynced = "SharePod scheduled successfully"

	// Keys for the affinity, anti-affinity and exclusion scheduling hints.
	// NOTE(review): presumably these are looked up on the pod (as labels or
	// annotations) by GPUAffinityFilter, GPUAntiAffinityFilter and
	// GPUExclusionFilter respectively — confirm against the filter code.
	KubeShareScheduleAffinity     = "kubeshare/sched_affinity"
	KubeShareScheduleAntiAffinity = "kubeshare/sched_anti-affinity"
	KubeShareScheduleExclusion    = "kubeshare/sched_exclusion"
)

Variables

This section is empty.

Functions

func GPUAffinityFilter

func GPUAffinityFilter(nodeResources NodeResources, sharepod *corev1.Pod)

func GPUAntiAffinityFilter

func GPUAntiAffinityFilter(nodeResources NodeResources, sharepod *corev1.Pod)

func GPUExclusionFilter

func GPUExclusionFilter(nodeResources NodeResources, sharepod *corev1.Pod)

func ScheduleAlgorithmBestFit

func ScheduleAlgorithmBestFit(isGPUPod bool, gpu_request float64, gpu_mem int64, sharepod *corev1.Pod, nodeResources NodeResources) (schedNodeName string, schedGPUID string)

Types

type Controller

// Controller exposes no exported fields; obtain an instance from
// NewController and start it with Run.
type Controller struct {
	// contains filtered or unexported fields
}

Controller is the controller implementation for SharePod resources.

func NewController

func NewController(
	kubeclientset kubernetes.Interface,
	kubeshareclientset clientset.Interface,
	nodeInformer coreinformers.NodeInformer,
	podInformer coreinformers.PodInformer,
	kubeshareInformer informers.SharePodInformer) *Controller

func (*Controller) Run

func (c *Controller) Run(threadiness int, stopCh <-chan struct{}) error

type GPUInfo

// GPUInfo holds the free capacity and the scheduling tags recorded for a GPU.
// NodeResource stores these values keyed by string in GpuFree and GPUID2GPU.
type GPUInfo struct {
	// GPUFreeReq is the remaining GPU request capacity.
	// NOTE(review): the unit/scale is not visible here (the scheduler entry
	// point takes the request as a float64, this field is an int64) — confirm.
	GPUFreeReq int64
	// GPUFreeMem in bytes
	GPUFreeMem int64

	// Affinity and anti-affinity tags currently associated with this GPU.
	// NOTE(review): presumably matched against the kubeshare/sched_affinity
	// and kubeshare/sched_anti-affinity values — confirm.
	GPUAffinityTags     []string
	GPUAntiAffinityTags []string
	// GPUExclusionTags is expected to hold only one tag
	// (len(GPUExclusionTags) should be one).
	GPUExclusionTags []string
}

func (*GPUInfo) DeepCopy

func (this *GPUInfo) DeepCopy() *GPUInfo

type NodeResource

// NodeResource records the total and free CPU, memory and GPU resources of a
// node, together with per-GPU details and pod-manager port bookkeeping.
type NodeResource struct {
	// Total capacities of the node.
	// NOTE(review): the units of CpuTotal and MemTotal are not stated in
	// this file — confirm against the code that fills them in.
	CpuTotal int64
	MemTotal int64
	GpuTotal int
	// GpuMemTotal in bytes; CpuFree and MemFree are the free counterparts of
	// CpuTotal and MemTotal.
	GpuMemTotal int64
	CpuFree     int64
	MemFree     int64
	/* Available GPU calculation */
	// GpuFreeCount is the total GPU count minus the pods using nvidia.com/gpu.
	GpuFreeCount int
	// GpuFree holds the available usage of GPUs (1.0 - SharePod usage).
	// NOTE(review): the previous comment also called this a "GPUID to integer
	// index mapping", but the value type is *GPUInfo, so that wording looks
	// stale — confirm what the string key is.
	GpuFree map[string]*GPUInfo

	// GPUID2GPU presumably maps a GPU ID to its GPUInfo, as the name
	// suggests — TODO confirm how it differs from GpuFree.
	GPUID2GPU map[string]*GPUInfo
	// UUID -> Port (string)
	UUID2Port map[string]string

	// PodManagerPortBitmap tracks the ports that are in use.
	PodManagerPortBitmap *bitmap.RRBitmap
	PodIP                string
}

func (*NodeResource) DeepCopy

func (this *NodeResource) DeepCopy() *NodeResource

type NodeResources

// NOTE(review): the string key is presumably the node name (compare the
// schedNodeName result of ScheduleAlgorithmBestFit) — confirm.
type NodeResources map[string]*NodeResource

NodeResources holds the resources available in the cluster for scheduling training jobs.

func (*NodeResources) DeepCopy

func (this *NodeResources) DeepCopy() *NodeResources

func (*NodeResources) PrintMe

func (this *NodeResources) PrintMe()

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL