Documentation ¶
Index ¶
- Constants
- Variables
- func BuildKubeConfig(sConfig *Config) *rest.Config
- type AffinityGroup
- type AffinityGroupList
- type AffinityGroupMemberBindInfo
- type AffinityGroupMemberSpec
- type AffinityGroupSpec
- type AffinityGroupStatus
- type CellAddress
- type CellType
- type CellTypeSpec
- type Config
- type LazyPreemptionStatus
- type ObjectMeta
- type PhysicalCellSpec
- type PhysicalClusterSpec
- type PodBindInfo
- type PodPlacementInfo
- type PodSchedulingSpec
- type ReservationId
- type ReservedCellSpec
- type VirtualCellSpec
- type VirtualClusterName
- type VirtualClusterSpec
- type WebServerError
- type WebServerPaths
Constants ¶
// General constants: the component identity, the resource / annotation / env
// names through which a Pod interacts with this scheduler, and the Pod
// priority bounds.
const (
	ComponentName         = "hivedscheduler"
	GroupName             = "hivedscheduler.microsoft.com"
	DefaultConfigFilePath = "./hivedscheduler.yaml"
	UnlimitedValue        = -1

	// To leverage this scheduler, at least one container in the Pod should contain
	// below resource limit with any positive int16 value.
	ResourceNamePodSchedulingEnable = GroupName + "/pod-scheduling-enable"

	// To leverage this scheduler, the Pod should contain below annotation in
	// PodSchedulingSpec YAML format.
	AnnotationKeyPodSchedulingSpec = GroupName + "/pod-scheduling-spec"

	// To leverage this scheduler, if one container in the Pod wants to use the
	// allocated GPUs for the whole Pod, it should contain below env.
	// env:
	// - name: NVIDIA_VISIBLE_DEVICES
	//   valueFrom:
	//     fieldRef:
	//       fieldPath: metadata.annotations['hivedscheduler.microsoft.com/pod-gpu-isolation']
	// The annotation referred by the env will be populated by scheduler when binding the pod.
	//
	// Notes:
	// 1. The scheduler directly delivers GPU isolation decision to
	//    nvidia-container-runtime through Pod Env: NVIDIA_VISIBLE_DEVICES.
	// 2. If multiple containers in the Pod contain the env, the allocated GPUs are
	//    all visible to them, so it is these containers' freedom to control how
	//    to share these GPUs.
	EnvNameNvidiaVisibleDevices  = "NVIDIA_VISIBLE_DEVICES"
	AnnotationKeyPodGpuIsolation = GroupName + "/pod-gpu-isolation"

	// Populated by this scheduler, used to track and recover allocated placement.
	// It is in PodBindInfo YAML format.
	AnnotationKeyPodBindInfo = GroupName + "/pod-bind-info"

	// Priority Range of Guaranteed Pod.
	MaxGuaranteedPriority = int32(1000)
	MinGuaranteedPriority = int32(0)

	// Priority of Opportunistic Pod.
	OpportunisticPriority = int32(-1)
)
General Constants
// WebServer route constants. Every route is built by concatenation from
// VersionPath, which evaluates to "/v1".
const (
	RootPath    = "/"
	VersionPath = RootPath + "v1"

	// Scheduler Extender API: API with K8S Default Scheduler
	// These evaluate to "/v1/extender", "/v1/extender/filter",
	// "/v1/extender/bind" and "/v1/extender/preempt" respectively.
	ExtenderPath = VersionPath + "/extender"
	FilterPath   = ExtenderPath + "/filter"
	BindPath     = ExtenderPath + "/bind"
	PreemptPath  = ExtenderPath + "/preempt"

	// Scheduler Inspect API: API to inspect current scheduling status
	// Notes:
	// 1. Both Binding and Bound AffinityGroups/Pods are considered as Allocated.
	InspectPath = VersionPath + "/inspect"
	// Inspect current allocated AffinityGroup(s)
	// Evaluates to "/v1/inspect/affinitygroups/" (note the trailing slash).
	AffinityGroupsPath = InspectPath + "/affinitygroups/"
)
WebServer Constants
Variables ¶
// Kubernetes connection defaults. Each value is read from the process
// environment once, when the package is initialized.
var (
	// DefaultKubeConfigFilePath is ${HOME}/.kube/config.
	DefaultKubeConfigFilePath = os.Getenv("HOME") + "/.kube/config"

	// EnvValueKubeApiServerAddress is the value of ${KUBE_APISERVER_ADDRESS};
	// it is empty when the variable is unset.
	EnvValueKubeApiServerAddress = os.Getenv("KUBE_APISERVER_ADDRESS")

	// EnvValueKubeConfigFilePath is the value of ${KUBECONFIG}; it is empty
	// when the variable is unset.
	EnvValueKubeConfigFilePath = os.Getenv("KUBECONFIG")
)
Functions ¶
func BuildKubeConfig ¶
Types ¶
type AffinityGroup ¶
// AffinityGroup is the JSON representation of an affinity group: an embedded
// ObjectMeta serialized under "metadata" plus its Status under "status".
type AffinityGroup struct {
	ObjectMeta `json:"metadata"`
	Status     AffinityGroupStatus `json:"status"`
}
type AffinityGroupList ¶ added in v0.17.0
// AffinityGroupList is a collection of AffinityGroup objects, serialized
// under the JSON key "items".
type AffinityGroupList struct {
	Items []AffinityGroup `json:"items"`
}
type AffinityGroupMemberBindInfo ¶ added in v0.16.0
// AffinityGroupMemberBindInfo holds the pod placements recorded for one
// affinity group member (YAML key "podPlacements").
// NOTE(review): presumably one PodPlacementInfo per pod of the member — confirm.
type AffinityGroupMemberBindInfo struct {
	PodPlacements []PodPlacementInfo `yaml:"podPlacements"`
}
type AffinityGroupMemberSpec ¶ added in v0.16.0
type AffinityGroupSpec ¶ added in v0.16.0
// AffinityGroupSpec describes an affinity group in YAML: its name and the
// list of member specs. It is referenced from PodSchedulingSpec.AffinityGroup.
type AffinityGroupSpec struct {
	Name    string                    `yaml:"name"`
	Members []AffinityGroupMemberSpec `yaml:"members"`
}
type AffinityGroupStatus ¶ added in v0.17.0
// AffinityGroupStatus is the status section of an AffinityGroup.
type AffinityGroupStatus struct {
	// LazyPreemptionStatus is a pointer without omitempty, so a nil value is
	// emitted as JSON null rather than being dropped.
	LazyPreemptionStatus *LazyPreemptionStatus `json:"lazyPreemptionStatus"`
}
type CellAddress ¶
// CellAddress is a distinct string type for the address of a cell; it is the
// type of PhysicalCellSpec.CellAddress.
type CellAddress string
General Types
type CellType ¶
// CellType is a distinct string type naming a kind of cell; it keys
// PhysicalClusterSpec.CellTypes and is the type of PhysicalCellSpec.CellType.
type CellType string
General Types
type CellTypeSpec ¶
type Config ¶
// Config is the scheduler configuration, with every field mapped to a YAML
// key. All fields are pointers so an omitted key can be told apart from an
// explicitly set zero value.
type Config struct {
	// KubeApiServerAddress defaults to ${KUBE_APISERVER_ADDRESS}.
	// KubeConfigFilePath defaults to ${KUBECONFIG} then falls back to ${HOME}/.kube/config.
	//
	// If both KubeApiServerAddress and KubeConfigFilePath after defaulting are still empty, falls back to the
	// [k8s inClusterConfig](https://kubernetes.io/docs/tasks/access-application-cluster/access-cluster/#accessing-the-api-from-a-pod).
	//
	// If both KubeApiServerAddress and KubeConfigFilePath after defaulting are not empty,
	// KubeApiServerAddress overrides the server address specified in the file referred by KubeConfigFilePath.
	//
	// If only KubeApiServerAddress after defaulting is not empty, it should be an insecure ApiServer address (can be got from
	// [Insecure ApiServer](https://kubernetes.io/docs/reference/access-authn-authz/controlling-access/#api-server-ports-and-ips) or
	// [kubectl proxy](https://kubernetes.io/docs/tasks/access-application-cluster/access-cluster/#using-kubectl-proxy))
	// which does not enforce authentication.
	//
	// If only KubeConfigFilePath after defaulting is not empty, it should be a valid
	// [KubeConfig File](https://kubernetes.io/docs/tasks/access-application-cluster/configure-access-multiple-clusters/#explore-the-home-kube-directory)
	// which inlines or refers the valid
	// [ApiServer Credential Files](https://kubernetes.io/docs/reference/access-authn-authz/controlling-access/#transport-security).
	//
	// Address should be in format http[s]://host:port
	KubeApiServerAddress *string `yaml:"kubeApiServerAddress"`
	KubeConfigFilePath   *string `yaml:"kubeConfigFilePath"`

	// WebServer
	// Defaults to :9096
	WebServerAddress *string `yaml:"webServerAddress"`

	// Specify a threshold for PodBindAttempts, that after it is exceeded, an extra
	// Pod binding will be executed forcefully.
	ForcePodBindThreshold *int32 `yaml:"forcePodBindThreshold"`

	// If a Pod is decided to be PodWaiting, it will block the whole scheduling by
	// WaitingPodSchedulingBlockMilliSec.
	// Large value can be used to achieve stronger FIFO scheduling by sacrificing
	// the scheduling throughput.
	// This is a workaround until PodMaxBackoffSeconds can be configured for
	// K8S Default Scheduler.
	WaitingPodSchedulingBlockMilliSec *int64 `yaml:"waitingPodSchedulingBlockMilliSec"`

	// Specify the whole physical cluster
	// TODO: Automatically construct it based on node info from GPU and Network Device Plugins
	PhysicalCluster *PhysicalClusterSpec `yaml:"physicalCluster"`

	// Specify all the virtual clusters belonging to the physical cluster
	VirtualClusters *map[VirtualClusterName]VirtualClusterSpec `yaml:"virtualClusters"`
}
func InitRawConfig ¶ added in v0.17.0
type LazyPreemptionStatus ¶ added in v0.17.0
type ObjectMeta ¶ added in v0.17.0
// ObjectMeta carries the metadata of a WebServer-exposed object; currently
// only its name (JSON key "name"). It is embedded in AffinityGroup.
type ObjectMeta struct {
	Name string `json:"name"`
}
WebServer Exposed Objects: Align with K8S Objects
type PhysicalCellSpec ¶
// PhysicalCellSpec specifies one physical cell instance. The type is
// recursive: CellChildren holds nested PhysicalCellSpec values.
type PhysicalCellSpec struct {
	CellType      CellType      `yaml:"cellType"`
	CellAddress   CellAddress   `yaml:"cellAddress"`
	ReservationId ReservationId `yaml:"reservationId"`
	// CellChildren is dropped from the YAML output when empty (omitempty).
	CellChildren []PhysicalCellSpec `yaml:"cellChildren,omitempty"`
}
Specify physical Cell instances.
type PhysicalClusterSpec ¶
// PhysicalClusterSpec is the physical cluster definition: the cell type
// specs keyed by CellType, and the list of top-level physical cells.
type PhysicalClusterSpec struct {
	CellTypes     map[CellType]CellTypeSpec `yaml:"cellTypes"`
	PhysicalCells []PhysicalCellSpec        `yaml:"physicalCells"`
}
Physical cluster definition
type PodBindInfo ¶
// PodBindInfo records where a pod was bound. It is used to recover the
// resources the scheduler allocated, and is the YAML format stored in the
// AnnotationKeyPodBindInfo annotation.
type PodBindInfo struct {
	Node                  string                        `yaml:"node"`         // node to bind
	GpuIsolation          []int32                       `yaml:"gpuIsolation"` // GPUs to bind
	CellChain             string                        `yaml:"cellChain"`    // cell chain selected
	AffinityGroupBindInfo []AffinityGroupMemberBindInfo `yaml:"affinityGroupBindInfo"`
}
Used to recover scheduler allocated resource
type PodPlacementInfo ¶ added in v0.16.0
// PodPlacementInfo describes the placement of a single pod: the physical node
// and the GPU indices on it.
type PodPlacementInfo struct {
	PhysicalNode       string  `yaml:"physicalNode"`
	PhysicalGpuIndices []int32 `yaml:"physicalGpuIndices"`
	// preassigned cell types used by the pods. used to locate the virtual cells
	// when adding an allocated pod
	// NOTE(review): presumably index-aligned with PhysicalGpuIndices — confirm.
	PreassignedCellTypes []CellType `yaml:"preassignedCellTypes"`
}
type PodSchedulingSpec ¶
// PodSchedulingSpec is the per-pod scheduling request. It is the YAML format
// expected in the AnnotationKeyPodSchedulingSpec annotation.
type PodSchedulingSpec struct {
	VirtualCluster VirtualClusterName `yaml:"virtualCluster"`
	// NOTE(review): presumably either OpportunisticPriority or a value within
	// [MinGuaranteedPriority, MaxGuaranteedPriority] — confirm against the
	// validation code.
	Priority             int32          `yaml:"priority"`
	ReservationId        ReservationId  `yaml:"reservationId"`
	GpuType              string         `yaml:"gpuType"`
	GpuNumber            int32          `yaml:"gpuNumber"`
	GangReleaseEnable    bool           `yaml:"gangReleaseEnable"`
	LazyPreemptionEnable bool           `yaml:"lazyPreemptionEnable"`
	// AffinityGroup is a pointer, so it may be nil when the key is absent.
	AffinityGroup *AffinityGroupSpec `yaml:"affinityGroup"`
}
type ReservationId ¶
// ReservationId is a distinct string type identifying a reservation; it is
// used by PhysicalCellSpec, PodSchedulingSpec and ReservedCellSpec.
type ReservationId string
General Types
type ReservedCellSpec ¶
// ReservedCellSpec names a reserved cell by its ReservationId (YAML key
// "reservationId"). It is listed under VirtualClusterSpec.ReservedCells.
type ReservedCellSpec struct {
	ReservationId ReservationId `yaml:"reservationId"`
}
type VirtualCellSpec ¶
type VirtualClusterSpec ¶
// VirtualClusterSpec defines one virtual cluster: its virtual cells and,
// optionally, its reserved cells.
type VirtualClusterSpec struct {
	VirtualCells []VirtualCellSpec `yaml:"virtualCells"`
	// ReservedCells is dropped from the YAML output when empty (omitempty).
	ReservedCells []ReservedCellSpec `yaml:"reservedCells,omitempty"`
}
type WebServerError ¶
func NewWebServerError ¶
func NewWebServerError(code int, message string) *WebServerError
func (*WebServerError) Error ¶
func (err *WebServerError) Error() string
type WebServerPaths ¶
// WebServerPaths is a JSON object holding a list of path strings under the
// key "paths".
// NOTE(review): presumably the routes served by the WebServer — confirm
// against the handler that returns it.
type WebServerPaths struct {
	Paths []string `json:"paths"`
}