mxnet

package
v1.1.0 Latest Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: Mar 25, 2021 License: Apache-2.0 Imports: 36 Imported by: 0

Documentation

Overview

Package controller provides a Kubernetes controller for a MXJob resource.

Package controller provides a Kubernetes controller for a MXJob resource.

Package controller provides a Kubernetes controller for a MXJob resource.

Package controller provides a Kubernetes controller for a MXJob resource.

Package controller provides a Kubernetes controller for a MXJob resource.

Index

Constants

View Source
// Reason strings recording the outcome of deleting a job.
// NOTE(review): presumably emitted as Kubernetes event reasons by DeleteJob — confirm against the callers.
const (
	// FailedDeleteJobReason is the reason string for a job deletion that failed.
	FailedDeleteJobReason     = "FailedDeleteJob"
	// SuccessfulDeleteJobReason is the reason string for a job deletion that succeeded.
	SuccessfulDeleteJobReason = "SuccessfulDeleteJob"
)

Variables

View Source
var (
	// KeyFunc is a short name for cache.DeletionHandlingMetaNamespaceKeyFunc.
	// IndexerInformer uses a delta queue, so deletes have to use this key
	// function; it is also fine for non-delete events.
	KeyFunc = cache.DeletionHandlingMetaNamespaceKeyFunc

	// DefaultMXControllerConfiguration is the suggested mxnet-operator configuration for production:
	// a 15-second reconciler sync loop period with gang scheduling disabled.
	DefaultMXControllerConfiguration = common.JobControllerConfiguration{
		ReconcilerSyncLoopPeriod: metav1.Duration{Duration: 15 * time.Second},
		EnableGangScheduling:     false,
	}

	// DefaultCleanPodPolicy is the clean pod policy the controller assigns to a new job
	// that does not already have one (CleanPodPolicyNone).
	DefaultCleanPodPolicy = commonv1.CleanPodPolicyNone
)

Functions

func ContainSchedulerSpec

func ContainSchedulerSpec(mxJob *mxv1.MXJob) bool

func GetPortFromMXJob

func GetPortFromMXJob(mxJob *mxv1.MXJob, rtype commonv1.ReplicaType) (int32, error)

GetPortFromMXJob gets the port of mxnet container.

func NewUnstructuredMXJobInformer

func NewUnstructuredMXJobInformer(restConfig *restclientset.Config, namespace string) mxjobinformersv1.MXJobInformer

Types

type ClusterSpec

type ClusterSpec map[string][]UrlPort

ClusterSpec represents an MXNet cluster specification.

type LabelsSpec

type LabelsSpec map[string]string

LabelsSpec represents a label specification.

type MXConfig

// MXConfig holds the cluster, labels and task sections of the distributed MXNet config,
// serialized under the JSON keys "cluster", "labels" and "task".
type MXConfig struct {
	// Cluster is the MXNet ClusterSpec.
	Cluster ClusterSpec `json:"cluster"`
	// Labels includes all labels of the task.
	Labels LabelsSpec `json:"labels"`
	// Task holds information about the current node.
	Task TaskSpec `json:"task"`
}

MXConfig is a struct representing the distributed MXNet config. This struct is turned into the MX_CONFIG environment variable, which MXNet processes use to configure themselves.

type MXController

// MXController is the controller type for MXJob; it embeds common.JobController.
type MXController struct {
	// Embedded, so JobController's exported fields and methods are promoted onto MXController.
	common.JobController
	// contains filtered or unexported fields
}

MXController is the type for MXJob Controller, which manages the lifecycle of MXJobs.

func NewMXController

func NewMXController(

	mxJobInformer mxjobinformersv1.MXJobInformer,
	kubeClientSet kubeclientset.Interface,
	mxJobClientSet mxjobclientset.Interface,
	volcanoClientSet volcanoclient.Interface,
	kubeInformerFactory kubeinformers.SharedInformerFactory,

	mxJobInformerFactory mxjobinformers.SharedInformerFactory,
	option options.ServerOption) *MXController

NewMXController returns a new MXJob controller.

func (*MXController) ControllerName

func (tc *MXController) ControllerName() string

func (*MXController) DeleteJob added in v1.1.0

func (tc *MXController) DeleteJob(job interface{}) error

func (*MXController) GetAPIGroupVersion

func (tc *MXController) GetAPIGroupVersion() schema.GroupVersion

func (*MXController) GetAPIGroupVersionKind

func (tc *MXController) GetAPIGroupVersionKind() schema.GroupVersionKind

func (*MXController) GetDefaultContainerName added in v1.1.0

func (tc *MXController) GetDefaultContainerName() string

func (*MXController) GetDefaultContainerPortName added in v1.1.0

func (tc *MXController) GetDefaultContainerPortName() string

func (*MXController) GetGroupNameLabelKey

func (tc *MXController) GetGroupNameLabelKey() string

func (*MXController) GetGroupNameLabelValue

func (tc *MXController) GetGroupNameLabelValue() string

func (*MXController) GetJobFromAPIClient

func (tc *MXController) GetJobFromAPIClient(namespace, name string) (metav1.Object, error)

func (*MXController) GetJobFromInformerCache

func (tc *MXController) GetJobFromInformerCache(namespace, name string) (metav1.Object, error)

func (*MXController) GetJobNameLabelKey

func (tc *MXController) GetJobNameLabelKey() string

func (*MXController) GetJobRoleKey

func (tc *MXController) GetJobRoleKey() string

func (*MXController) GetPodsForJob added in v1.1.0

func (tc *MXController) GetPodsForJob(job interface{}) ([]*corev1.Pod, error)

func (*MXController) GetReplicaIndexLabelKey

func (tc *MXController) GetReplicaIndexLabelKey() string

func (*MXController) GetReplicaTypeLabelKey

func (tc *MXController) GetReplicaTypeLabelKey() string

func (*MXController) GetServicesForJob added in v1.1.0

func (tc *MXController) GetServicesForJob(job interface{}) ([]*corev1.Service, error)

func (*MXController) IsMasterRole added in v1.1.0

func (tc *MXController) IsMasterRole(replicas map[commonv1.ReplicaType]*commonv1.ReplicaSpec, rtype commonv1.ReplicaType, index int) bool

func (*MXController) NewMXJobInformer

func (tc *MXController) NewMXJobInformer(mxJobInformerFactory mxjobinformers.SharedInformerFactory) mxjobinformersv1.MXJobInformer

NewMXJobInformer returns MXJobInformer from the given factory.

func (*MXController) Run

func (tc *MXController) Run(threadiness int, stopCh <-chan struct{}) error

Run will set up the event handlers for the types we are interested in, as well as syncing informer caches and starting workers. It will block until stopCh is closed, at which point it will shut down the workqueue and wait for workers to finish processing their current work items.

func (*MXController) SetClusterSpec added in v1.1.0

func (tc *MXController) SetClusterSpec(job interface{}, podTemplate *corev1.PodTemplateSpec, rtype, index string) error

func (*MXController) UpdateJobStatus added in v1.1.0

func (tc *MXController) UpdateJobStatus(job interface{}, replicas map[commonv1.ReplicaType]*commonv1.ReplicaSpec, jobStatus *commonv1.JobStatus) error

func (*MXController) UpdateJobStatusInApiServer added in v1.1.0

func (tc *MXController) UpdateJobStatusInApiServer(job interface{}, jobStatus *commonv1.JobStatus) error

UpdateJobStatusInApiServer updates the status of the given MXJob.

type TaskSpec

// TaskSpec identifies a task of the MXJob by type and index.
type TaskSpec struct {
	// Type is serialized as "type"; presumably the replica type name (e.g. server or worker) — confirm against callers.
	Type  string `json:"type"`
	// Index is serialized as "index"; presumably the replica's index within its type — confirm against callers.
	Index int    `json:"index"`
}

TaskSpec is the specification for a task (server or worker ...) of the MXJob.

type UrlPort added in v1.1.0

// UrlPort pairs a URL with a port; it is the element type of the lists stored in ClusterSpec.
type UrlPort struct {
	// Url is serialized as "url". NOTE(review): presumably a host name or address — confirm.
	Url  string `json:"url"`
	// Port is serialized as "port".
	Port int    `json:"port"`
}

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL