Documentation ¶
Index ¶
- Constants
- func ConvertJobTemplateToMPIJob(jt drmaa2interface.JobTemplate) (kubeflow.MPIJobSpec, error)
- func CreateJob(ctx context.Context, mpiClient clientset.Interface, mpiJob *kubeflow.MPIJob, ...) (*kubeflow.MPIJob, error)
- func DeleteJob(ctx context.Context, mpiClient clientset.Interface, namespace, jobName string) error
- func DescribeJob(ctx context.Context, mpiClient clientset.Interface, namespace, jobName string) (*kubeflow.MPIJob, error)
- func GetClient(restConfig *rest.Config) (clientset.Interface, error)
- func GetJobInfo(ctx context.Context, mpiClient clientset.Interface, namespace, jobName string) (drmaa2interface.JobInfo, error)
- func GetJobState(ctx context.Context, mpiClient clientset.Interface, namespace, jobName string) (drmaa2interface.JobState, string, error)
- func GetLauncherResourceLimitExtension(jt drmaa2interface.JobTemplate) v1.ResourceList
- func GetLauncherResourceRequestExtension(jt drmaa2interface.JobTemplate) v1.ResourceList
- func GetMPIImplementationExtension(jt drmaa2interface.JobTemplate) kubeflow.MPIImplementation
- func GetRunAsUserExtension(jt drmaa2interface.JobTemplate) int64
- func GetSSHMountPathExtension(jt drmaa2interface.JobTemplate) string
- func GetSlotsPerWorkerExtension(jt drmaa2interface.JobTemplate) int32
- func GetWorkerCommandExtension(jt drmaa2interface.JobTemplate) []string
- func GetWorkerImageExtension(jt drmaa2interface.JobTemplate) string
- func GetWorkerResourceLimitExtension(jt drmaa2interface.JobTemplate) v1.ResourceList
- func GetWorkerResourceRequestExtension(jt drmaa2interface.JobTemplate) v1.ResourceList
- func InstallMPIOperator(kubeconfigPath string) error
- func JobInfoFromMPIJob(mpiJob *kubeflow.MPIJob) (jobInfo drmaa2interface.JobInfo)
- func JobStateFromCondition(lastCondition common.JobCondition) (drmaa2interface.JobState, string, error)
- func ListJobs(ctx context.Context, mpiClient clientset.Interface, namespace string) ([]kubeflow.MPIJob, error)
- func NewMPIJob(spec kubeflow.MPIJobSpec) (job kubeflow.MPIJob)
- func NewRestConfig(kubeConfigPath string) (*rest.Config, error)
- func SetLauncherResourceLimitExtension(jt drmaa2interface.JobTemplate, limits v1.ResourceList) drmaa2interface.JobTemplate
- func SetLauncherResourceRequestsExtension(jt drmaa2interface.JobTemplate, requests v1.ResourceList) drmaa2interface.JobTemplate
- func SetMPIImplementationExtension(jt drmaa2interface.JobTemplate, mpiImplementation string) drmaa2interface.JobTemplate
- func SetRunAsUserExtension(jt drmaa2interface.JobTemplate, runAsUser int64) drmaa2interface.JobTemplate
- func SetSSHMountPathExtension(jt drmaa2interface.JobTemplate, sshMountPath string) drmaa2interface.JobTemplate
- func SetSlotsPerWorkerExtension(jt drmaa2interface.JobTemplate, slotsPerWorker int32) drmaa2interface.JobTemplate
- func SetVolumeMounts(jt drmaa2interface.JobTemplate, vm []VolumeMountSpec) drmaa2interface.JobTemplate
- func SetWorkerCommandExtension(jt drmaa2interface.JobTemplate, workerCommand ...string) drmaa2interface.JobTemplate
- func SetWorkerImageExtension(jt drmaa2interface.JobTemplate, workerImage string) drmaa2interface.JobTemplate
- func SetWorkerResourceLimitExtension(jt drmaa2interface.JobTemplate, limits v1.ResourceList) drmaa2interface.JobTemplate
- func SetWorkerResourceRequestsExtension(jt drmaa2interface.JobTemplate, requests v1.ResourceList) drmaa2interface.JobTemplate
- type MPIOperatorTracker
- func (t *MPIOperatorTracker) AddArrayJob(jt drmaa2interface.JobTemplate, begin int, end int, step int, maxParallel int) (string, error)
- func (t *MPIOperatorTracker) AddJob(jobTemplate drmaa2interface.JobTemplate) (string, error)
- func (t *MPIOperatorTracker) DeleteJob(jobID string) error
- func (t *MPIOperatorTracker) JobControl(jobID string, action string) error
- func (t *MPIOperatorTracker) JobInfo(jobID string) (drmaa2interface.JobInfo, error)
- func (t *MPIOperatorTracker) JobState(jobID string) (drmaa2interface.JobState, string, error)
- func (t *MPIOperatorTracker) ListArrayJobs(arrayjobID string) ([]string, error)
- func (MPIOperatorTracker) ListJobCategories() ([]string, error)
- func (t *MPIOperatorTracker) ListJobs() ([]string, error)
- func (t *MPIOperatorTracker) Wait(jobID string, timeout time.Duration, states ...drmaa2interface.JobState) error
- type VolumeMountSpec
Constants ¶
const ExtensionMPIImplementation = "mpiImplementation"
const ExtensionRunAsUser = "runAsUser"
const ExtensionSSHMountPath = "sshMountPath"
const ExtensionSlotsPerWorker = "slotsPerWorker"
const ExtensionWorkerCommand = "workerCommand"
const ExtensionWorkerImage = "workerImage"
Variables ¶
This section is empty.
Functions ¶
func ConvertJobTemplateToMPIJob ¶
func ConvertJobTemplateToMPIJob(jt drmaa2interface.JobTemplate) (kubeflow.MPIJobSpec, error)
func DescribeJob ¶
func GetJobInfo ¶
func GetJobState ¶
func GetLauncherResourceLimitExtension ¶
func GetLauncherResourceLimitExtension(jt drmaa2interface.JobTemplate) v1.ResourceList
func GetLauncherResourceRequestExtension ¶
func GetLauncherResourceRequestExtension(jt drmaa2interface.JobTemplate) v1.ResourceList
func GetMPIImplementationExtension ¶
func GetMPIImplementationExtension(jt drmaa2interface.JobTemplate) kubeflow.MPIImplementation
func GetRunAsUserExtension ¶
func GetRunAsUserExtension(jt drmaa2interface.JobTemplate) int64
func GetSSHMountPathExtension ¶
func GetSSHMountPathExtension(jt drmaa2interface.JobTemplate) string
func GetSlotsPerWorkerExtension ¶
func GetSlotsPerWorkerExtension(jt drmaa2interface.JobTemplate) int32
func GetWorkerCommandExtension ¶
func GetWorkerCommandExtension(jt drmaa2interface.JobTemplate) []string
func GetWorkerImageExtension ¶
func GetWorkerImageExtension(jt drmaa2interface.JobTemplate) string
func GetWorkerResourceLimitExtension ¶
func GetWorkerResourceLimitExtension(jt drmaa2interface.JobTemplate) v1.ResourceList
func GetWorkerResourceRequestExtension ¶
func GetWorkerResourceRequestExtension(jt drmaa2interface.JobTemplate) v1.ResourceList
func InstallMPIOperator ¶
func JobInfoFromMPIJob ¶
func JobInfoFromMPIJob(mpiJob *kubeflow.MPIJob) (jobInfo drmaa2interface.JobInfo)
func JobStateFromCondition ¶
func JobStateFromCondition(lastCondition common.JobCondition) (drmaa2interface.JobState, string, error)
func SetLauncherResourceLimitExtension ¶
func SetLauncherResourceLimitExtension(jt drmaa2interface.JobTemplate, limits v1.ResourceList) drmaa2interface.JobTemplate
func SetLauncherResourceRequestsExtension ¶
func SetLauncherResourceRequestsExtension(jt drmaa2interface.JobTemplate, requests v1.ResourceList) drmaa2interface.JobTemplate
func SetMPIImplementationExtension ¶
func SetMPIImplementationExtension(jt drmaa2interface.JobTemplate, mpiImplementation string) drmaa2interface.JobTemplate
func SetRunAsUserExtension ¶
func SetRunAsUserExtension(jt drmaa2interface.JobTemplate, runAsUser int64) drmaa2interface.JobTemplate
func SetSSHMountPathExtension ¶
func SetSSHMountPathExtension(jt drmaa2interface.JobTemplate, sshMountPath string) drmaa2interface.JobTemplate
func SetSlotsPerWorkerExtension ¶
func SetSlotsPerWorkerExtension(jt drmaa2interface.JobTemplate, slotsPerWorker int32) drmaa2interface.JobTemplate
func SetVolumeMounts ¶
func SetVolumeMounts(jt drmaa2interface.JobTemplate, vm []VolumeMountSpec) drmaa2interface.JobTemplate
func SetWorkerCommandExtension ¶
func SetWorkerCommandExtension(jt drmaa2interface.JobTemplate, workerCommand ...string) drmaa2interface.JobTemplate
func SetWorkerImageExtension ¶
func SetWorkerImageExtension(jt drmaa2interface.JobTemplate, workerImage string) drmaa2interface.JobTemplate
func SetWorkerResourceLimitExtension ¶
func SetWorkerResourceLimitExtension(jt drmaa2interface.JobTemplate, limits v1.ResourceList) drmaa2interface.JobTemplate
func SetWorkerResourceRequestsExtension ¶
func SetWorkerResourceRequestsExtension(jt drmaa2interface.JobTemplate, requests v1.ResourceList) drmaa2interface.JobTemplate
Types ¶
type MPIOperatorTracker ¶
type MPIOperatorTracker struct {
// contains filtered or unexported fields
}
func NewMPIOperatorTracker ¶
func NewMPIOperatorTracker(kubeconfigPath string, testInstallMPIOperator bool) (*MPIOperatorTracker, error)
func (*MPIOperatorTracker) AddArrayJob ¶
func (t *MPIOperatorTracker) AddArrayJob(jt drmaa2interface.JobTemplate, begin int, end int, step int, maxParallel int) (string, error)
AddArrayJob makes a mass submission of jobs defined by the same job template. Many HPC workload managers support job arrays for submitting tens of thousands of similar jobs with one call. The additional parameters define how many jobs are submitted by defining a TASK_ID range. Begin is the first task ID (like 1), end is the last task ID (like 10), step is a positive integer which defines the increment from one task ID to the next task ID (like 1). maxParallel is an argument representing an optional functionality which instructs the backend to limit the number of tasks of this job array that run in parallel to maxParallel. Note that jobs use the TASK_ID environment variable to identify which task they are and determine that way what to do (like which data set is accessed).
func (*MPIOperatorTracker) AddJob ¶
func (t *MPIOperatorTracker) AddJob(jobTemplate drmaa2interface.JobTemplate) (string, error)
AddJob typically submits or starts a new job at the backend. The function returns the unique job ID or an error if job submission (or starting of the job in case there is no queueing system) has failed.
func (*MPIOperatorTracker) DeleteJob ¶
func (t *MPIOperatorTracker) DeleteJob(jobID string) error
DeleteJob removes a job from a potential internal database. It does not stop a job. A job must be in an end state (terminated, failed) in order to call DeleteJob. In case of an error, or if the job is not in an end state, an error must be returned. If the backend does not support cleaning up resources for a finished job, nil should be returned.
func (*MPIOperatorTracker) JobControl ¶
func (t *MPIOperatorTracker) JobControl(jobID string, action string) error
JobControl sends a request to the backend to either "terminate", "suspend", "resume", "hold", or "release" a job. The strings are fixed and are defined by the JobControl constants. This could change in the future to be limited only to constants representing the actions. When the request is not accepted by the system the function must return an error.
func (*MPIOperatorTracker) JobInfo ¶
func (t *MPIOperatorTracker) JobInfo(jobID string) (drmaa2interface.JobInfo, error)
JobInfo returns the job status of a job in the form of a JobInfo struct or an error.
func (*MPIOperatorTracker) JobState ¶
func (t *MPIOperatorTracker) JobState(jobID string) (drmaa2interface.JobState, string, error)
JobState returns the DRMAA2 state and substate (free form string) of the job.
func (*MPIOperatorTracker) ListArrayJobs ¶
func (t *MPIOperatorTracker) ListArrayJobs(arrayjobID string) ([]string, error)
ListArrayJobs returns all job IDs that a job array ID (or array job ID) represents, or an error.
func (MPIOperatorTracker) ListJobCategories ¶
func (MPIOperatorTracker) ListJobCategories() ([]string, error)
ListJobCategories returns a list of job categories which can be used in the JobCategory field of the job template. The list is informational. An example is returning a list of supported container images. AddJob() and AddArrayJob() process a JobTemplate and hence also the JobCategory field.
func (*MPIOperatorTracker) ListJobs ¶
func (t *MPIOperatorTracker) ListJobs() ([]string, error)
ListJobs returns all visible job IDs or an error.
func (*MPIOperatorTracker) Wait ¶
func (t *MPIOperatorTracker) Wait(jobID string, timeout time.Duration, states ...drmaa2interface.JobState) error
Wait blocks until the job is either in one of the given states, the max. waiting time (specified by timeout) is reached, or another internal error occurred (like job was not found). In case of a timeout, an error must also be returned.
type VolumeMountSpec ¶
type VolumeMountSpec struct { MountPath string // path to mount volume inside the container ReadOnly bool VolumeType string // like pvc, or cm/configmap VolumeName string // like pvc-name or configmap-name }
func GetVolumeMounts ¶
func GetVolumeMounts(jt drmaa2interface.JobTemplate) []VolumeMountSpec