controllers

package
v1.11.1 Latest Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: Aug 4, 2022 License: Apache-2.0 Imports: 47 Imported by: 1

Documentation

Index

Constants

View Source
const (
	// --- Container runtime defaults ---

	// DefaultContainerdConfigFile is the default path of the containerd config file.
	DefaultContainerdConfigFile = "/etc/containerd/config.toml"
	// DefaultContainerdSocketFile is the default path of the containerd socket file.
	DefaultContainerdSocketFile = "/run/containerd/containerd.sock"
	// DefaultDockerConfigFile is the default path of the docker config file.
	DefaultDockerConfigFile = "/etc/docker/daemon.json"
	// DefaultDockerSocketFile is the default path of the docker socket file.
	DefaultDockerSocketFile = "/var/run/docker.sock"
	// DefaultRuntimeClass is the name of the "nvidia" RuntimeClass.
	DefaultRuntimeClass = "nvidia"
	// DriverInstallPathVolName is the volume name for the driver install path provided to the toolkit.
	DriverInstallPathVolName = "driver-install-path"
	// DefaultRuntimeSocketTargetDir is the target directory where the runtime socket directory will be mounted.
	DefaultRuntimeSocketTargetDir = "/runtime/sock-dir/"
	// DefaultRuntimeConfigTargetDir is the target directory where the runtime config directory will be mounted.
	DefaultRuntimeConfigTargetDir = "/runtime/config-dir/"

	// --- Custom user CA ---

	// TrustedCAConfigMapName is the name of the ConfigMap with the custom user CA injected.
	TrustedCAConfigMapName = "gpu-operator-trusted-ca"
	// TrustedCABundleFileName is the filename of the custom user CA certificate.
	TrustedCABundleFileName = "ca-bundle.crt"
	// TrustedCABundleMountDir is the target mount directory of the user CA bundle.
	TrustedCABundleMountDir = "/etc/pki/ca-trust/extracted/pem"
	// TrustedCACertificate is the name of the injected CA certificate.
	TrustedCACertificate = "tls-ca-bundle.pem"

	// --- vGPU licensing and topology ---

	// VGPULicensingConfigMountPath is the target mount path for the vGPU licensing configuration file.
	VGPULicensingConfigMountPath = "/drivers/gridd.conf"
	// VGPULicensingFileName is the filename of the vGPU licensing configuration.
	VGPULicensingFileName = "gridd.conf"
	// NLSClientTokenMountPath is the target mount path for the NLS client config token file (.tok).
	NLSClientTokenMountPath = "/drivers/ClientConfigToken/client_configuration_token.tok"
	// NLSClientTokenFileName is the filename of the NLS client config token.
	NLSClientTokenFileName = "client_configuration_token.tok"
	// VGPUTopologyConfigMountPath is the target mount path for the vGPU topology daemon configuration file.
	VGPUTopologyConfigMountPath = "/etc/nvidia/nvidia-topologyd.conf"
	// VGPUTopologyConfigFileName is the filename of the vGPU topology daemon configuration.
	VGPUTopologyConfigFileName = "nvidia-topologyd.conf"

	// --- Validator ---

	// ValidatorImageEnvName is the env name used to pass the validator image.
	ValidatorImageEnvName = "VALIDATOR_IMAGE"
	// ValidatorImagePullPolicyEnvName is the env name used to pass the validator image pull policy.
	ValidatorImagePullPolicyEnvName = "VALIDATOR_IMAGE_PULL_POLICY"
	// ValidatorImagePullSecretsEnvName is the env name used to pass the validator image pull secrets.
	ValidatorImagePullSecretsEnvName = "VALIDATOR_IMAGE_PULL_SECRETS"
	// ValidatorRuntimeClassEnvName is the env name of the runtime class to be applied to validator pods.
	ValidatorRuntimeClassEnvName = "VALIDATOR_RUNTIME_CLASS"

	// --- MIG ---

	// MigStrategyEnvName is the env name used to pass the MIG strategy.
	MigStrategyEnvName = "MIG_STRATEGY"
	// MigPartedDefaultConfigMapName is the name of the ConfigMap containing the default mig-parted config.
	MigPartedDefaultConfigMapName = "default-mig-parted-config"
	// MigDefaultGPUClientsConfigMapName is the name of the ConfigMap containing the default gpu-clients.
	MigDefaultGPUClientsConfigMapName = "default-gpu-clients"

	// --- DCGM ---

	// DCGMRemoteEngineEnvName is the env name used to specify the remote DCGM host engine ip:port.
	DCGMRemoteEngineEnvName = "DCGM_REMOTE_HOSTENGINE_INFO"
	// DCGMDefaultHostPort is the default host port bound to the DCGM host engine.
	DCGMDefaultHostPort = 5555
	// MetricsConfigFileName is the file name of the custom dcgm metrics file.
	MetricsConfigFileName = "dcgm-metrics.csv"
	// MetricsConfigMountPath is the mount path of the custom dcgm metrics file.
	MetricsConfigMountPath = "/etc/dcgm-exporter/" + MetricsConfigFileName

	// --- GPUDirect RDMA ---

	// GPUDirectRDMAEnabledEnvName is the env name indicating whether GPU direct RDMA is enabled through GPU operator.
	GPUDirectRDMAEnabledEnvName = "GPU_DIRECT_RDMA_ENABLED"
	// UseHostMOFEDEnvName is the env name indicating whether the MOFED driver is pre-installed on the host.
	UseHostMOFEDEnvName = "USE_HOST_MOFED"

	// --- Miscellaneous ---

	// NvidiaAnnotationHashKey is the annotation name for the last hash applied by gpu-operator.
	NvidiaAnnotationHashKey = "nvidia.com/last-applied-hash"
	// NvidiaDisableRequireEnvName is the env name used to disable the default cuda constraints.
	NvidiaDisableRequireEnvName = "NVIDIA_DISABLE_REQUIRE"
)

Variables

View Source
// CertConfigPathMap maps an OS identifier to the standard OS-specific path
// for ssl keys/certificates.
var CertConfigPathMap = map[string]string{
	// Entries sharing the /etc/pki/ca-trust path.
	"centos": "/etc/pki/ca-trust/extracted/pem",
	"rhel":   "/etc/pki/ca-trust/extracted/pem",
	"rhcos":  "/etc/pki/ca-trust/extracted/pem",
	// Ubuntu uses its own certificate directory.
	"ubuntu": "/etc/ssl/certs",
}

CertConfigPathMap indicates the standard OS-specific paths for SSL keys/certificates.

- Where Go looks for certs: https://golang.org/src/crypto/x509/root_linux.go
- Where OCP mounts proxy certs on RHCOS nodes: https://access.redhat.com/documentation/en-us/openshift_container_platform/4.3/html/authentication/ocp-certificates#proxy-certificates_ocp-certificates

View Source
// RepoConfigPathMap maps an OS identifier to the standard OS-specific path
// for repository configuration files.
var RepoConfigPathMap = map[string]string{
	// Entries sharing the yum repository directory.
	"centos": "/etc/yum.repos.d",
	"rhel":   "/etc/yum.repos.d",
	"rhcos":  "/etc/yum.repos.d",
	// Ubuntu uses the apt sources directory.
	"ubuntu": "/etc/apt/sources.list.d",
}

RepoConfigPathMap indicates the standard OS-specific paths for repository configuration files.

Functions

func ClusterRole

func ClusterRole(n ClusterPolicyController) (gpuv1.State, error)

ClusterRole creates ClusterRole resource

func ClusterRoleBinding

func ClusterRoleBinding(n ClusterPolicyController) (gpuv1.State, error)

ClusterRoleBinding creates ClusterRoleBinding resource

func ConfigMaps added in v1.9.0

func ConfigMaps(n ClusterPolicyController) (gpuv1.State, error)

ConfigMaps creates ConfigMap resource(s)

func DaemonSet

func DaemonSet(n ClusterPolicyController) (gpuv1.State, error)

DaemonSet creates DaemonSet resource

func Deployment

func Deployment(n ClusterPolicyController) (gpuv1.State, error)

Deployment creates Deployment resource

func GetClusterWideProxy

func GetClusterWideProxy() (*apiconfigv1.Proxy, error)

GetClusterWideProxy returns cluster wide proxy object setup in OCP

func OpenshiftVersion

func OpenshiftVersion() (string, error)

OpenshiftVersion fetches OCP version

func PodSecurityPolicy

func PodSecurityPolicy(n ClusterPolicyController) (gpuv1.State, error)

PodSecurityPolicy creates PSP resources

func PrometheusRule added in v1.8.0

func PrometheusRule(n ClusterPolicyController) (gpuv1.State, error)

PrometheusRule creates PrometheusRule object

func Role

func Role(n ClusterPolicyController) (gpuv1.State, error)

Role creates Role resource

func RoleBinding

func RoleBinding(n ClusterPolicyController) (gpuv1.State, error)

RoleBinding creates RoleBinding resource

func RuntimeClass

func RuntimeClass(n ClusterPolicyController) (gpuv1.State, error)

RuntimeClass creates RuntimeClass object

func SecurityContextConstraints

func SecurityContextConstraints(n ClusterPolicyController) (gpuv1.State, error)

SecurityContextConstraints creates SCC resources

func Service

func Service(n ClusterPolicyController) (gpuv1.State, error)

Service creates Service object

func ServiceAccount

func ServiceAccount(n ClusterPolicyController) (gpuv1.State, error)

ServiceAccount creates ServiceAccount resource

func ServiceMonitor

func ServiceMonitor(n ClusterPolicyController) (gpuv1.State, error)

ServiceMonitor creates ServiceMonitor object

func TransformDCGM added in v1.8.0

func TransformDCGM(obj *appsv1.DaemonSet, config *gpuv1.ClusterPolicySpec, n ClusterPolicyController) error

TransformDCGM transforms dcgm daemonset with required config as per ClusterPolicy

func TransformDCGMExporter

func TransformDCGMExporter(obj *appsv1.DaemonSet, config *gpuv1.ClusterPolicySpec, n ClusterPolicyController) error

TransformDCGMExporter transforms dcgm exporter daemonset with required config as per ClusterPolicy

func TransformDevicePlugin

func TransformDevicePlugin(obj *appsv1.DaemonSet, config *gpuv1.ClusterPolicySpec, n ClusterPolicyController) error

TransformDevicePlugin transforms k8s-device-plugin daemonset with required config as per ClusterPolicy

func TransformDriver

func TransformDriver(obj *appsv1.DaemonSet, config *gpuv1.ClusterPolicySpec, n ClusterPolicyController) error

TransformDriver transforms Nvidia driver daemonset with required config as per ClusterPolicy

func TransformGPUDiscoveryPlugin

func TransformGPUDiscoveryPlugin(obj *appsv1.DaemonSet, config *gpuv1.ClusterPolicySpec, n ClusterPolicyController) error

TransformGPUDiscoveryPlugin transforms GPU discovery daemonset with required config as per ClusterPolicy

func TransformMIGManager

func TransformMIGManager(obj *appsv1.DaemonSet, config *gpuv1.ClusterPolicySpec, n ClusterPolicyController) error

TransformMIGManager transforms MIG Manager daemonset with required config as per ClusterPolicy

func TransformNodeStatusExporter added in v1.8.0

func TransformNodeStatusExporter(obj *appsv1.DaemonSet, config *gpuv1.ClusterPolicySpec, n ClusterPolicyController) error

TransformNodeStatusExporter transforms the node-status-exporter daemonset with required config as per ClusterPolicy

func TransformSandboxDevicePlugin added in v1.11.0

func TransformSandboxDevicePlugin(obj *appsv1.DaemonSet, config *gpuv1.ClusterPolicySpec, n ClusterPolicyController) error

TransformSandboxDevicePlugin transforms sandbox-device-plugin daemonset with required config as per ClusterPolicy

func TransformSandboxValidator added in v1.11.0

func TransformSandboxValidator(obj *appsv1.DaemonSet, config *gpuv1.ClusterPolicySpec, n ClusterPolicyController) error

TransformSandboxValidator transforms nvidia-sandbox-validator daemonset with required config as per ClusterPolicy

func TransformToolkit

func TransformToolkit(obj *appsv1.DaemonSet, config *gpuv1.ClusterPolicySpec, n ClusterPolicyController) error

TransformToolkit transforms Nvidia container-toolkit daemonset with required config as per ClusterPolicy

func TransformVFIOManager added in v1.11.0

func TransformVFIOManager(obj *appsv1.DaemonSet, config *gpuv1.ClusterPolicySpec, n ClusterPolicyController) error

TransformVFIOManager transforms VFIO-PCI Manager daemonset with required config as per ClusterPolicy

func TransformVGPUDeviceManager added in v1.11.0

func TransformVGPUDeviceManager(obj *appsv1.DaemonSet, config *gpuv1.ClusterPolicySpec, n ClusterPolicyController) error

TransformVGPUDeviceManager transforms VGPU Device Manager daemonset with required config as per ClusterPolicy

func TransformVGPUManager added in v1.11.0

func TransformVGPUManager(obj *appsv1.DaemonSet, config *gpuv1.ClusterPolicySpec, n ClusterPolicyController) error

TransformVGPUManager transforms NVIDIA vGPU Manager daemonset with required config as per ClusterPolicy

func TransformValidator

func TransformValidator(obj *appsv1.DaemonSet, config *gpuv1.ClusterPolicySpec, n ClusterPolicyController) error

TransformValidator transforms nvidia-operator-validator daemonset with required config as per ClusterPolicy

func TransformValidatorComponent

func TransformValidatorComponent(config *gpuv1.ClusterPolicySpec, podSpec *corev1.PodSpec, component string) error

TransformValidatorComponent applies changes to given validator component

func TransformValidatorShared added in v1.11.0

func TransformValidatorShared(obj *appsv1.DaemonSet, config *gpuv1.ClusterPolicySpec, n ClusterPolicyController) error

TransformValidatorShared applies general transformations to the validator daemonset with required config as per ClusterPolicy

Types

type ClusterPolicyController

// ClusterPolicyController represents clusterpolicy controller spec for GPU operator.
// Its fields are not shown in this listing (all filtered or unexported).
type ClusterPolicyController struct {
	// contains filtered or unexported fields
}

ClusterPolicyController represents clusterpolicy controller spec for GPU operator

type ClusterPolicyReconciler

// ClusterPolicyReconciler reconciles a ClusterPolicy object.
//
// It embeds client.Client, so that type's methods are promoted onto the
// reconciler, and additionally carries a logr.Logger (Log) and a
// *runtime.Scheme (Scheme).
type ClusterPolicyReconciler struct {
	client.Client
	Log    logr.Logger
	Scheme *runtime.Scheme
}

ClusterPolicyReconciler reconciles a ClusterPolicy object

func (*ClusterPolicyReconciler) Reconcile

func (r *ClusterPolicyReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Result, error)

Reconcile is part of the main kubernetes reconciliation loop which aims to move the current state of the cluster closer to the desired state. TODO(user): Modify the Reconcile function to compare the state specified by the ClusterPolicy object against the actual cluster state, and then perform operations to make the cluster state reflect the state specified by the user.

For more details, check Reconcile and its Result here: https://pkg.go.dev/sigs.k8s.io/controller-runtime@v0.7.0/pkg/reconcile

func (*ClusterPolicyReconciler) SetupWithManager

func (r *ClusterPolicyReconciler) SetupWithManager(mgr ctrl.Manager) error

SetupWithManager sets up the controller with the Manager.

type OpenShiftDriverToolkit added in v1.9.0

// OpenShiftDriverToolkit contains the values required to deploy OpenShift
// DriverToolkit DaemonSet. Its fields are not shown in this listing (all
// filtered or unexported).
type OpenShiftDriverToolkit struct {
	// contains filtered or unexported fields
}

OpenShiftDriverToolkit contains the values required to deploy OpenShift DriverToolkit DaemonSet.

type OperatorMetrics added in v1.8.0

// OperatorMetrics defines the Prometheus metrics exposed for the operator
// status. Its fields are not shown in this listing (all filtered or
// unexported).
type OperatorMetrics struct {
	// contains filtered or unexported fields
}

OperatorMetrics defines the Prometheus metrics exposed for the operator status

type Resources

// Resources indicates resources managed by GPU operator.
//
// Each field holds one object of the named kind by value, except ConfigMaps,
// which is a slice and can therefore hold several ConfigMap objects.
type Resources struct {
	ServiceAccount             corev1.ServiceAccount
	Role                       rbacv1.Role
	RoleBinding                rbacv1.RoleBinding
	ClusterRole                rbacv1.ClusterRole
	ClusterRoleBinding         rbacv1.ClusterRoleBinding
	ConfigMaps                 []corev1.ConfigMap
	DaemonSet                  appsv1.DaemonSet
	Deployment                 appsv1.Deployment
	Pod                        corev1.Pod
	Service                    corev1.Service
	ServiceMonitor             promv1.ServiceMonitor
	PriorityClass              schedv1.PriorityClass
	Taint                      corev1.Taint
	SecurityContextConstraints secv1.SecurityContextConstraints
	PodSecurityPolicy          policyv1beta1.PodSecurityPolicy
	RuntimeClass               nodev1.RuntimeClass
	PrometheusRule             promv1.PrometheusRule
}

Resources indicates resources managed by GPU operator

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL