mig

package
v0.0.0-...-12e0a72 Latest Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: Apr 21, 2024 License: Apache-2.0 Imports: 16 Imported by: 0

Documentation

Index

Constants

This section is empty.

Variables

This section is empty.

Functions

func AsResources

func AsResources(g gpu.Geometry) map[v1.ResourceName]int

func ExtractMemoryGBFromMigFormat

func ExtractMemoryGBFromMigFormat(migFormatResourceName v1.ResourceName) (int64, error)

func ExtractProfileNameStr

func ExtractProfileNameStr(resourceName v1.ResourceName) (string, error)

ExtractProfileNameStr extracts the Name of the MIG profile from the provided resource Name, and returns an error if the resource Name is not a valid NVIDIA MIG resource.

Example:

nvidia.com/mig-1g.10gb => 1g.10gb

func GetAllowedGeometries

func GetAllowedGeometries(model gpu.Model) ([]gpu.Geometry, bool)

func GetKnownGeometries

func GetKnownGeometries() map[gpu.Model][]gpu.Geometry

func GetRequestedProfiles

func GetRequestedProfiles(pod v1.Pod) map[ProfileName]int

func GroupDevicesByMigProfile

func GroupDevicesByMigProfile(l gpu.DeviceList) map[Profile]gpu.DeviceList

func GroupSpecAnnotationsByMigProfile

func GroupSpecAnnotationsByMigProfile(annotations gpu.SpecAnnotationList) map[Profile]gpu.SpecAnnotationList

func IsNvidiaMigDevice

func IsNvidiaMigDevice(resourceName v1.ResourceName) bool

func SetKnownGeometries

func SetKnownGeometries(configs map[gpu.Model][]gpu.Geometry) error

func SpecMatchesStatus

func SpecMatchesStatus(specAnnotations gpu.SpecAnnotationList, statusAnnotations gpu.StatusAnnotationList) bool

func ValidateConfigs

func ValidateConfigs(configs map[gpu.Model][]gpu.Geometry) error

Types

type AllowedMigGeometries

type AllowedMigGeometries struct {
	Models     []gpu.Model    `json:"models"`
	Geometries []gpu.Geometry `json:"allowedGeometries"`
}

func (*AllowedMigGeometries) UnmarshalJSON

func (a *AllowedMigGeometries) UnmarshalJSON(b []byte) error

type AllowedMigGeometriesList

type AllowedMigGeometriesList []AllowedMigGeometries

func (AllowedMigGeometriesList) GroupByModel

func (a AllowedMigGeometriesList) GroupByModel() map[gpu.Model][]gpu.Geometry

type Client

type Client interface {
	GetMigDevices(ctx context.Context) (gpu.DeviceList, gpu.Error)
	GetUsedMigDevices(ctx context.Context) (gpu.DeviceList, gpu.Error)
	GetAllocatableMigDevices(ctx context.Context) (gpu.DeviceList, gpu.Error)
	CreateMigDevices(ctx context.Context, profileList ProfileList) (ProfileList, error)
	DeleteMigDevice(ctx context.Context, device gpu.Device) gpu.Error
	DeleteAllExcept(ctx context.Context, resources gpu.DeviceList) error
}

func NewClient

func NewClient(resourceClient resource.Client, nvmlClient nvml.Client) Client

type GPU

type GPU struct {
	// contains filtered or unexported fields
}

func NewGPU

func NewGPU(model gpu.Model, index int, usedMigDevices, freeMigDevices map[ProfileName]int) (GPU, error)

func NewGpuOrPanic

func NewGpuOrPanic(model gpu.Model, index int, usedMigDevices, freeMigDevices map[ProfileName]int) GPU

func (*GPU) AddPod

func (g *GPU) AddPod(pod v1.Pod) error

AddPod adds a Pod to the GPU by updating the free and used MIG devices according to the MIG resources requested by the Pod.

AddPod returns an error if the GPU does not have enough free MIG resources for the Pod.

func (*GPU) AllowsGeometry

func (g *GPU) AllowsGeometry(geometry gpu.Geometry) bool

AllowsGeometry returns true if the geometry provided as argument is allowed by the GPU model.

func (*GPU) ApplyGeometry

func (g *GPU) ApplyGeometry(geometry gpu.Geometry) error

ApplyGeometry applies the MIG geometry provided as argument by changing the free devices of the GPU. It returns an error if the provided geometry is not allowed or if applying it would require deleting any used device of the GPU.

func (*GPU) CanApplyGeometry

func (g *GPU) CanApplyGeometry(geometry gpu.Geometry) (bool, string)

CanApplyGeometry returns true if the geometry provided as argument can be applied to the GPU, otherwise it returns false and the reason why the geometry cannot be applied.

func (*GPU) Clone

func (g *GPU) Clone() GPU

func (*GPU) GetAllowedGeometries

func (g *GPU) GetAllowedGeometries() []gpu.Geometry

GetAllowedGeometries returns the MIG geometries allowed by the GPU model.

func (*GPU) GetFreeMigDevices

func (g *GPU) GetFreeMigDevices() map[ProfileName]int

func (*GPU) GetGeometry

func (g *GPU) GetGeometry() gpu.Geometry

func (*GPU) GetIndex

func (g *GPU) GetIndex() int

func (*GPU) GetModel

func (g *GPU) GetModel() gpu.Model

func (*GPU) GetUsedMigDevices

func (g *GPU) GetUsedMigDevices() map[ProfileName]int

func (*GPU) HasFreeMigDevices

func (g *GPU) HasFreeMigDevices() bool

func (*GPU) InitGeometry

func (g *GPU) InitGeometry() error

InitGeometry applies the initial MIG geometry of the GPU, so that each MIG GPU has at least one MIG device.

The initial geometry is the one with the largest partitioning (i.e. with the fewest slices).

It returns an error if the initial geometry cannot be applied due to used devices that would be deleted by the new geometry.

func (*GPU) UpdateGeometryFor

func (g *GPU) UpdateGeometryFor(requiredProfiles map[gpu.Slice]int) bool

UpdateGeometryFor tries to update the geometry of the GPU in order to create the highest possible number of required profiles provided as argument, without deleting any of the used profiles.

The method returns true if the GPU geometry gets updated, false otherwise.

type Node

type Node struct {
	Name string

	GPUs []GPU
	// contains filtered or unexported fields
}

func NewNode

func NewNode(n framework.NodeInfo) (Node, error)

NewNode creates a new MIG Node starting from the node provided as argument.

The function constructs the MIG GPUs of the provided node using both the nos.nebuly.com MIG status annotations and the labels exposed by the NVIDIA gpu-feature-discovery tool. Specifically, the following labels are used:

  - GPU product ("nvidia.com/gpu.product")
  - GPU count ("nvidia.com/gpu.count")

If the node provided as argument does not have the GPU product label, the returned node will not contain any mig.GPU.

func (*Node) AddPod

func (n *Node) AddPod(pod v1.Pod) error

AddPod adds a Pod to the node by updating the free and used MIG devices of the Node GPUs according to the MIG resources requested by the Pod.

AddPod returns an error if the node does not have any GPU providing enough free MIG resources for the Pod.

func (*Node) Clone

func (n *Node) Clone() interface{}

func (*Node) Geometry

func (n *Node) Geometry() map[gpu.Slice]int

Geometry returns the overall MIG geometry of the node, which corresponds to the sum of the MIG geometry of all the GPUs present in the Node.

func (*Node) GetName

func (n *Node) GetName() string

func (*Node) HasFreeCapacity

func (n *Node) HasFreeCapacity() bool

HasFreeCapacity returns true if the Node has at least one GPU with free MIG capacity, namely it either has a free MIG device or its allowed MIG geometries allow creating at least one more MIG device.

func (*Node) NodeInfo

func (n *Node) NodeInfo() framework.NodeInfo

func (*Node) UpdateGeometryFor

func (n *Node) UpdateGeometryFor(slices map[gpu.Slice]int) (bool, error)

UpdateGeometryFor tries to update the MIG geometry of each single GPU of the node in order to create the MIG profiles provided as argument.

The method returns true if it updates the MIG geometry of any GPU, false otherwise.

type Profile

type Profile struct {
	GpuIndex int
	Name     ProfileName
}

type ProfileList

type ProfileList []Profile

func (ProfileList) GroupByGPU

func (p ProfileList) GroupByGPU() map[int]ProfileList

type ProfileName

type ProfileName string
const (
	ProfileEmpty ProfileName = ""

	Profile1g6gb  ProfileName = "1g.6gb"
	Profile2g12gb ProfileName = "2g.12gb"
	Profile4g24gb ProfileName = "4g.24gb"

	Profile1g5gb  ProfileName = "1g.5gb"
	Profile2g10gb ProfileName = "2g.10gb"
	Profile3g20gb ProfileName = "3g.20gb"
	Profile4g20gb ProfileName = "4g.20gb"
	Profile7g40gb ProfileName = "7g.40gb"

	Profile1g10gb ProfileName = "1g.10gb"
	Profile2g20gb ProfileName = "2g.20gb"
	Profile3g40gb ProfileName = "3g.40gb"
	Profile4g40gb ProfileName = "4g.40gb"
	Profile7g79gb ProfileName = "7g.79gb"
)

func ExtractProfileName

func ExtractProfileName(resourceName v1.ResourceName) (ProfileName, error)

ExtractProfileName extracts the Name of the MIG profile from the provided resource Name, and returns an error if the resource Name is not a valid NVIDIA MIG resource.

Example:

nvidia.com/mig-1g.10gb => 1g.10gb

func GetMigProfileName

func GetMigProfileName(device gpu.Device) ProfileName

GetMigProfileName returns the name of the MIG profile associated with the device.

Example:

Resource Name: nvidia.com/mig-1g.10gb
GetMigProfileName() -> 1g.10gb

func (ProfileName) AsResourceName

func (p ProfileName) AsResourceName() v1.ResourceName

func (ProfileName) SmallerThan

func (p ProfileName) SmallerThan(other gpu.Slice) bool

func (ProfileName) String

func (p ProfileName) String() string

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL