metrics

package
v1.4.1 Latest Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: Feb 26, 2024 License: Apache-2.0 Imports: 8 Imported by: 1

Documentation

Index

Constants

View Source
// Label names carried by the core scheduling metrics, listed in the order
// they appear in the ContainerCoreSchedCookie label set.
const (
	// CoreSchedGroupKey is the label name for the core scheduling group.
	CoreSchedGroupKey = "core_sched_group"
	// CoreSchedCookieKey is the label name for the core scheduling cookie.
	CoreSchedCookieKey = "core_sched_cookie"
)
View Source
// Names used by the container CPI metric.
const (
	// CPIField is the label name on ContainerCPI that tells which CPI
	// component a sample carries.
	CPIField = "cpi_field"

	// Instructions is presumably one of the values written to the CPIField
	// label (the recording code is not visible here).
	Instructions = "instructions"
	// Cycles is presumably the other value written to the CPIField label.
	Cycles = "cycles"
)
View Source
// Subsystem name, label names, and label values shared by the koordlet metrics.
const (
	// KoordletSubsystem is the Prometheus subsystem used by the metric
	// options declared in this package.
	KoordletSubsystem = "koordlet"

	// Label names for the node, the priority, and the predictor a sample refers to.
	NodeKey      = "node"
	PriorityKey  = "priority"
	PredictorKey = "predictor"

	// StatusKey is the label name for an operation outcome. StatusSucceed and
	// StatusFailed are presumably the two values it takes (the recording code
	// is not visible here).
	StatusKey     = "status"
	StatusSucceed = "succeeded"
	StatusFailed  = "failed"

	// Label names for the eviction reason and the BE suppress type.
	EvictionReasonKey = "reason"
	BESuppressTypeKey = "type"

	// Label names identifying a container.
	ContainerID   = "container_id"
	ContainerName = "container_name"

	// Label names identifying a pod.
	PodUID       = "pod_uid"
	PodName      = "pod_name"
	PodNamespace = "pod_namespace"

	// ResourceKey is the label name for the resource a sample describes.
	ResourceKey = "resource"

	// UnitKey is the label name for the unit of a sample. UnitCore, UnitByte
	// and UnitInteger are presumably the values it takes.
	UnitKey     = "unit"
	UnitCore    = "core"
	UnitByte    = "byte"
	UnitInteger = "integer"
)
View Source
// Label names carried by the PSI metrics, listed in the order they appear in
// the ContainerPSI and PodPSI label sets.
const (
	// PSIResourceType is the label name for the resource the PSI sample belongs to.
	PSIResourceType = "psi_resource_type"
	// PSIPrecision is the label name for the precision of the PSI sample.
	PSIPrecision = "psi_precision"
	// PSIDegree is the label name for the degree of the PSI sample.
	PSIDegree = "psi_degree"

	// CPUFullSupported is the label name for the "cpu full supported" flag
	// attached to PSI samples.
	CPUFullSupported = "cpu_full_supported"
)
View Source
// String values used when describing PSI samples.
// NOTE(review): presumably these are the values written to the
// PSIResourceType, PSIPrecision and PSIDegree labels — confirm against the
// recording functions, which are not visible here.
const (
	// Resource type values.
	ResourceTypeCPU = "cpu"
	ResourceTypeMem = "mem"
	ResourceTypeIO  = "io"

	// Precision values; the numeric suffix presumably names the averaging window.
	Precision10  = "avg10"
	Precision60  = "avg60"
	Precision300 = "avg300"

	// Degree values.
	DegreeSome = "some"
	DegreeFull = "full"
)
View Source
// DefaultHTTPPath is the URL path "/metrics".
// NOTE(review): presumably the path the default metrics handler is served on;
// the handler wiring is not visible here.
const DefaultHTTPPath = "/metrics"
View Source
// ExternalHTTPPath is the URL path "/external-metrics".
// NOTE(review): presumably the path on which the ExternalRegistry metrics are
// served; the handler wiring is not visible here.
const ExternalHTTPPath = "/external-metrics"
View Source
// InternalHTTPPath is the URL path "/internal-metrics".
// NOTE(review): presumably the path on which the InternalRegistry metrics are
// served; the handler wiring is not visible here.
const InternalHTTPPath = "/internal-metrics"

Variables

View Source
// Common koordlet metrics and the collector list that groups them.
// The order of each label slice is significant: positional callers such as
// WithLabelValues must pass values in exactly this order.
var (
	// KoordletStartTime is a gauge vector for the start time of koordlet,
	// labelled by node.
	KoordletStartTime = prometheus.NewGaugeVec(prometheus.GaugeOpts{
		Subsystem: KoordletSubsystem,
		Name:      "start_time",
		Help:      "the start time of koordlet",
	}, []string{NodeKey})

	// CollectNodeCPUInfoStatus is a counter vector counting CollectNodeCPUInfo
	// outcomes, labelled by node and status.
	CollectNodeCPUInfoStatus = prometheus.NewCounterVec(prometheus.CounterOpts{
		Subsystem: KoordletSubsystem,
		Name:      "collect_node_cpu_info_status",
		Help:      "the count of CollectNodeCPUInfo status",
	}, []string{NodeKey, StatusKey})

	// CollectNodeNUMAInfoStatus is a counter vector counting
	// CollectNodeNUMAInfo outcomes, labelled by node and status.
	CollectNodeNUMAInfoStatus = prometheus.NewCounterVec(prometheus.CounterOpts{
		Subsystem: KoordletSubsystem,
		Name:      "collect_node_numa_info_status",
		Help:      "the count of CollectNodeNUMAInfo status",
	}, []string{NodeKey, StatusKey})

	// CollectNodeLocalStorageInfoStatus is a counter vector counting
	// CollectNodeLocalStorageInfo outcomes, labelled by node and status.
	CollectNodeLocalStorageInfoStatus = prometheus.NewCounterVec(prometheus.CounterOpts{
		Subsystem: KoordletSubsystem,
		Name:      "collect_node_local_storage_info_status",
		Help:      "the count of CollectNodeLocalStorageInfo status",
	}, []string{NodeKey, StatusKey})

	// PodEviction is a counter vector counting evictions launched by koordlet,
	// labelled by node and eviction reason.
	PodEviction = prometheus.NewCounterVec(prometheus.CounterOpts{
		Subsystem: KoordletSubsystem,
		Name:      "pod_eviction",
		Help:      "Number of eviction launched by koordlet",
	}, []string{NodeKey, EvictionReasonKey})

	// PodEvictionDetail is the per-pod eviction counter, labelled by node, pod
	// namespace, pod name and eviction reason. It is wrapped by
	// metrics.NewGCCounterVec.
	// NOTE(review): presumably the wrapper garbage-collects stale label sets —
	// confirm against that helper, which is not visible here.
	PodEvictionDetail = metrics.NewGCCounterVec("pod_eviction_detail", prometheus.NewCounterVec(prometheus.CounterOpts{
		Subsystem: KoordletSubsystem,
		Name:      "pod_eviction_detail",
		Help:      "evict detail launched by koordlet",
	}, []string{NodeKey, PodNamespace, PodName, EvictionReasonKey}))

	// NodeUsedCPU is a gauge vector for the number of CPU cores used by the
	// node in realtime, labelled by node.
	NodeUsedCPU = prometheus.NewGaugeVec(prometheus.GaugeOpts{
		Subsystem: KoordletSubsystem,
		Name:      "node_used_cpu_cores",
		Help:      "Number of cpu cores used by node in realtime",
	}, []string{NodeKey})

	// CommonCollectors lists every collector declared above. For the wrapped
	// PodEvictionDetail it holds the underlying counter vector obtained via
	// GetCounterVec.
	CommonCollectors = []prometheus.Collector{
		KoordletStartTime,
		CollectNodeCPUInfoStatus,
		CollectNodeNUMAInfoStatus,
		CollectNodeLocalStorageInfoStatus,
		PodEviction,
		PodEvictionDetail.GetCounterVec(),
		NodeUsedCPU,
	}
)
View Source
// Core scheduling metrics and the collector list that groups them.
var (
	// ContainerCoreSchedCookie is the per-container gauge for the core
	// scheduling cookie, wrapped by metrics.NewGCGaugeVec.
	// NOTE(review): the label order puts PodName before PodNamespace, whereas
	// RecordContainerCoreSchedCookie takes namespace before podName — confirm
	// the recorder passes values in label order (or by label name).
	ContainerCoreSchedCookie = metrics.NewGCGaugeVec("container_core_sched_cookie", prometheus.NewGaugeVec(prometheus.GaugeOpts{
		Subsystem: KoordletSubsystem,
		Name:      "container_core_sched_cookie",
		Help:      "the core scheduling cookie of the container",
	}, []string{NodeKey, PodName, PodNamespace, PodUID, ContainerName, ContainerID, CoreSchedGroupKey, CoreSchedCookieKey}))

	// CoreSchedCookieManageStatus is the counter for the manage status of the
	// core scheduling cookie, labelled by node, core scheduling group and
	// status, wrapped by metrics.NewGCCounterVec.
	CoreSchedCookieManageStatus = metrics.NewGCCounterVec("core_sched_cookie_manage_status", prometheus.NewCounterVec(prometheus.CounterOpts{
		Subsystem: KoordletSubsystem,
		Name:      "core_sched_cookie_manage_status",
		Help:      "the manage status of the core scheduling cookie",
	}, []string{NodeKey, CoreSchedGroupKey, StatusKey}))

	// CoreSchedCollector lists the underlying Prometheus vectors of the two
	// wrapped metrics above.
	CoreSchedCollector = []prometheus.Collector{
		ContainerCoreSchedCookie.GetGaugeVec(),
		CoreSchedCookieManageStatus.GetCounterVec(),
	}
)
View Source
// Container CPI metric and the collector list that holds it.
var (
	// ContainerCPI is a gauge vector for the container CPI collected by
	// koordlet, labelled by node, container, pod and the CPIField label.
	// NOTE(review): presumably CPIField takes the values Cycles and
	// Instructions — confirm against RecordContainerCPI.
	ContainerCPI = prometheus.NewGaugeVec(prometheus.GaugeOpts{
		Subsystem: KoordletSubsystem,
		Name:      "container_cpi",
		Help:      "Container cpi collected by koordlet",
	}, []string{NodeKey, ContainerID, ContainerName, PodUID, PodName, PodNamespace, CPIField})

	// CPICollectors lists the CPI collectors; currently only ContainerCPI.
	CPICollectors = []prometheus.Collector{
		ContainerCPI,
	}
)
View Source
// CPU burst metrics and the collector list that groups them. Both gauges share
// the same label set: node, pod namespace, pod name, container ID, container name.
var (
	// ContainerScaledCFSBurstUS is a gauge vector for the container-level
	// maximum accumulated run-time, in microseconds, set by koordlet.
	ContainerScaledCFSBurstUS = prometheus.NewGaugeVec(prometheus.GaugeOpts{
		Subsystem: KoordletSubsystem,
		Name:      "container_scaled_cfs_burst_us",
		Help:      "The maximum accumulated run-time(in microseconds) in container-level set by koordlet",
	}, []string{NodeKey, PodNamespace, PodName, ContainerID, ContainerName})

	// ContainerScaledCFSQuotaUS is a gauge vector for the container-level
	// run-time replenished within a period, in microseconds, set by koordlet.
	ContainerScaledCFSQuotaUS = prometheus.NewGaugeVec(prometheus.GaugeOpts{
		Subsystem: KoordletSubsystem,
		Name:      "container_scaled_cfs_quota_us",
		Help:      "Run-time replenished within a period (in microseconds) in container-level set by koordlet",
	}, []string{NodeKey, PodNamespace, PodName, ContainerID, ContainerName})

	// CPUBurstCollector lists the two CPU burst gauges above.
	CPUBurstCollector = []prometheus.Collector{
		ContainerScaledCFSBurstUS,
		ContainerScaledCFSQuotaUS,
	}
)
View Source
// BE CPU suppress metrics and the collector list that groups them.
var (
	// BESuppressCPU is a gauge vector for the number of cores suppressed by
	// koordlet, labelled by node and suppress type.
	BESuppressCPU = prometheus.NewGaugeVec(prometheus.GaugeOpts{
		Subsystem: KoordletSubsystem,
		Name:      "be_suppress_cpu_cores",
		Help:      "Number of cores suppress by koordlet",
	}, []string{NodeKey, BESuppressTypeKey})

	// BESuppressLSUsedCPU is a gauge vector for the number of CPU cores used
	// by LS, labelled by node. Per its help text, non-BE pods and pods with
	// missing podMeta are counted as LS.
	BESuppressLSUsedCPU = prometheus.NewGaugeVec(prometheus.GaugeOpts{
		Subsystem: KoordletSubsystem,
		Name:      "be_suppress_ls_used_cpu_cores",
		Help:      "Number of cpu cores used by LS. We consider non-BE pods and podMeta-missing pods as LS.",
	}, []string{NodeKey})

	// CPUSuppressCollector lists the two CPU suppress gauges above.
	CPUSuppressCollector = []prometheus.Collector{
		BESuppressCPU,
		BESuppressLSUsedCPU,
	}
)
View Source
// Package-level node identity.
// NOTE(review): presumably populated by Register(node) and used to fill the
// node label when recording — confirm against Register and the recorders,
// whose bodies are not visible here. These are mutable globals; nothing
// visible here synchronizes access to them.
var (
	// NodeName is the name of the node.
	NodeName string
	// Node is the node object; nil until something assigns it.
	Node     *corev1.Node
)
View Source
// Prediction metric and the collector list that holds it.
var (
	// NodePredictedResourceReclaimable is a gauge vector for the node
	// reclaimable resources predicted by koordinator, labelled by node,
	// predictor, resource and unit.
	NodePredictedResourceReclaimable = prometheus.NewGaugeVec(prometheus.GaugeOpts{
		Subsystem: KoordletSubsystem,
		Name:      "node_predicted_resource_reclaimable",
		Help:      "the node reclaimable resources predicted by koordinator",
	}, []string{NodeKey, PredictorKey, ResourceKey, UnitKey})

	// PredictionCollectors lists the prediction collectors; currently only
	// NodePredictedResourceReclaimable.
	PredictionCollectors = []prometheus.Collector{
		NodePredictedResourceReclaimable,
	}
)
View Source
// PSI metrics and the collector list that groups them.
var (
	// ContainerPSI is a gauge vector for the container PSI collected by
	// koordlet, labelled by node, container, pod, and the PSI resource type,
	// precision, degree and cpu-full-supported labels.
	ContainerPSI = prometheus.NewGaugeVec(prometheus.GaugeOpts{
		Subsystem: KoordletSubsystem,
		Name:      "container_psi",
		Help:      "Container psi collected by koordlet",
	}, []string{NodeKey, ContainerID, ContainerName, PodUID, PodName, PodNamespace, PSIResourceType, PSIPrecision, PSIDegree, CPUFullSupported})

	// PodPSI is a gauge vector for the pod PSI collected by koordlet. Its
	// label set is that of ContainerPSI without the two container labels.
	PodPSI = prometheus.NewGaugeVec(prometheus.GaugeOpts{
		Subsystem: KoordletSubsystem,
		Name:      "pod_psi",
		Help:      "Pod psi collected by koordlet",
	}, []string{NodeKey, PodUID, PodName, PodNamespace, PSIResourceType, PSIPrecision, PSIDegree, CPUFullSupported})

	// PSICollectors lists the two PSI gauges above.
	PSICollectors = []prometheus.Collector{
		ContainerPSI,
		PodPSI,
	}
)
View Source
// Resource summary metrics and the collector list that groups them.
var (
	// NodeResourceAllocatable is a gauge vector for the node allocatable
	// resources updated by koordinator, labelled by node, resource and unit.
	NodeResourceAllocatable = prometheus.NewGaugeVec(prometheus.GaugeOpts{
		Subsystem: KoordletSubsystem,
		Name:      "node_resource_allocatable",
		Help:      "the node allocatable of resources updated by koordinator",
	}, []string{NodeKey, ResourceKey, UnitKey})

	// NodeResourcePriorityReclaimable is a gauge vector for the node
	// reclaimable resources per priority, labelled by node, priority, resource
	// and unit. Note that the metric name is
	// "node_priority_resource_reclaimable", whose word order differs from the
	// variable name.
	NodeResourcePriorityReclaimable = prometheus.NewGaugeVec(prometheus.GaugeOpts{
		Subsystem: KoordletSubsystem,
		Name:      "node_priority_resource_reclaimable",
		Help:      "the node reclaimable of different priorities resources updated by koordinator",
	}, []string{NodeKey, PriorityKey, ResourceKey, UnitKey})

	// ContainerResourceRequests is a gauge vector for the container resource
	// requests updated by koordinator, labelled by node, resource, unit, pod
	// and container.
	ContainerResourceRequests = prometheus.NewGaugeVec(prometheus.GaugeOpts{
		Subsystem: KoordletSubsystem,
		Name:      "container_resource_requests",
		Help:      "the container requests of resources updated by koordinator",
	}, []string{NodeKey, ResourceKey, UnitKey, PodUID, PodName, PodNamespace, ContainerID, ContainerName})

	// ContainerResourceLimits is a gauge vector for the container resource
	// limits updated by koordinator; it uses the same label set as
	// ContainerResourceRequests.
	ContainerResourceLimits = prometheus.NewGaugeVec(prometheus.GaugeOpts{
		Subsystem: KoordletSubsystem,
		Name:      "container_resource_limits",
		Help:      "the container limits of resources updated by koordinator",
	}, []string{NodeKey, ResourceKey, UnitKey, PodUID, PodName, PodNamespace, ContainerID, ContainerName})

	// ResourceSummaryCollectors lists the four resource summary gauges above.
	ResourceSummaryCollectors = []prometheus.Collector{
		NodeResourceAllocatable,
		NodeResourcePriorityReclaimable,
		ContainerResourceRequests,
		ContainerResourceLimits,
	}
)
View Source
// ExternalRegistry is a dedicated Prometheus registry for the metrics exposed
// to users, such as PMU or extended resources settings.
var ExternalRegistry = prometheus.NewRegistry()
View Source
// InternalRegistry only covers the metrics of koordlet itself, used for
// performance and functional monitoring. It is bound to
// legacyregistry.DefaultGatherer rather than to a registry created here.
// TODO consider using k8s.io/component-base/metrics to replace github.com/prometheus/client_golang/prometheus
var InternalRegistry = legacyregistry.DefaultGatherer

Functions

func ExternalMustRegister added in v1.4.1

func ExternalMustRegister(metrics ...prometheus.Collector)

func RecordBESuppressCores

func RecordBESuppressCores(suppressType string, value float64)

func RecordBESuppressLSUsedCPU added in v1.1.1

func RecordBESuppressLSUsedCPU(value float64)

func RecordCollectNodeCPUInfoStatus

func RecordCollectNodeCPUInfoStatus(err error)

func RecordCollectNodeLocalStorageInfoStatus added in v1.3.0

func RecordCollectNodeLocalStorageInfoStatus(err error)

func RecordCollectNodeNUMAInfoStatus added in v1.3.0

func RecordCollectNodeNUMAInfoStatus(err error)

func RecordContainerCPI added in v1.1.0

func RecordContainerCPI(status *corev1.ContainerStatus, pod *corev1.Pod, cycles, instructions float64)

func RecordContainerCoreSchedCookie added in v1.4.0

func RecordContainerCoreSchedCookie(namespace, podName, podUID, containerName, containerID, groupID string, cookieID uint64)

func RecordContainerPSI added in v1.1.0

func RecordContainerPSI(status *corev1.ContainerStatus, pod *corev1.Pod, psi *resourceexecutor.PSIByResource)

func RecordContainerResourceLimits added in v1.1.1

func RecordContainerResourceLimits(resourceName string, unit string, status *corev1.ContainerStatus, pod *corev1.Pod, value float64)

func RecordContainerResourceRequests added in v1.1.1

func RecordContainerResourceRequests(resourceName string, unit string, status *corev1.ContainerStatus, pod *corev1.Pod, value float64)

func RecordContainerScaledCFSBurstUS added in v1.1.1

func RecordContainerScaledCFSBurstUS(podNS, podName, containerID, containerName string, value float64)

func RecordContainerScaledCFSQuotaUS added in v1.1.1

func RecordContainerScaledCFSQuotaUS(podNS, podName, containerID, containerName string, value float64)

func RecordCoreSchedCookieManageStatus added in v1.4.0

func RecordCoreSchedCookieManageStatus(groupID string, isSucceeded bool)

func RecordKoordletStartTime

func RecordKoordletStartTime(nodeName string, value float64)

func RecordNodePredictedResourceReclaimable added in v1.3.0

func RecordNodePredictedResourceReclaimable(resourceName string, unit string, predictor string, value float64)

func RecordNodeResourceAllocatable added in v1.1.1

func RecordNodeResourceAllocatable(resourceName string, unit string, value float64)

func RecordNodeResourcePriorityReclaimable added in v1.3.0

func RecordNodeResourcePriorityReclaimable(resourceName string, unit string, priority string, value float64)

func RecordNodeUsedCPU added in v1.1.1

func RecordNodeUsedCPU(value float64)

func RecordPodEviction

func RecordPodEviction(namespace, podName, reasonType string)

func RecordPodPSI added in v1.1.0

func RecordPodPSI(pod *corev1.Pod, psi *resourceexecutor.PSIByResource)

func Register

func Register(node *corev1.Node)

Register registers the metrics with the node object

func ResetCPUBurstCollector added in v1.1.1

func ResetCPUBurstCollector()

func ResetContainerCPI added in v1.1.0

func ResetContainerCPI()

func ResetContainerCoreSchedCookie added in v1.4.0

func ResetContainerCoreSchedCookie(namespace, podName, podUID, containerName, containerID, groupID string, cookieID uint64)

func ResetContainerPSI added in v1.1.0

func ResetContainerPSI()

func ResetContainerResourceLimits added in v1.1.1

func ResetContainerResourceLimits()

func ResetContainerResourceRequests added in v1.1.1

func ResetContainerResourceRequests()

func ResetPodPSI added in v1.1.0

func ResetPodPSI()

Types

type PSIRecord added in v1.1.0

// PSIRecord is a single PSI sample together with the attributes that describe it.
// NOTE(review): the string fields presumably hold the ResourceType*,
// Precision* and Degree* constants of this package and map onto the
// PSIResourceType, PSIPrecision and PSIDegree labels — confirm against the
// code that builds and records these values, which is not visible here.
type PSIRecord struct {
	// ResourceType names the resource the sample belongs to.
	ResourceType     string
	// Precision names the precision of the sample.
	Precision        string
	// Degree names the degree of the sample.
	Degree           string
	// Value is the sample value.
	Value            float64
	// CPUFullSupported is the "cpu full supported" flag attached to the sample.
	CPUFullSupported bool
}

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL