metrics

package
v0.0.0-...-9b598c7 Latest Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: Jun 28, 2023 License: Apache-2.0 Imports: 5 Imported by: 0

Documentation

Index

Constants

View Source
const (
	// KasFleetManager - metrics prefix (used as the Prometheus Subsystem of the metrics in this package)
	KasFleetManager = "kas_fleet_manager"

	// ClusterCreateRequestDuration - name of cluster creation duration metric
	ClusterCreateRequestDuration = "worker_cluster_duration"
	// KafkaCreateRequestDuration - name of kafka creation duration metric
	KafkaCreateRequestDuration = "worker_kafka_duration"

	// LabelID, LabelStatus, LabelClusterID, LabelClusterExternalID - metric label names
	LabelID                = "id"
	LabelStatus            = "status"
	LabelClusterID         = "cluster_id"
	LabelClusterExternalID = "external_id"

	// KafkaOperationsSuccessCount - name of the metric for Kafka-related successful operations
	KafkaOperationsSuccessCount = "kafka_operations_success_count"
	// KafkaOperationsTotalCount - name of the metric for all Kafka-related operations
	KafkaOperationsTotalCount = "kafka_operations_total_count"

	// KafkaRequestsStatusSinceCreated, KafkaRequestsStatusCount, KafkaRequestsCurrentStatusInfo - names of the kafka requests status metrics
	KafkaRequestsStatusSinceCreated = "kafka_requests_status_since_created_in_seconds"
	KafkaRequestsStatusCount        = "kafka_requests_status_count"
	KafkaRequestsCurrentStatusInfo  = "kafka_requests_current_status_info"

	// ClusterOperationsSuccessCount - name of the metric for cluster-related successful operations
	ClusterOperationsSuccessCount = "cluster_operations_success_count"
	// ClusterOperationsTotalCount - name of the metric for all cluster-related operations
	ClusterOperationsTotalCount = "cluster_operations_total_count"

	// ReconcilerDuration, ReconcilerSuccessCount, ReconcilerFailureCount, ReconcilerErrorsCount - names of the reconciler metrics
	ReconcilerDuration     = "reconciler_duration_in_seconds"
	ReconcilerSuccessCount = "reconciler_success_count"
	ReconcilerFailureCount = "reconciler_failure_count"
	ReconcilerErrorsCount  = "reconciler_errors_count"

	// ClusterStatusSinceCreated, ClusterStatusCount - names of the cluster status metrics
	ClusterStatusSinceCreated = "cluster_status_since_created_in_seconds"
	ClusterStatusCount        = "cluster_status_count"

	// KafkaPerClusterCount - name of the kafka per cluster count metric
	KafkaPerClusterCount = "kafka_per_cluster_count"

	// LeaderWorker - name of the leader worker metric
	LeaderWorker = "leader_worker"

	// ObservatoriumRequestCount - metric name for the number of observatorium requests sent
	ObservatoriumRequestCount = "observatorium_request_count"
	// ObservatoriumRequestDuration - metric name for observatorium request duration in seconds
	ObservatoriumRequestDuration = "observatorium_request_duration"

	// DatabaseQueryCount - metric name for the number of database queries sent
	DatabaseQueryCount = "database_query_count"
	// DatabaseQueryDuration - metric name for database query duration in milliseconds
	DatabaseQueryDuration = "database_query_duration"

	// ClusterStatusCapacityMax - metric name for the maximum kafka instance capacity
	ClusterStatusCapacityMax = "cluster_status_capacity_max"

	// ClusterStatusCapacityUsed - metric name for the current number of instances
	ClusterStatusCapacityUsed = "cluster_status_capacity_used"

	// ClusterStatusCapacityAvailable - metric name for the number of available instances
	ClusterStatusCapacityAvailable = "cluster_status_capacity_available"

	// ClusterProviderResourceQuotaConsumed - metric name for how much quota, given to a user by a cluster provider, is currently used.
	ClusterProviderResourceQuotaConsumed = "cluster_provider_resource_quota_consumed"

	// ClusterProviderResourceQuotaMaxAllowed - metric name for the maximum allowed resource quota given to a user by a cluster provider (i.e. ocm)
	ClusterProviderResourceQuotaMaxAllowed = "cluster_provider_resource_quota_max_allowed"

	// PrewarmingStatusInfoCount - metric name for the total number of prewarmed instances per cluster_id, status and instance type.
	PrewarmingStatusInfoCount = "prewarmed_kafka_instances"

	// LabelStatusCode, LabelMethod, LabelPath - label names for the HTTP status code, method and path of a request
	LabelStatusCode = "code"
	LabelMethod     = "method"
	LabelPath       = "path"

	// LabelDatabaseQueryStatus, LabelDatabaseQueryType - label names for database query metrics.
	// Note that LabelDatabaseQueryStatus has the same value ("status") as LabelStatus.
	// LabelRegion, LabelInstanceType, LabelCloudProvider - label names for region, instance type and cloud provider.
	LabelDatabaseQueryStatus = "status"
	LabelDatabaseQueryType   = "query"
	LabelRegion              = "region"
	LabelInstanceType        = "instance_type"
	LabelCloudProvider       = "cloud_provider"

	// LabelQuotaId, LabelClusterProvider - label names for the cluster provider resource quota metrics
	LabelQuotaId         = "quota_id"
	LabelClusterProvider = "cluster_provider"
)

Variables

View Source
// ClusterOperationsCountMetricsLabels - label list containing the single
// label labelOperation (declared elsewhere in the package).
var ClusterOperationsCountMetricsLabels = []string{
	labelOperation,
}

ClusterOperationsCountMetricsLabels - is the slice of labels to add to cluster operations count metrics

View Source
// ClusterProviderResourceQuotaLabels - label list made of LabelQuotaId and
// LabelClusterProvider, in that order.
// NOTE(review): presumably used by the cluster provider resource quota
// metrics - confirm against the metric definitions.
var ClusterProviderResourceQuotaLabels = []string{
	LabelQuotaId,
	LabelClusterProvider,
}
View Source
// ClusterStatusCountMetricsLabels - label list containing only LabelStatus.
// NOTE(review): presumably used by the cluster status count metric - confirm
// against the metric definition.
var ClusterStatusCountMetricsLabels = []string{
	LabelStatus,
}
View Source
// ClusterStatusSinceCreatedMetricsLabels - label list made of LabelID,
// LabelClusterID and LabelStatus, in that order.
// NOTE(review): presumably used by the cluster status since created metric -
// confirm against the metric definition.
var ClusterStatusSinceCreatedMetricsLabels = []string{
	LabelID,
	LabelClusterID,
	LabelStatus,
}
View Source
// JobsMetricsLabels - label list containing the single label labelJobType
// (declared elsewhere in the package).
var JobsMetricsLabels = []string{
	labelJobType,
}

JobsMetricsLabels is the slice of labels to add to job metrics

View Source
// KafkaOperationsCountMetricsLabels - label list containing the single label
// labelOperation (declared elsewhere in the package).
var KafkaOperationsCountMetricsLabels = []string{
	labelOperation,
}

KafkaOperationsCountMetricsLabels - is the slice of labels to add to Kafka operations count metrics

View Source
// KafkaPerClusterCountMetricsLabels - label list made of LabelClusterID and
// LabelClusterExternalID, in that order.
// NOTE(review): presumably used by the kafka per cluster count metric -
// confirm against the metric definition.
var KafkaPerClusterCountMetricsLabels = []string{
	LabelClusterID,
	LabelClusterExternalID,
}
View Source
// KafkaStatusCountMetric - Prometheus GaugeVec created with the KasFleetManager
// subsystem and the KafkaRequestsStatusCount name. Per its help text it reports
// the number of Kafka instances in each status. Its label list,
// kafkaStatusCountMetricLabels, is declared elsewhere in the package.
var KafkaStatusCountMetric = prometheus.NewGaugeVec(
	prometheus.GaugeOpts{
		Subsystem: KasFleetManager,
		Name:      KafkaRequestsStatusCount,
		Help:      "number of total Kafka instances in each status",
	},
	kafkaStatusCountMetricLabels,
)

create a new GaugeVec for status counts

View Source
// ReconcilerMetricsLabels - label list containing the single label
// labelWorkerType (declared elsewhere in the package).
// NOTE(review): presumably used by the reconciler metrics - confirm against
// the metric definitions.
var ReconcilerMetricsLabels = []string{
	labelWorkerType,
}

Functions

func IncreaseClusterSuccessOperationsCountMetric

func IncreaseClusterSuccessOperationsCountMetric(operation constants.ClusterOperation)

IncreaseClusterSuccessOperationsCountMetric - increase counter for clusterOperationsSuccessCountMetric

func IncreaseClusterTotalOperationsCountMetric

func IncreaseClusterTotalOperationsCountMetric(operation constants.ClusterOperation)

IncreaseClusterTotalOperationsCountMetric - increase counter for clusterOperationsTotalCountMetric

func IncreaseDatabaseQueryCount

func IncreaseDatabaseQueryCount(status string, queryType string)

Increase the database query count metric with the following labels:

  • status: (i.e. "success" or "failure")
  • queryType: (i.e. "SELECT", "UPDATE", "INSERT", "DELETE")

func IncreaseKafkaSuccessOperationsCountMetric

func IncreaseKafkaSuccessOperationsCountMetric(operation constants.KafkaOperation)

IncreaseKafkaSuccessOperationsCountMetric - increase counter for the kafkaOperationsSuccessCountMetric

func IncreaseKafkaTotalOperationsCountMetric

func IncreaseKafkaTotalOperationsCountMetric(operation constants.KafkaOperation)

IncreaseKafkaTotalOperationsCountMetric - increase counter for the kafkaOperationsTotalCountMetric

func IncreaseObservatoriumRequestCount

func IncreaseObservatoriumRequestCount(code int, path, method string)

Increase the observatorium request count metric with the following labels:

  • code: HTTP Status code (i.e. 200 or 500)
  • path: Request URL path (i.e. /api/v1/query)
  • method: HTTP Method (i.e. GET or POST)

func IncreaseReconcilerErrorsCount

func IncreaseReconcilerErrorsCount(reconcilerType string, numOfErr int)

func IncreaseReconcilerFailureCount

func IncreaseReconcilerFailureCount(reconcilerType string)

func IncreaseReconcilerSuccessCount

func IncreaseReconcilerSuccessCount(reconcilerType string)

func Reset

func Reset()

Reset the metrics we have defined. It is mainly used for testing.

func ResetMetricsForClusterManagers

func ResetMetricsForClusterManagers()

ResetMetricsForClusterManagers will reset the metrics for the ClusterManager background reconciler. This is needed because, if the current process is no longer the leader, the metrics must be reset; otherwise stale data will be scraped.

func ResetMetricsForKafkaManagers

func ResetMetricsForKafkaManagers()

ResetMetricsForKafkaManagers will reset the metrics for the KafkaManager background reconciler. This is needed because, if the current process is no longer the leader, the metrics must be reset; otherwise stale data will be scraped.

func ResetMetricsForObservatorium

func ResetMetricsForObservatorium()

ResetMetricsForObservatorium will reset the metrics related to Observatorium requests. This is needed because, if the current process is no longer the leader, the metrics must be reset; otherwise stale data will be scraped.

func ResetMetricsForReconcilers

func ResetMetricsForReconcilers()

ResetMetricsForReconcilers will reset the metrics related to the reconcilers. This is needed because, if the current process is no longer the leader, the metrics must be reset; otherwise stale data will be scraped.

func SetLeaderWorkerMetric

func SetLeaderWorkerMetric(workerType string, leader bool)

SetLeaderWorkerMetric will set the metric value to 1 if the worker is the leader, and 0 if the worker is not the leader. Then, when the metric is scraped, Prometheus will add additional information such as the pod name, which can then be used to display which pod is the leader.

func UpdateClusterCreationDurationMetric

func UpdateClusterCreationDurationMetric(jobType JobType, elapsed time.Duration)

UpdateClusterCreationDurationMetric records the duration of a job type

func UpdateClusterPrewarmingStatusInfoCountMetric

func UpdateClusterPrewarmingStatusInfoCountMetric(prewarmingStatusInfo PrewarmingStatusInfo)

UpdateClusterPrewarmingStatusInfoCountMetric - Updates the kas_fleet_manager_prewarmed_kafka_instances metric.

func UpdateClusterProviderResourceQuotaConsumed

func UpdateClusterProviderResourceQuotaConsumed(quotaId, provider string, count int)

UpdateClusterProviderResourceQuotaConsumed - records cluster resource quota currently consumed by a cluster provider account used by the service

func UpdateClusterProviderResourceQuotaMaxAllowedMetric

func UpdateClusterProviderResourceQuotaMaxAllowedMetric(quotaId string, clusterProvider string, maxAllowed int)

UpdateClusterProviderResourceQuotaMaxAllowedMetric - Updates the kas_fleet_manager_cluster_provider_resource_quota_max_allowed metric.

quotaId: id of the resource quota.
clusterProvider: the cluster provider where the resource quota is allocated from.
maxAllowed: the maximum allowed resource quota that can be consumed (the metric value).

func UpdateClusterStatusCapacityAvailableCount

func UpdateClusterStatusCapacityAvailableCount(provider string, region, instanceType, clusterId string, count float64)

UpdateClusterStatusCapacityAvailableCount - sets available capacity per region and instance type

func UpdateClusterStatusCapacityMaxCount

func UpdateClusterStatusCapacityMaxCount(provider string, region, instanceType, clusterId string, count float64)

UpdateClusterStatusCapacityMaxCount - sets maximum capacity per region and instance type

func UpdateClusterStatusCapacityUsedCount

func UpdateClusterStatusCapacityUsedCount(provider string, region, instanceType, clusterId string, count float64)

UpdateClusterStatusCapacityUsedCount - sets used capacity per region and instance type

func UpdateClusterStatusCountMetric

func UpdateClusterStatusCountMetric(status api.ClusterStatus, count int)

func UpdateClusterStatusSinceCreatedMetric

func UpdateClusterStatusSinceCreatedMetric(cluster api.Cluster, status api.ClusterStatus)

func UpdateDatabaseQueryDurationMetric

func UpdateDatabaseQueryDurationMetric(status string, queryType string, elapsed time.Duration)

Update the database query duration metric with the following labels:

  • status: (i.e. "success" or "failure")
  • queryType: (i.e. "SELECT", "UPDATE", "INSERT", "DELETE")

func UpdateKafkaCreationDurationMetric

func UpdateKafkaCreationDurationMetric(jobType JobType, elapsed time.Duration)

UpdateKafkaCreationDurationMetric records the duration of a job type

func UpdateKafkaPerClusterCountMetric

func UpdateKafkaPerClusterCountMetric(clusterId string, clusterExternalID string, count int)

func UpdateKafkaRequestsCurrentStatusInfoMetric

func UpdateKafkaRequestsCurrentStatusInfoMetric(status constants.KafkaStatus, kafkaId string, clusterId string, value float64)

UpdateKafkaRequestsCurrentStatusInfoMetric

func UpdateKafkaRequestsStatusCountMetric

func UpdateKafkaRequestsStatusCountMetric(status constants.KafkaStatus, count int)

UpdateKafkaRequestsStatusCountMetric

func UpdateKafkaRequestsStatusSinceCreatedMetric

func UpdateKafkaRequestsStatusSinceCreatedMetric(status constants.KafkaStatus, kafkaId string, clusterId string, elapsed time.Duration)

UpdateKafkaRequestsStatusSinceCreatedMetric

func UpdateObservatoriumRequestDurationMetric

func UpdateObservatoriumRequestDurationMetric(code int, path, method string, elapsed time.Duration)

Update the observatorium request duration metric with the following labels:

  • code: HTTP Status code (i.e. 200 or 500)
  • path: Request url path (i.e. /api/v1/query)
  • method: HTTP Method (i.e. GET or POST)

func UpdateReconcilerDurationMetric

func UpdateReconcilerDurationMetric(reconcilerType string, elapsed time.Duration)

Types

type JobType

// JobType - string type naming the kind of job a duration metric is captured for.
type JobType string

JobType metric to capture

// Predefined JobType values.
// NOTE(review): these are declared with var rather than const, so other code
// can reassign them - confirm whether that is intended.
var (
	// JobTypeClusterCreate - cluster_create job type
	JobTypeClusterCreate JobType = "cluster_create"
	// JobTypeKafkaCreate - kafka_create job type
	JobTypeKafkaCreate JobType = "kafka_create"
)

type PrewarmingStatusInfo

// PrewarmingStatusInfo is a container of prewarming count information.
type PrewarmingStatusInfo struct {
	// ClusterID - the cluster_id of the cluster.
	ClusterID    string
	// InstanceType - the instance type of the prewarmed instance.
	InstanceType string
	// Status - the status of the reserved instance reported by the data plane.
	Status       string
	// Count - the total count of reserved instances in the given status.
	Count        int
}

PrewarmingStatusInfo is a container of prewarming count information. ClusterID: the cluster_id of the cluster. InstanceType: the instance type of the prewarmed instance. Status: the status of the reserved instance reported by the data plane. Count: the total count of reserved instances in the given status.

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL