metrics

package
v0.14.6 Latest Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: Mar 23, 2023 License: Apache-2.0 Imports: 19 Imported by: 0

Documentation

Index

Constants

View Source
// Metric name strings; every name carries the "pf_metric_" prefix.
// MetricApiDuration is the name given to APiDurationSummary.
// NOTE(review): the "_millisecond" suffix suggests the duration metrics are
// reported in milliseconds — confirm against the code that observes them.
const (
	MetricJobTime        = "pf_metric_job_time"
	MetricQueueInfo      = "pf_metric_queue_info"
	MetricJobGPUInfo     = "pf_metric_job_gpu_info"
	MetricApiDuration    = "pf_metric_api_duration_millisecond"
	MetricRunDuration    = "pf_metric_run_duration_millisecond"
	MetricRunJobDuration = "pf_metric_runJob_duration_millisecond"
)
View Source
// Label name strings.
// ApiNameLabel, RequestMethodLabel and ResponseCodeLabel are the label names
// of APiDurationSummary.
// NOTE(review): the remaining names are presumably the labels attached to the
// job, queue and run metrics — confirm against the collectors.
const (
	JobIDLabel          = "jobID"
	GpuIdxLabel         = "id"
	StatusLabel         = "status"
	QueueIDLabel        = "queueID"
	FinishedStatusLabel = "finishedStatus"
	QueueNameLabel      = "queueName"
	UserNameLabel       = "userName"
	ResourceLabel       = "resource"
	TypeLabel           = "type"
	BaiduGpuIndexLabel  = "baidu_com_gpu_idx"

	ApiNameLabel       = "apiName"
	RequestMethodLabel = "method"
	ResponseCodeLabel  = "code"
	RunIDLabel         = "runID"
	RunStageLabel      = "runStage"
	RunStepNameLabel   = "runStepName"
	RunStepStageLabel  = "runStepStage"
	RunJobStageLabel   = "runJobStage"
	RequestIDLabel     = "requestID"
	RunJobIDLabel      = "runJobID"
)
View Source
// Bounds of the JobTimePoint and JobStatus enumerations.
const (
	// MinTimePoint is the first JobTimePoint, T1.
	MinTimePoint = T1
	// MinStatus is StatusDBInserting, the first JobStatus after StatusUnknown.
	MinStatus    = StatusDBInserting
	// MaxTimePoint is the last JobTimePoint, T8.
	MaxTimePoint = T8
	// MaxStatus is StatusRunning, the last JobStatus.
	MaxStatus    = StatusRunning
)
View Source
// MaxNum is the untyped constant 10000.
// NOTE(review): what it bounds is not visible here — presumably a cache
// capacity; confirm against its users.
const MaxNum = 10000

// Timeout is a one-hour time.Duration.
// NOTE(review): what it times out is not visible here — confirm.
const Timeout = 1 * time.Hour

// ZeroDuration is the zero-length time.Duration.
const ZeroDuration time.Duration = 0
View Source
// Queue resource type strings.
// NOTE(review): presumably used as values of a metric label (e.g. TypeLabel)
// on the queue metrics — confirm against QueueMetricCollector.
const (
	QueueTypeMaxResource    = "maxResource"
	QueueTypeMinResource    = "minResource"
	QueueTypeScalarResource = "scalarResource"
)
View Source
// Stage time points of type stageTimeType recorded for runs, steps and jobs.
const (
	// Time at which creation of the run starts.
	StageRunStartTime stageTimeType = "run start"

	// Time at which the run reaches a terminal state.
	StageRunEndTime stageTimeType = "run end"

	// Time at which parsing of the run YAML starts.
	StageRunParseStartTime stageTimeType = "run parse start"

	// Time at which parsing of the run YAML finishes.
	StageRunParseEndTime stageTimeType = "run parse end"

	// Time at which validation of the run YAML and its related parameters starts.
	StageRunValidateStartTime stageTimeType = "run validate start"

	// Time at which validation of the run YAML and its related parameters finishes.
	StageRunValidateEndTime stageTimeType = "run validate end"

	// Time at which run aftertreatment (post-processing) starts, i.e. the
	// time at which the run is detected to be in a terminal state.
	StageRunAftertreatmentStartTime stageTimeType = "run aftertreatment start"

	// Time at which scheduling of the step starts, i.e. the moment the step
	// is determined to be runnable.
	StageStepScheduleStartTime stageTimeType = "step schedule start"

	// Time at which scheduling of the job starts; equivalent to StageStepScheduleStartTime.
	// It is recorded a second time because, in loop structures, different jobs finish scheduling at different times; to make each job's scheduling time easy to compute, it is recorded separately in every jobTimeRecorder.
	// The time point is first recorded in the StepTimeRecorder because this information is not available during the job stage; it is therefore recorded there and then copied into the jobTimeRecorder.
	StageJobScheduleStartTime stageTimeType = "job schedule start"

	// Time at which job scheduling finishes, i.e. just before the Job module's Create is called.
	StageJobScheduleEndTime stageTimeType = "job schedule end"

	// Time at which job creation finishes.
	StageJobCreateEndTime stageTimeType = "job create end"

	// Time at which job aftertreatment (post-processing) starts, i.e. the
	// moment the job reaches a terminal state.
	StageJobAftertreatmentStartTime stageTimeType = "job aftertreatment start"

	// Time at which job aftertreatment finishes, i.e. the time at which the
	// job's terminal state is written to the database.
	StageJobAftertreatmentEndTime stageTimeType = "job aftertreatment end"
)
View Source
// Stage duration name strings, grouped by run-level and job-level stages.
// NOTE(review): presumably used as the values of RunStageLabel and
// RunJobStageLabel on the run duration metrics — confirm against
// MetricRunCollector.
const (
	// run stage
	StageRunExecuteDuration        = "execution"
	StageRunParseDuration          = "parse"
	StageRunValidateDuration       = "validate"
	StageRunAftertreatmentDuration = "aftertreatment"

	// job stage
	StageRunJobScheduleDuration       = "job schedule"
	StageRunJobCreateDuration         = "job create"
	StageRunJobAftertreatmentDuration = "job aftertreatment"
)
View Source
// PromQL query templates. Each template takes a single %s argument, which is
// matched as a regular expression against the "pod" label.
//
// The matcher must be written "=~": PromQL's label matching operators are
// =, !=, =~ and !~, so the previous "~=" spelling produced a query that
// Prometheus cannot parse.
const (
	// PromQLQueryPodAnnotations selects kube_pod_annotations series by pod.
	PromQLQueryPodAnnotations = "kube_pod_annotations{pod=~\"%s\"}"
	// PromQLQueryPodLabels selects kube_pod_labels series by pod.
	PromQLQueryPodLabels = "kube_pod_labels{pod=~\"%s\"}"
)
View Source
// DefaultMetricPort is the port number 8231.
// NOTE(review): presumably the default for StartMetricsService's port
// argument — confirm against the caller.
const DefaultMetricPort = 8231
View Source
// QueryTimeout is a one-second time.Duration.
// NOTE(review): presumably the deadline applied to Prometheus queries made
// through PromAPIClient — confirm against its users.
const QueryTimeout = 1 * time.Second

Variables

View Source
// Package-level shared instances.
// NOTE(review): where these are assigned is not visible here — presumably in
// InitMetrics or StartMetricsService; confirm before relying on them being
// non-nil.
var (
	// RunMetricManger is the package-level *RunRecorderManager.
	RunMetricManger *RunRecorderManager
	// Job is the package-level TimePointManager.
	Job             TimePointManager
	// PromAPIClient is the package-level prom_v1.API client.
	PromAPIClient   prom_v1.API
)
View Source
// APiDurationSummary is a Prometheus summary vector named MetricApiDuration
// and partitioned by the ApiNameLabel, RequestMethodLabel and
// ResponseCodeLabel labels. Its Objectives map requests the 0.5, 0.9, 0.99
// and 1 quantiles, each paired with its allowed error.
var APiDurationSummary = prometheus.NewSummaryVec(
	prometheus.SummaryOpts{
		Name:       MetricApiDuration,
		Help:       toHelp(MetricApiDuration),
		Objectives: map[float64]float64{0.5: 0.05, 0.9: 0.01, 0.99: 0.001, 1: 0},
	},
	[]string{ApiNameLabel, RequestMethodLabel, ResponseCodeLabel})
View Source
// ZeroTime is the zero value of time.Time.
var ZeroTime time.Time

Functions

func GetAnnotationsFromRuntimeInfo added in v0.14.5

func GetAnnotationsFromRuntimeInfo(info interface{}) map[string]string

GetAnnotationsFromRuntimeInfo gets the annotations from the info map.

func GetQueryLabelsFromPrometheus added in v0.14.5

func GetQueryLabelsFromPrometheus(query string) map[string]string

GetQueryLabelsFromPrometheus returns the query labels from Prometheus. Deprecated.

func InitMetrics

func InitMetrics()

func StartMetricsService

func StartMetricsService(port int, queueFunc ListQueueFunc, jobFunc ListJobFunc) string

Types

type Info

// Info is a string-to-string map. It is the element type of the variadic
// extraInfos accepted by TimePointManager.AddTimestamp and the value returned
// by TimePointManager.GetInfo.
type Info map[string]string

type JobMetricCollector

type JobMetricCollector struct {
	// contains filtered or unexported fields
}

func NewJobMetricsCollector

func NewJobMetricsCollector(manager TimePointManager, listJob ListJobFunc) *JobMetricCollector

func (*JobMetricCollector) Collect

func (j *JobMetricCollector) Collect(metrics chan<- prometheus.Metric)

func (*JobMetricCollector) Describe

func (j *JobMetricCollector) Describe(descs chan<- *prometheus.Desc)

type JobStageTimeRecorder added in v0.14.6

// JobStageTimeRecorder embeds a *StageTimeRecorder and tags it with the job
// ID, the step name, the run ID and a schema.JobStatus.
type JobStageTimeRecorder struct {
	*StageTimeRecorder
	JobID    string
	StepName string
	RunID    string
	Status   schema.JobStatus
}

func NewJobStageTimeRecorder added in v0.14.6

func NewJobStageTimeRecorder(jobID string, stepName string, runID string) *JobStageTimeRecorder

type JobStatus

// JobStatus is an integer enumeration of job statuses. It declares String and
// TimePoint methods with the signatures required by the Status interface.
type JobStatus int
// JobStatus values, numbered from zero in declaration order. StatusUnknown is
// the zero value; MinStatus and MaxStatus name StatusDBInserting and
// StatusRunning respectively.
const (
	StatusUnknown JobStatus = iota
	StatusDBInserting
	StatusEnqueue
	StatusDequeue
	StatusSubmitting
	StatusPending
	StatusCreating
	StatusRunning
)

func (JobStatus) String

func (j JobStatus) String() string

func (JobStatus) TimePoint

func (j JobStatus) TimePoint() (start TimePoint, end TimePoint)

type JobTimePoint

// JobTimePoint is an integer enumeration of the time points recorded for a
// job. It declares Index and Status methods with the signatures required by
// the TimePoint interface.
type JobTimePoint int
// JobTimePoint values, numbered from zero in declaration order. MinTimePoint
// and MaxTimePoint name T1 and T8 respectively.
const (
	// T1 api query time
	T1 JobTimePoint = iota
	// T2 db insert time
	T2
	// T3 enqueue time
	T3
	// T4 dequeue time
	T4
	// T5 submit time
	T5
	// T6 scheduled time
	// TODO: T6 is not supported yet
	T6
	// T7 run time
	T7
	// T8 finish(success/fail) time
	T8
)

func (JobTimePoint) Index

func (t JobTimePoint) Index() int

func (JobTimePoint) Status

func (t JobTimePoint) Status() Status

type ListJobFunc added in v0.14.5

// ListJobFunc is a function that takes no arguments and returns a slice of
// model.Job. NewJobMetricsCollector and StartMetricsService accept one.
type ListJobFunc func() []model.Job

type ListQueueFunc

// ListQueueFunc is a function that takes no arguments and returns a slice of
// model.Queue. NewQueueMetricsCollector and StartMetricsService accept one.
type ListQueueFunc func() []model.Queue

type MetricRunCollector added in v0.14.6

type MetricRunCollector struct {
	// contains filtered or unexported fields
}

func NewMetricRunCollector added in v0.14.6

func NewMetricRunCollector() *MetricRunCollector

func (*MetricRunCollector) Collect added in v0.14.6

func (rm *MetricRunCollector) Collect(ch chan<- prometheus.Metric)

func (*MetricRunCollector) Describe added in v0.14.6

func (rm *MetricRunCollector) Describe(descs chan<- *prometheus.Desc)

type QueueMetricCollector

type QueueMetricCollector struct {
	// contains filtered or unexported fields
}

func NewQueueMetricsCollector

func NewQueueMetricsCollector(queueFunc ListQueueFunc) *QueueMetricCollector

func (*QueueMetricCollector) Collect

func (q *QueueMetricCollector) Collect(metrics chan<- prometheus.Metric)

func (*QueueMetricCollector) Describe

func (q *QueueMetricCollector) Describe(descs chan<- *prometheus.Desc)

type RunRecorderManager added in v0.14.6

// RunRecorderManager holds a gcache.Cache and exposes the
// AddRunStageTimeRecord, AddStepStageTimeRecord and AddJobStageTimeRecord
// methods.
// NOTE(review): the cache presumably maps run IDs to *RunStageTimeRecorder
// values — confirm against NewRunRecorderManager.
type RunRecorderManager struct {
	Cache gcache.Cache
}

func NewRunRecorderManager added in v0.14.6

func NewRunRecorderManager() *RunRecorderManager

func (*RunRecorderManager) AddJobStageTimeRecord added in v0.14.6

func (m *RunRecorderManager) AddJobStageTimeRecord(runID, stepName, jobID string, status schema.JobStatus, stage stageTimeType, timestamp time.Time)

func (*RunRecorderManager) AddRunStageTimeRecord added in v0.14.6

func (m *RunRecorderManager) AddRunStageTimeRecord(runID, requestID, status string, stage stageTimeType, timestamp time.Time)

func (*RunRecorderManager) AddStepStageTimeRecord added in v0.14.6

func (m *RunRecorderManager) AddStepStageTimeRecord(runID, stepName string, stage stageTimeType, timestamp time.Time)

type RunStageTimeRecorder added in v0.14.6

// RunStageTimeRecorder embeds a *StageTimeRecorder and tags it with the run
// ID, the request ID and a status string.
type RunStageTimeRecorder struct {
	*StageTimeRecorder
	RunID      string
	RequestID  string
	// NOTE(review): presumably maps step names to *StepStageTimeRecorder
	// values — confirm against the code that stores into it.
	StepStages sync.Map
	Status     string
}

func NewRunStageTimeRecorder added in v0.14.6

func NewRunStageTimeRecorder(runID, reqID string) *RunStageTimeRecorder

type StageTimeRecorder added in v0.14.6

// StageTimeRecorder is the base recorder embedded by RunStageTimeRecorder,
// StepStageTimeRecorder and JobStageTimeRecorder.
type StageTimeRecorder struct {
	// Used to identify this recorder in log output.
	LoggerMeta string

	// NOTE(review): presumably maps a stageTimeType to its recorded
	// time.Time — confirm against the code that stores into it.
	StageTime sync.Map
	// NOTE(review): presumably the stage time types this recorder accepts —
	// confirm against NewStageTimeRecorder.
	Support   []stageTimeType
}

func NewStageTimeRecorder added in v0.14.6

func NewStageTimeRecorder(suppport []stageTimeType, loggerMeta string) *StageTimeRecorder

type Status

// Status is a status that has a string form and a pair of time points. The
// JobStatus type declares both methods.
type Status interface {
	// String returns the status as a string.
	String() string
	// TimePoint returns the start and end time points of the status.
	TimePoint() (start TimePoint, end TimePoint)
}

type StepStageTimeRecorder added in v0.14.6

// StepStageTimeRecorder embeds a *StageTimeRecorder and tags it with the step
// name and the run ID.
type StepStageTimeRecorder struct {
	*StageTimeRecorder
	StepName  string
	RunID     string
	// NOTE(review): presumably maps job IDs to *JobStageTimeRecorder
	// values — confirm against the code that stores into it.
	JobStages sync.Map
}

func NewStepStageTimeRecorder added in v0.14.6

func NewStepStageTimeRecorder(stepName string, runID string) *StepStageTimeRecorder

type TimePoint

// TimePoint is a time point that maps to a Status and has an integer index.
// The JobTimePoint type declares both methods.
type TimePoint interface {
	// Status returns the Status associated with the time point.
	Status() Status
	// Index returns the time point's integer index.
	Index() int
}

type TimePointManager

// TimePointManager stores and queries timestamps, grouped under string keys.
// NewJobMetricTimePointManager returns one.
// NOTE(review): the key is presumably a job ID — confirm against callers.
type TimePointManager interface {
	// AddTimestamp records timestamp for timePoint under key, with optional
	// extra Info values.
	AddTimestamp(key string, timePoint TimePoint, timestamp time.Time, extraInfos ...Info)
	// GetStatusTime returns a duration for status under key; the bool
	// presumably reports whether it was available — confirm.
	GetStatusTime(key string, status Status) (time.Duration, bool)
	// GetTimestamp returns the timestamp for timePoint under key; the bool
	// presumably reports whether it was found — confirm.
	GetTimestamp(key string, timePoint TimePoint) (time.Time, bool)
	// GetTimestampsCache returns a map from string key to Timestamps.
	GetTimestampsCache() map[string]Timestamps
	// GetStatusCount returns a count for status.
	GetStatusCount(status Status) int64
	// GetInfo returns the Info under key; the bool presumably reports
	// whether it was found — confirm.
	GetInfo(key string) (Info, bool)
}

func NewJobMetricTimePointManager

func NewJobMetricTimePointManager() TimePointManager

NewJobMetricTimePointManager returns the default implementation of the job metric manager.

type Timestamps

// Timestamps offers the same three timestamp operations as TimePointManager
// without the key argument; TimePointManager.GetTimestampsCache returns a map
// of Timestamps keyed by string.
type Timestamps interface {
	// GetStatusTime returns a duration for status; the bool presumably
	// reports whether it was available — confirm.
	GetStatusTime(status Status) (time.Duration, bool)
	// GetTimestamp returns the timestamp for timePoint; the bool presumably
	// reports whether it was found — confirm.
	GetTimestamp(timePoint TimePoint) (time.Time, bool)
	// AddTimestamp records timestamp for timePoint.
	AddTimestamp(timePoint TimePoint, timestamp time.Time)
}

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL