metrics

package
v1.4.3 Latest Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: Oct 5, 2020 License: LGPL-3.0 Imports: 14 Imported by: 0

Documentation

Overview

Package metrics provides general system and process level metrics collection.

Index

Constants

View Source
// Kinds of metric value that an alarm rule can be evaluated against; a
// Condition selects one through its MetricsType field. The values are assigned
// by iota, so the order of this list must not change.
// NOTE(review): TypeRate1/5/15 presumably denote 1-, 5- and 15-minute rates
// (AlarmRuleTable's reason strings for TypeRate1 speak of "the last minute") —
// confirm against the code that evaluates these types.
const (
	TypeCount metricsType = iota // 0
	TypeRate1
	TypeRate5
	TypeRate15
	TypeRateMean
	TypeMean
	TypeMax
	TypeMin
	TypeStdDev
	TypeSum
	TypeVariance
	TypeValue
)
View Source
// Message codes.
const (
	CodeHeartbeat = uint32(0x01) // msg code of the heartbeat packet

)
View Source
// LevelDBPrefix is the common prefix prepended to every LevelDb_* metric name.
const LevelDBPrefix = "glemo/db/chaindata/"

Variables

View Source
// Framing parameters for packages.
var (
	PackagePrefix = []byte{0x77, 0x88} // package flag
	PackageLength = 4                  // number of bytes occupied by the msg length
)
View Source
// Switches and settings controlling whether metrics are active.
var (
	MetricsEnabledFlagValue = "metrics"
	Enabled                 = false // whether metrics are active; decided by whether the config file sets the alarm server URL
	AlarmUrl                string  // URL of the alarm system server, passed in through the config file
)

Enabled is the flag specifying whether metrics are enabled.

View Source
// Names under which the individual meters, counters and timers are registered,
// plus two alarm thresholds. Several of these names are used as keys of
// AlarmRuleTable.
var (
	InvalidTx_meterName      = "txpool/DelInvalidTxs/invalid"
	TxpoolNumber_counterName = "txpool/totalTxNumber"

	VerifyFailedTx_meterName = "tx/VerifyTxBody/verifyFailed"

	HandleBlocksMsg_meterName                 = "network/protocol_manager/handleBlocksMsg"                 // rate at which handleBlocksMsg is called
	HandleGetBlocksMsg_meterName              = "network/protocol_manager/handleGetBlocksMsg"              // rate at which handleGetBlocksMsg is called
	HandleBlockHashMsg_meterName              = "network/protocol_manager/handleBlockHashMsg"              // rate at which handleBlockHashMsg is called
	HandleGetConfirmsMsg_meterName            = "network/protocol_manager/handleGetConfirmsMsg"            // rate at which handleGetConfirmsMsg is called
	HandleConfirmMsg_meterName                = "network/protocol_manager/handleConfirmMsg"                // rate at which handleConfirmMsg is called
	HandleGetBlocksWithChangeLogMsg_meterName = "network/protocol_manager/handleGetBlocksWithChangeLogMsg" // rate at which handleGetBlocksWithChangeLogMsg is called
	HandleDiscoverReqMsg_meterName            = "network/protocol_manager/handleDiscoverReqMsg"            // rate at which handleDiscoverReqMsg is called
	HandleDiscoverResMsg_meterName            = "network/protocol_manager/handleDiscoverResMsg"            // rate at which handleDiscoverResMsg is called

	LevelDb_get_timerName       = LevelDBPrefix + "user/gets"
	LevelDb_put_timerName       = LevelDBPrefix + "user/puts"
	LevelDb_del_timerName       = LevelDBPrefix + "user/dels"
	LevelDb_miss_meterName      = LevelDBPrefix + "user/misses" // rate of failed get operations on the database
	LevelDb_read_meterName      = LevelDBPrefix + "user/reads"  // size in bytes of the data read (get) from the database
	LevelDb_write_meterName     = LevelDBPrefix + "user/writes" // size in bytes of the data written (put) to the database
	LevelDb_compTime_meteName   = LevelDBPrefix + "user/time"   // NOTE(review): "mete" looks like a typo for "meter"; the exported name is left as is
	LevelDb_compRead_meterName  = LevelDBPrefix + "user/input"
	LevelDb_compWrite_meterName = LevelDBPrefix + "user/output"

	BlockInsert_timerName   = "consensus/InsertBlock/insertBlock" // rate of inserting blocks into the chain and the distribution of the time taken
	MineBlock_timerName     = "consensus/MineBlock/mineBlock"     // block mining rate and time distribution
	VerifyBlock_meterName   = "consensus/dpovp"                   // rate at which verification of received blocks fails
	UnStableBlock_meterName = "consensus/dpovp/saveNewBlock"      // too many unstable blocks
	// Alarm thresholds
	Alarm_BlockInsert float64 = 5 // average insert-chain time exceeds 5s
	Alarm_MineBlock   float64 = 8 // average mine-block time exceeds 8s; NOTE(review): AlarmRuleTable uses 15 for MineBlock_timerName — confirm which value applies

	PeerConnFailed_meterName  = "p2p/listenLoop/failedHandleConn"
	ReadMsgSuccess_timerName  = "p2p/readLoop/readMsgSuccess"  // timer for msg reads that succeeded
	ReadMsgFailed_timerName   = "p2p/readLoop/readMsgFailed"   // timer for msg reads that failed
	WriteMsgSuccess_timerName = "p2p/WriteMsg/writeMsgSuccess" // timer for msg writes that succeeded
	WriteMsgFailed_timerName  = "p2p/WriteMsg/writeMsgFailed"  // timer for msg writes that failed

	System_memory_allocs   = "system/memory/allocs"   // number of memory allocations
	System__memory_frees   = "system/memory/frees"    // number of memory frees; NOTE(review): the double underscore in the name looks unintentional
	System_memory_inuse    = "system/memory/inuse"    // bytes allocated and still in use
	System_memory_pauses   = "system/memory/pauses"   // circular buffer of total GC pause times
	System_disk_readCount  = "system/disk/readcount"  // number of disk read operations
	System_disk_readData   = "system/disk/readdata"   // total number of bytes read
	System_disk_writeCount = "system/disk/writecount" // number of disk write operations
	System_disk_writeData  = "system/disk/writedata"  // total number of bytes written
)
View Source
// AlarmRuleTable is the alarm rule table: it maps a metric name to the
// Condition describing which value of that metric is checked (MetricsType),
// the threshold (AlarmValue), the human-readable reason reported with the
// alarm (AlarmReason) and the alarm message type (AlarmMsgCode).
//
// NOTE(review): for TypeRate1 rules the thresholds appear to be expressed in
// events per second — e.g. 0.5 is paired with a reason text of "more than 30
// in the last minute" — confirm against the code that evaluates the rules.
var AlarmRuleTable = map[string]*Condition{
	// Transaction pool.
	InvalidTx_meterName: {
		AlarmReason:  "最近的一分钟时间内有大于30笔交易执行失败了",
		MetricsType:  TypeRate1,
		AlarmValue:   0.5,
		AlarmMsgCode: textMsgCode,
	},
	TxpoolNumber_counterName: {
		AlarmReason:  "交易池中的交易大于5000笔了",
		MetricsType:  TypeCount,
		AlarmValue:   5000,
		AlarmMsgCode: textMsgCode,
	},

	// Protocol manager message handlers.
	HandleBlocksMsg_meterName: {
		AlarmReason:  "最近一分钟时间内收到其他节点广播过来的blocks消息次数大于60次",
		MetricsType:  TypeRate1,
		AlarmValue:   1,
		AlarmMsgCode: textMsgCode,
	},
	HandleGetBlocksMsg_meterName: {
		AlarmReason:  "最近一分钟时间内收到其他节点请求拉取block消息次数大于60次",
		MetricsType:  TypeRate1,
		AlarmValue:   1,
		AlarmMsgCode: textMsgCode,
	},
	HandleBlockHashMsg_meterName: {
		AlarmReason:  "最近一分钟时间内普通节点收到广播的稳定块hash的次数大于6000次",
		MetricsType:  TypeRate1,
		AlarmValue:   100,
		AlarmMsgCode: textMsgCode,
	},
	HandleGetConfirmsMsg_meterName: {
		AlarmReason:  "最近一分钟时间内收到其他节点请求拉取block确认包消息次数大于960次",
		MetricsType:  TypeRate1,
		AlarmValue:   16,
		AlarmMsgCode: textMsgCode,
	},
	HandleConfirmMsg_meterName: {
		AlarmReason:  "最近一分钟时间内收到其他节点广播过来的区块确认包的次数大于960",
		MetricsType:  TypeRate1,
		AlarmValue:   16,
		AlarmMsgCode: textMsgCode,
	},
	HandleGetBlocksWithChangeLogMsg_meterName: {
		AlarmReason:  "最近一分钟时间内收到调用handleGetBlocksWithChangeLogMsg请求的次数大于600次",
		MetricsType:  TypeRate1,
		AlarmValue:   10,
		AlarmMsgCode: textMsgCode,
	},
	HandleDiscoverReqMsg_meterName: {
		AlarmReason:  "最近一分钟时间内收到调用handleDiscoverReqMsg的次数大于600次",
		MetricsType:  TypeRate1,
		AlarmValue:   10,
		AlarmMsgCode: textMsgCode,
	},
	HandleDiscoverResMsg_meterName: {
		// The reason must name handleDiscoverResMsg (the response handler this
		// meter tracks), not the request handler.
		AlarmReason:  "最近一分钟时间内收到调用handleDiscoverResMsg的次数大于600次",
		MetricsType:  TypeRate1,
		AlarmValue:   10,
		AlarmMsgCode: textMsgCode,
	},

	// P2P connections and message I/O.
	PeerConnFailed_meterName: {
		AlarmReason:  "最近一分钟时间内节点连接断开的次数大于5次",
		MetricsType:  TypeRate1,
		AlarmValue:   0.083,
		AlarmMsgCode: textMsgCode,
	},
	ReadMsgSuccess_timerName: {
		AlarmReason:  "读取接收节点的Msg所用的平均时间大于6s,有必要升级网络带宽",
		MetricsType:  TypeMean,
		AlarmValue:   6,
		AlarmMsgCode: textMsgCode,
	},
	ReadMsgFailed_timerName: {
		AlarmReason:  "最近一分钟时间内读取接收节点的Msg失败的次数大于5次",
		MetricsType:  TypeRate1,
		AlarmValue:   0.083,
		AlarmMsgCode: textMsgCode,
	},
	WriteMsgSuccess_timerName: {
		AlarmReason:  "发送Msg给其他节点的平均用时超过5s,有必要升级网络带宽",
		MetricsType:  TypeMean,
		AlarmValue:   5,
		AlarmMsgCode: textMsgCode,
	},
	WriteMsgFailed_timerName: {
		AlarmReason:  "最近一分钟时间内发送Msg给其他节点失败的次数超过5次",
		MetricsType:  TypeRate1,
		AlarmValue:   0.083,
		AlarmMsgCode: textMsgCode,
	},

	// Transaction verification.
	VerifyFailedTx_meterName: {
		AlarmReason:  "最近一分钟时间内交易验证失败的的次数超过了30笔",
		MetricsType:  TypeRate1,
		AlarmValue:   0.5,
		AlarmMsgCode: textMsgCode,
	},

	// Consensus.
	BlockInsert_timerName: {
		AlarmReason:  "Insert chain 所用平均时间大于5s",
		MetricsType:  TypeMean,
		AlarmValue:   5,
		AlarmMsgCode: textMsgCode,
	},
	// NOTE(review): this rule uses 15 while the package-level Alarm_MineBlock
	// threshold is 8 — confirm which value is intended.
	MineBlock_timerName: {
		AlarmReason:  "Mine Block 所用平均时间大于15s",
		MetricsType:  TypeMean,
		AlarmValue:   15,
		AlarmMsgCode: textMsgCode,
	},
	VerifyBlock_meterName: {
		AlarmReason:  "最近一分钟时间内收到2个以上InsertBlock校验不通过的block",
		MetricsType:  TypeRate1,
		AlarmValue:   0.033,
		AlarmMsgCode: textMsgCode,
	},
	UnStableBlock_meterName: {
		AlarmReason:  "未稳定块已经超过了设置的过度期区块总数的十分之九了",
		MetricsType:  TypeRate1,
		AlarmValue:   0.016,
		AlarmMsgCode: textMsgCode,
	},

	// LevelDB.
	LevelDb_miss_meterName: {
		AlarmReason:  "最近一分钟时间内从leveldb中读取数据失败次数大于10次",
		MetricsType:  TypeRate1,
		AlarmValue:   0.16,
		AlarmMsgCode: textMsgCode,
	},
}

告警规则表

Functions

func CollectProcessMetrics

func CollectProcessMetrics(refresh time.Duration)

CollectProcessMetrics periodically collects various metrics about the running process.

func NewAlarmManager

func NewAlarmManager() *alarmManager

func NewCounter

func NewCounter(name string) metrics.Counter

NewCounter creates a new metrics Counter, either a real one or a NOP stub depending on the metrics flag.

func NewGauge

func NewGauge(name string) metrics.Gauge

func NewMeter

func NewMeter(name string) metrics.Meter

NewMeter creates a new metrics Meter, either a real one or a NOP stub depending on the metrics flag.

func NewTimer

func NewTimer(name string) metrics.Timer

NewTimer creates a new metrics Timer, either a real one or a NOP stub depending on the metrics flag.

func PointMetricsLog

func PointMetricsLog()

func ReadDiskStats

func ReadDiskStats(stats *DiskStats) error

ReadDiskStats retrieves the disk IO stats belonging to the current process.

func SprintMetrics

func SprintMetrics(metricsName string, i interface{}) []string

返回出给定name的metrics的[]string

func ToStrings

func ToStrings(str ...string) []string

ToStrings 把多个string拼接成一个[]string

func WriteMetricsData

func WriteMetricsData(r metrics.Registry, refresh time.Duration)

WriteMetricsData 收集统计数据

Types

type Condition

// Condition describes one alarm rule: which metric value is checked, the
// threshold that triggers the alarm and how the alarm is reported.
type Condition struct {
	AlarmReason  string      // reason reported for the alarm
	MetricsType  metricsType // kind of metric value the alarm is evaluated against
	AlarmValue   float64     // threshold metric value that triggers the alarm
	TimeStamp    time.Time   // records the time of the last alarm
	AlarmMsgCode uint32      // type of alarm message to send; currently only text is supported
}

验证触发告警条件

type DiskStats

// DiskStats is the per process disk io stats.
type DiskStats struct {
	ReadCount  int64 // Number of read operations executed
	ReadBytes  int64 // Total number of bytes read
	WriteCount int64 // Number of write operations executed
	WriteBytes int64 // Total number of bytes written
}

DiskStats is the per process disk io stats.

type MetricsMap

// MetricsMap caches the registered metrics, keyed by string.
// NOTE(review): the key is presumably the metric name — confirm against GetMapMetrics.
type MetricsMap map[string]interface{}

缓存注册的metrics方法

func GetMapMetrics

func GetMapMetrics() MetricsMap

GetMapMetrics 返回所有注册是metrics方法

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL