metrics

package

v1.4.3 Latest Latest Go to latest Published: Oct 5, 2020 License: LGPL-3.0 Imports: 14 Imported by: 0

Details

Valid go.mod file

The Go module system was introduced in Go 1.11 and is the official dependency management solution for Go.
Redistributable license

Redistributable licenses place minimal restrictions on how software can be used, modified, and redistributed.
Tagged version

Modules with tagged versions give importers more predictable builds.
Stable version

When a project reaches major version v1 it is considered stable.
Learn more about best practices

Repository

github.com/LemoFoundationLtd/lemochain-core

Links

Open Source Insights

Documentation ¶

Rendered for

Overview ¶

Package metrics provides general system and process level metrics collection.

Index ¶

Constants
Variables
func CollectProcessMetrics(refresh time.Duration)
func NewAlarmManager() *alarmManager
func NewCounter(name string) metrics.Counter
func NewGauge(name string) metrics.Gauge
func NewMeter(name string) metrics.Meter
func NewTimer(name string) metrics.Timer
func PointMetricsLog()
func ReadDiskStats(stats *DiskStats) error
func SprintMetrics(metricsName string, i interface{}) []string
func ToStrings(str ...string) []string
func WriteMetricsData(r metrics.Registry, refresh time.Duration)
type Condition
type DiskStats
type MetricsMap
- func GetMapMetrics() MetricsMap

Constants ¶

View Source

// Measurement kinds that a Condition selects through its MetricsType field.
// The values are consecutive and start at 0 (iota), so the declaration order
// below is significant and must not be changed.
//
// In AlarmRuleTable, TypeRate1 is paired with "within the last minute" alarm
// reasons, TypeMean with "average time" reasons, and TypeCount with a total
// count.
// NOTE(review): the meaning of the remaining kinds is only suggested by their
// names — confirm against the code that reads them.
const (
	TypeCount metricsType = iota // 0
	TypeRate1
	TypeRate5
	TypeRate15
	TypeRateMean
	TypeMean
	TypeMax
	TypeMin
	TypeStdDev
	TypeSum
	TypeVariance
	TypeValue
)

View Source

const (
	CodeHeartbeat = uint32(0x01) // msg code of the heartbeat packet

)

View Source

// LevelDBPrefix is the common name prefix that the LevelDb_* metric names in
// this package are built from by string concatenation.
const LevelDBPrefix = "glemo/db/chaindata/"

Variables ¶

View Source

var (
	PackagePrefix = []byte{0x77, 0x88} // package flag
	PackageLength = 4                  // number of bytes occupied by the msg length
)

View Source

var (
	MetricsEnabledFlagValue = "metrics"
	Enabled                 = false // whether metrics are active; decided by checking whether the config file configures the alarm server URL
	AlarmUrl                string  // URL of the alarm system server, passed in through the config file
)

Enabled is the flag specifying if metrics are enabled or not.

View Source

// Names under which the individual meters, counters and timers are
// registered, plus two alarm thresholds. The suffix of each identifier
// (_meterName, _counterName, _timerName) names the kind of metric.
var (
	InvalidTx_meterName      = "txpool/DelInvalidTxs/invalid"
	TxpoolNumber_counterName = "txpool/totalTxNumber"

	VerifyFailedTx_meterName = "tx/VerifyTxBody/verifyFailed"

	HandleBlocksMsg_meterName                 = "network/protocol_manager/handleBlocksMsg"                 // frequency of handleBlocksMsg calls
	HandleGetBlocksMsg_meterName              = "network/protocol_manager/handleGetBlocksMsg"              // frequency of handleGetBlocksMsg calls
	HandleBlockHashMsg_meterName              = "network/protocol_manager/handleBlockHashMsg"              // frequency of handleBlockHashMsg calls
	HandleGetConfirmsMsg_meterName            = "network/protocol_manager/handleGetConfirmsMsg"            // frequency of handleGetConfirmsMsg calls
	HandleConfirmMsg_meterName                = "network/protocol_manager/handleConfirmMsg"                // frequency of handleConfirmMsg calls
	HandleGetBlocksWithChangeLogMsg_meterName = "network/protocol_manager/handleGetBlocksWithChangeLogMsg" // frequency of handleGetBlocksWithChangeLogMsg calls
	HandleDiscoverReqMsg_meterName            = "network/protocol_manager/handleDiscoverReqMsg"            // frequency of handleDiscoverReqMsg calls
	HandleDiscoverResMsg_meterName            = "network/protocol_manager/handleDiscoverResMsg"            // frequency of handleDiscoverResMsg calls

	LevelDb_get_timerName       = LevelDBPrefix + "user/gets"
	LevelDb_put_timerName       = LevelDBPrefix + "user/puts"
	LevelDb_del_timerName       = LevelDBPrefix + "user/dels"
	LevelDb_miss_meterName      = LevelDBPrefix + "user/misses" // frequency of failed get operations on the database
	LevelDb_read_meterName      = LevelDBPrefix + "user/reads"  // size in bytes of the data read (get) from the database
	LevelDb_write_meterName     = LevelDBPrefix + "user/writes" // size in bytes of the data written (put) into the database
	LevelDb_compTime_meteName   = LevelDBPrefix + "user/time"
	LevelDb_compRead_meterName  = LevelDBPrefix + "user/input"
	LevelDb_compWrite_meterName = LevelDBPrefix + "user/output"

	BlockInsert_timerName   = "consensus/InsertBlock/insertBlock" // rate of inserting blocks into the chain and distribution of the time taken
	MineBlock_timerName     = "consensus/MineBlock/mineBlock"     // block production rate and time distribution
	VerifyBlock_meterName   = "consensus/dpovp"                   // frequency of failed verification of received blocks
	UnStableBlock_meterName = "consensus/dpovp/saveNewBlock"      // too many unstable blocks
	// Alarm conditions.
	// NOTE(review): AlarmRuleTable uses 15 (not 8) as the threshold for
	// MineBlock_timerName — confirm which value is intended.
	Alarm_BlockInsert float64 = 5 // average Insert chain time greater than 5s
	Alarm_MineBlock   float64 = 8 // average Mine Block time greater than 8s

	PeerConnFailed_meterName  = "p2p/listenLoop/failedHandleConn"
	ReadMsgSuccess_timerName  = "p2p/readLoop/readMsgSuccess"  // timer for msgs that were read successfully
	ReadMsgFailed_timerName   = "p2p/readLoop/readMsgFailed"   // timer for msg reads that failed
	WriteMsgSuccess_timerName = "p2p/WriteMsg/writeMsgSuccess" // timer for msgs that were written successfully
	WriteMsgFailed_timerName  = "p2p/WriteMsg/writeMsgFailed"  // timer for msg writes that failed

	System_memory_allocs   = "system/memory/allocs"   // number of memory allocations
	System__memory_frees   = "system/memory/frees"    // number of memory frees
	System_memory_inuse    = "system/memory/inuse"    // bytes allocated and still in use
	System_memory_pauses   = "system/memory/pauses"   // circular buffer of the total GC pause times
	System_disk_readCount  = "system/disk/readcount"  // number of disk read operations
	System_disk_readData   = "system/disk/readdata"   // total number of bytes read
	System_disk_writeCount = "system/disk/writecount" // number of disk write operations
	System_disk_writeData  = "system/disk/writedata"  // total number of bytes written
)

View Source

// AlarmRuleTable is the alarm rule table. It maps a metric name to the
// Condition that describes when that metric should raise an alarm: which
// measurement to look at (MetricsType), the critical value (AlarmValue), the
// human-readable reason (AlarmReason) and the message type used to deliver
// it (AlarmMsgCode).
//
// NOTE(review): for TypeRate1 entries the AlarmValue matches the per-minute
// figure quoted in AlarmReason divided by 60 (e.g. 30 per minute -> 0.5), so
// the thresholds appear to be expressed per second — confirm against the
// code that evaluates the rules.
var AlarmRuleTable = map[string]*Condition{

	// More than 30 transactions failed to execute within the last minute.
	InvalidTx_meterName: {
		AlarmReason:  "最近的一分钟时间内有大于30笔交易执行失败了",
		MetricsType:  TypeRate1,
		AlarmValue:   0.5,
		AlarmMsgCode: textMsgCode,
	},
	// More than 5000 transactions are in the transaction pool.
	TxpoolNumber_counterName: {
		AlarmReason:  "交易池中的交易大于5000笔了",
		MetricsType:  TypeCount,
		AlarmValue:   5000,
		AlarmMsgCode: textMsgCode,
	},

	// More than 60 blocks messages broadcast by other nodes within the last minute.
	HandleBlocksMsg_meterName: {
		AlarmReason:  "最近一分钟时间内收到其他节点广播过来的blocks消息次数大于60次",
		MetricsType:  TypeRate1,
		AlarmValue:   1,
		AlarmMsgCode: textMsgCode,
	},
	// More than 60 block pull requests from other nodes within the last minute.
	HandleGetBlocksMsg_meterName: {
		AlarmReason:  "最近一分钟时间内收到其他节点请求拉取block消息次数大于60次",
		MetricsType:  TypeRate1,
		AlarmValue:   1,
		AlarmMsgCode: textMsgCode,
	},
	// An ordinary node received more than 6000 stable block hash broadcasts
	// within the last minute.
	HandleBlockHashMsg_meterName: {
		AlarmReason:  "最近一分钟时间内普通节点收到广播的稳定块hash的次数大于6000次",
		MetricsType:  TypeRate1,
		AlarmValue:   100,
		AlarmMsgCode: textMsgCode,
	},
	// More than 960 requests from other nodes to pull block confirm packages
	// within the last minute.
	HandleGetConfirmsMsg_meterName: {
		AlarmReason:  "最近一分钟时间内收到其他节点请求拉取block确认包消息次数大于960次",
		MetricsType:  TypeRate1,
		AlarmValue:   16,
		AlarmMsgCode: textMsgCode,
	},
	// More than 960 block confirm packages broadcast by other nodes within
	// the last minute.
	HandleConfirmMsg_meterName: {
		AlarmReason:  "最近一分钟时间内收到其他节点广播过来的区块确认包的次数大于960",
		MetricsType:  TypeRate1,
		AlarmValue:   16,
		AlarmMsgCode: textMsgCode,
	},
	// More than 600 handleGetBlocksWithChangeLogMsg requests within the last minute.
	HandleGetBlocksWithChangeLogMsg_meterName: {
		AlarmReason:  "最近一分钟时间内收到调用handleGetBlocksWithChangeLogMsg请求的次数大于600次",
		MetricsType:  TypeRate1,
		AlarmValue:   10,
		AlarmMsgCode: textMsgCode,
	},
	// More than 600 handleDiscoverReqMsg calls within the last minute.
	HandleDiscoverReqMsg_meterName: {
		AlarmReason:  "最近一分钟时间内收到调用handleDiscoverReqMsg的次数大于600次",
		MetricsType:  TypeRate1,
		AlarmValue:   10,
		AlarmMsgCode: textMsgCode,
	},
	// More than 600 handleDiscoverResMsg calls within the last minute.
	// The reason text previously named handleDiscoverReqMsg (copied from the
	// entry above), which made this alarm indistinguishable from the request
	// alarm.
	HandleDiscoverResMsg_meterName: {
		AlarmReason:  "最近一分钟时间内收到调用handleDiscoverResMsg的次数大于600次",
		MetricsType:  TypeRate1,
		AlarmValue:   10,
		AlarmMsgCode: textMsgCode,
	},

	// More than 5 node disconnections within the last minute.
	PeerConnFailed_meterName: {
		AlarmReason:  "最近一分钟时间内节点连接断开的次数大于5次",
		MetricsType:  TypeRate1,
		AlarmValue:   0.083,
		AlarmMsgCode: textMsgCode,
	},
	// Average time to read a Msg from a peer exceeds 6s; the network
	// bandwidth should be upgraded.
	ReadMsgSuccess_timerName: {
		AlarmReason:  "读取接收节点的Msg所用的平均时间大于6s，有必要升级网络带宽",
		MetricsType:  TypeMean,
		AlarmValue:   6,
		AlarmMsgCode: textMsgCode,
	},
	// More than 5 failed Msg reads from peers within the last minute.
	ReadMsgFailed_timerName: {
		AlarmReason:  "最近一分钟时间内读取接收节点的Msg失败的次数大于5次",
		MetricsType:  TypeRate1,
		AlarmValue:   0.083,
		AlarmMsgCode: textMsgCode,
	},
	// Average time to send a Msg to other nodes exceeds 5s; the network
	// bandwidth should be upgraded.
	WriteMsgSuccess_timerName: {
		AlarmReason:  "发送Msg给其他节点的平均用时超过5s，有必要升级网络带宽",
		MetricsType:  TypeMean,
		AlarmValue:   5,
		AlarmMsgCode: textMsgCode,
	},
	// More than 5 failed Msg sends to other nodes within the last minute.
	WriteMsgFailed_timerName: {
		AlarmReason:  "最近一分钟时间内发送Msg给其他节点失败的次数超过5次",
		MetricsType:  TypeRate1,
		AlarmValue:   0.083,
		AlarmMsgCode: textMsgCode,
	},

	// More than 30 transaction verification failures within the last minute.
	VerifyFailedTx_meterName: {
		AlarmReason:  "最近一分钟时间内交易验证失败的的次数超过了30笔",
		MetricsType:  TypeRate1,
		AlarmValue:   0.5,
		AlarmMsgCode: textMsgCode,
	},

	// Average Insert chain time is greater than 5s.
	BlockInsert_timerName: {
		AlarmReason:  "Insert chain 所用平均时间大于5s",
		MetricsType:  TypeMean,
		AlarmValue:   5,
		AlarmMsgCode: textMsgCode,
	},
	// Average Mine Block time is greater than 15s.
	MineBlock_timerName: {
		AlarmReason:  "Mine Block 所用平均时间大于15s",
		MetricsType:  TypeMean,
		AlarmValue:   15,
		AlarmMsgCode: textMsgCode,
	},
	// More than 2 blocks failing InsertBlock verification within the last minute.
	VerifyBlock_meterName: {
		AlarmReason:  "最近一分钟时间内收到2个以上InsertBlock校验不通过的block",
		MetricsType:  TypeRate1,
		AlarmValue:   0.033,
		AlarmMsgCode: textMsgCode,
	},
	// Unstable blocks exceed nine tenths of the configured transition-period
	// block total.
	UnStableBlock_meterName: {
		AlarmReason:  "未稳定块已经超过了设置的过度期区块总数的十分之九了",
		MetricsType:  TypeRate1,
		AlarmValue:   0.016,
		AlarmMsgCode: textMsgCode,
	},

	// More than 10 failed reads from leveldb within the last minute.
	LevelDb_miss_meterName: {
		AlarmReason:  "最近一分钟时间内从leveldb中读取数据失败次数大于10次",
		MetricsType:  TypeRate1,
		AlarmValue:   0.16,
		AlarmMsgCode: textMsgCode,
	},
}

告警规则表

Functions ¶

func CollectProcessMetrics ¶

func CollectProcessMetrics(refresh time.Duration)

CollectProcessMetrics periodically collects various metrics about the running process.

func NewAlarmManager ¶

func NewAlarmManager() *alarmManager

func NewCounter ¶

func NewCounter(name string) metrics.Counter

NewCounter creates a new metrics Counter, either a real one or a NOP stub depending on the metrics flag.

func NewGauge ¶

func NewGauge(name string) metrics.Gauge

func NewMeter ¶

func NewMeter(name string) metrics.Meter

NewMeter creates a new metrics Meter, either a real one or a NOP stub depending on the metrics flag.

func NewTimer ¶

func NewTimer(name string) metrics.Timer

NewTimer creates a new metrics Timer, either a real one or a NOP stub depending on the metrics flag.

func PointMetricsLog ¶

func PointMetricsLog()

func ReadDiskStats ¶

func ReadDiskStats(stats *DiskStats) error

ReadDiskStats retrieves the disk IO stats belonging to the current process.

func SprintMetrics ¶

func SprintMetrics(metricsName string, i interface{}) []string

返回出给定name的metrics的[]string

func ToStrings ¶

func ToStrings(str ...string) []string

ToStrings 把多个string拼接成一个[]string

func WriteMetricsData ¶

func WriteMetricsData(r metrics.Registry, refresh time.Duration)

WriteMetricsData 收集统计数据

Types ¶

type Condition ¶

// Condition describes one alarm rule; AlarmRuleTable holds one Condition per
// metric name.
type Condition struct {
	AlarmReason  string      // reason for the alarm
	MetricsType  metricsType // kind of measurement that the alarm is raised on
	AlarmValue   float64     // critical measurement value that triggers the alarm
	TimeStamp    time.Time   // records the time of the previous alarm
	AlarmMsgCode uint32      // type of alarm message to send; only the text type is supported for now
}

验证触发告警条件

type DiskStats ¶

// DiskStats is the per process disk io stats.
type DiskStats struct {
	ReadCount  int64 // Number of read operations executed
	ReadBytes  int64 // Total number of bytes read
	WriteCount int64 // Number of write operations executed
	WriteBytes int64 // Total number of bytes written
}

DiskStats is the per process disk io stats.

type MetricsMap ¶

// MetricsMap maps a string key to a value of arbitrary type.
// NOTE(review): the package documentation describes it as the cache of
// registered metrics, presumably keyed by metric name — confirm against the
// code that populates it.
type MetricsMap map[string]interface{}

缓存注册的metrics方法

func GetMapMetrics ¶

func GetMapMetrics() MetricsMap

GetMapMetrics 返回所有注册的metrics方法

Source Files ¶

View all Source files

?	: This menu
/	: Search site
f or F	: Jump to
y or Y	: Canonical URL