checks

package
v0.0.0-...-3bfe646 Latest Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: Jun 28, 2023 License: Apache-2.0 Imports: 32 Imported by: 7

Documentation

Index

Constants

View Source
const (
	// EtcdMinWriteIOPSSoft defines the soft threshold for the minimum number of
	// sequential write IOPS required for etcd to perform effectively.
	//
	// The number is recommended by etcd documentation:
	// https://github.com/etcd-io/etcd/blob/master/Documentation/op-guide/hardware.md#disks
	//
	// Measurements below the soft threshold generate a warning.
	EtcdMinWriteIOPSSoft = 50
	// EtcdMinWriteIOPSHard is the lowest number of IOPS Gravity will tolerate
	// before generating a critical probe failure.
	EtcdMinWriteIOPSHard = 10

	// EtcdMaxFsyncLatencyMsSoft defines the soft threshold for the maximum fsync
	// latency required for etcd to perform effectively, in milliseconds.
	//
	// Etcd documentation recommends 10ms for optimal performance, but a more
	// lenient value is used here to ensure a better dev/test experience:
	// https://github.com/etcd-io/etcd/blob/master/Documentation/faq.md#what-does-the-etcd-warning-failed-to-send-out-heartbeat-on-time-mean
	//
	// Measurements above the soft threshold generate a warning.
	EtcdMaxFsyncLatencyMsSoft = 50
	// EtcdMaxFsyncLatencyMsHard is the highest fsync latency, in milliseconds,
	// that Gravity prechecks will tolerate before generating a critical probe
	// failure.
	EtcdMaxFsyncLatencyMsHard = 150
)
View Source
// Names of the environment variables that carry the etcd disk check limits.
// NOTE(review): presumably these override the EtcdMinWriteIOPS* and
// EtcdMaxFsyncLatencyMs* thresholds — confirm against the code that reads them.
const (
	// EtcdMinIOPSSoftEnvVar is the name of the environment variable that
	// holds the soft IOPS limit.
	EtcdMinIOPSSoftEnvVar = "GRAVITY_ETCD_MIN_IOPS_SOFT"
	// EtcdMinIOPSHardEnvVar is the name of the environment variable that
	// holds the hard IOPS limit.
	EtcdMinIOPSHardEnvVar = "GRAVITY_ETCD_MIN_IOPS_HARD"
	// EtcdMaxLatencySoftEnvVar is the name of the environment variable that
	// holds the soft fsync latency limit.
	EtcdMaxLatencySoftEnvVar = "GRAVITY_ETCD_MAX_LATENCY_SOFT"
	// EtcdMaxLatencyHardEnvVar is the name of the environment variable that
	// holds the hard fsync latency limit.
	EtcdMaxLatencyHardEnvVar = "GRAVITY_ETCD_MAX_LATENCY_HARD"
)
View Source
// Game modes accepted in PingPongRequest.Mode.
const (
	// ModePingPong is the game mode where servers send each other
	// short "ping" messages.
	ModePingPong = "pingpong"
	// ModeBandwidth is the game mode for testing bandwidth between servers.
	ModeBandwidth = "bandwidth"
)

Variables

This section is empty.

Functions

func DockerConfigFromSchema

func DockerConfigFromSchema(dockerSchema *schema.Docker) (config storage.DockerConfig)

DockerConfigFromSchema converts the specified Docker schema to storage configuration format

func DockerConfigFromSchemaValue

func DockerConfigFromSchemaValue(dockerSchema schema.Docker) (config storage.DockerConfig)

DockerConfigFromSchemaValue converts the specified Docker schema to storage configuration format

func FormatFailedChecks

func FormatFailedChecks(failed []*agentpb.Probe) string

FormatFailedChecks returns failed checks formatted as a list

func OverrideDockerConfig

func OverrideDockerConfig(config *storage.DockerConfig, overrideConfig storage.DockerConfig)

OverrideDockerConfig updates the given config with values from overrideConfig where necessary.

func RequirementsFromManifest

func RequirementsFromManifest(manifest schema.Manifest) (map[string]Requirements, error)

RequirementsFromManifest returns check requirements for each node profile in the provided manifest.

func RequirementsFromManifests

func RequirementsFromManifests(old, new schema.Manifest, profiles map[string]string, docker storage.DockerConfig) (map[string]Requirements, error)

RequirementsFromManifests generates check requirements as a difference between two manifests - old and new.

func RunBasicChecks

func RunBasicChecks(ctx context.Context, options *validationpb.ValidateOptions) (failed []*agentpb.Probe)

RunBasicChecks executes a set of additional health checks. It returns the list of failed health probes.

func RunLocalChecks

func RunLocalChecks(ctx context.Context, req LocalChecksRequest) error

RunLocalChecks performs all preflight checks for an application that can be run locally on the node

Types

type Checker

type Checker struct {
	// contains filtered or unexported fields
}

Checker is the runner for tests

func New

func New(config Config) (*Checker, error)

New creates a new checker for the specified list of servers using the given set of server information payloads and the specified interface for running remote commands.

func (*Checker) Check

func (r *Checker) Check(ctx context.Context) (failed []*agentpb.Probe)

Check executes checks on r.servers and returns a list of failed probes.

func (*Checker) CheckNode

func (r *Checker) CheckNode(ctx context.Context, server Server) (failed []*agentpb.Probe)

CheckNode executes checks for the provided individual server.

func (*Checker) CheckNodes

func (r *Checker) CheckNodes(ctx context.Context, servers []Server) (failed []*agentpb.Probe)

CheckNodes executes checks that take all provided servers into account.

func (*Checker) Run

func (r *Checker) Run(ctx context.Context) error

Run runs a full set of checks on the servers specified in r.servers

type Config

type Config struct {
	// Remote is an interface for validating and executing commands on remote nodes.
	Remote Remote
	// Manifest is the cluster manifest the checker validates nodes against.
	Manifest schema.Manifest
	// Servers is a list of nodes for validation.
	Servers []Server
	// Requirements maps node roles to their validation requirements.
	Requirements map[string]Requirements
	// Features selects which optional checks are executed, allowing certain
	// checks to be turned off.
	Features
}

Config represents the checker configuration.

type Features

type Features struct {
	// TestBandwidth specifies whether the network bandwidth test should
	// be executed.
	TestBandwidth bool
	// TestPorts specifies whether the ports availability test should
	// be executed.
	TestPorts bool
	// TestEtcdDisk specifies whether the device where etcd data resides
	// should be performance-tested.
	TestEtcdDisk bool
}

Features controls which tests the checker will run

type Interface

type Interface interface {
	// Run runs a full set of checks on the nodes configured in the checker.
	Run(ctx context.Context) error
	// CheckNode executes single-node checks (such as CPU/RAM requirements,
	// disk space, etc) for the provided server.
	CheckNode(ctx context.Context, server Server) []*agentpb.Probe
	// CheckNodes executes multi-node checks (such as network reachability,
	// bandwidth, etc) on the provided set of servers.
	CheckNodes(ctx context.Context, servers []Server) []*agentpb.Probe
	// Check executes all checks on configured servers and returns failed probes.
	Check(ctx context.Context) []*agentpb.Probe
}

Interface defines a preflight checker interface.

type LocalChecksRequest

type LocalChecksRequest struct {
	// Manifest is the application manifest to check against.
	Manifest schema.Manifest
	// Role is the name of the node profile to check.
	Role string
	// Options specifies additional validation options.
	Options *validationpb.ValidateOptions
	// Docker specifies Docker configuration overrides (if any).
	Docker storage.DockerConfig
	// Mounts specifies optional mount overrides as name -> source path pairs.
	Mounts map[string]string
	// AutoFix, when set to true, enables attempts to fix some common problems.
	AutoFix bool
	// Progress is used to report information about auto-fixed problems.
	utils.Progress
}

LocalChecksRequest describes a request to run local pre-flight checks

func (*LocalChecksRequest) CheckAndSetDefaults

func (r *LocalChecksRequest) CheckAndSetDefaults() error

CheckAndSetDefaults checks the request and sets some defaults

type LocalChecksResult

type LocalChecksResult struct {
	// Failed is a list of failed probes.
	Failed []*agentpb.Probe
	// Fixed is a list of probes that failed but have been auto-fixed.
	Fixed []*agentpb.Probe
	// Fixable is a list of probes for which an auto-fix can be attempted.
	Fixable []*agentpb.Probe
}

LocalChecksResult describes the outcome of local checks execution

func ValidateLocal

func ValidateLocal(ctx context.Context, req LocalChecksRequest) (*LocalChecksResult, error)

ValidateLocal runs checks on the local node and returns their outcome

func (*LocalChecksResult) GetFailed

func (r *LocalChecksResult) GetFailed() []*agentpb.Probe

GetFailed returns a list of all failed probes

type ManifestValidator

type ManifestValidator struct {
	// Manifest specifies the manifest to validate against
	Manifest schema.Manifest
	// Profile specifies the node profile to validate against
	Profile schema.NodeProfile
	// StateDir specifies the state directory on the local node
	StateDir string
	// Docker specifies optional docker configuration.
	// If specified, overrides the system docker configuration
	Docker *storage.DockerConfig
	// Mounts specifies the mount overrides as name -> source path pairs
	Mounts map[string]string
}

ManifestValidator describes a manifest validator

func (*ManifestValidator) Validate

func (r *ManifestValidator) Validate(ctx context.Context) (failedProbes []*agentpb.Probe, err error)

Validate verifies the specified manifest against the host environment. It returns the list of failed health probes.

type Network

type Network struct {
	// MinTransferRate is the minimum required transfer rate.
	MinTransferRate utils.TransferRate
	// Ports specifies requirements for ports to be available on the server.
	Ports Ports
}

Network describes network requirements.

type PingPongGame

type PingPongGame map[string]PingPongRequest

PingPongGame describes a composite request to check ports in an agent cluster as a mapping of agent IP -> PingPongRequest

type PingPongGameResults

type PingPongGameResults map[string]PingPongResult

PingPongGameResults describes a composite result of checking ports in an agent cluster as a mapping of agent IP -> PingPongResult

func (*PingPongGameResults) Failures

func (p *PingPongGameResults) Failures() []string

Failures returns the list of test failures

type PingPongRequest

type PingPongRequest struct {
	// Listen lists the listening servers.
	Listen []pb.Addr `json:"listen"`
	// Ping lists the remote servers.
	Ping []pb.Addr `json:"ping"`
	// Duration is the duration of the game.
	Duration time.Duration `json:"duration"`
	// Mode is the game mode: pingpong or bandwidth.
	Mode string `json:"mode"`
}

PingPongRequest is a ping-pong game request

func (PingPongRequest) BandwidthProto

func (r PingPongRequest) BandwidthProto() *pb.CheckBandwidthRequest

BandwidthProto converts this request to protobuf format

func (PingPongRequest) Check

func (r PingPongRequest) Check() error

Check makes sure the request is correct

func (PingPongRequest) PortsProto

func (r PingPongRequest) PortsProto() *pb.CheckPortsRequest

PortsProto converts this request to protobuf format

type PingPongResult

type PingPongResult struct {
	// Code reports whether the operation as a whole has succeeded.
	// A value of 0 does not mean that every individual result is a success;
	// it only means that the experiment ran uninterrupted, and there can
	// still be failures in the results.
	Code int `json:"code"`
	// Message contains a string message.
	Message string `json:"message"`
	// ListenResults contains information about attempts to start listening servers.
	// NOTE(review): the JSON key is singular ("listen_result") unlike
	// "ping_results" — presumably kept for wire compatibility; confirm
	// before changing.
	ListenResults []pb.ServerResult `json:"listen_result"`
	// PingResults is the result of attempts to reach out to remote servers.
	PingResults []pb.ServerResult `json:"ping_results"`
	// BandwidthResult is the result of the bandwidth test.
	BandwidthResult uint64 `json:"bandwidth_result"`
}

PingPongResult is a result of a ping-pong game

func ResultFromBandwidthProto

func ResultFromBandwidthProto(resp *pb.CheckBandwidthResponse, err error) *PingPongResult

ResultFromBandwidthProto converts protobuf response to PingPongResult

func ResultFromPortsProto

func ResultFromPortsProto(resp *pb.CheckPortsResponse, err error) *PingPongResult

ResultFromPortsProto converts protobuf response to PingPongResult

func (PingPongResult) FailureCount

func (r PingPongResult) FailureCount() int

FailureCount returns number of failures in the result

type Ports

type Ports struct {
	// TCP lists a range of TCP ports.
	TCP []int
	// UDP lists a range of UDP ports.
	UDP []int
}

Ports describes port requirements for a specific profile.

type RawServerInfo

type RawServerInfo struct {
	// System describes the remote system as a JSON-encoded blob.
	System []byte
	// RuntimeConfig is the server's runtime configuration.
	pb.RuntimeConfig
	// LocalTime is the local time at the point when this measurement
	// was taken.
	LocalTime time.Time `json:"local_time"`
	// ServerTime is the remote time.
	ServerTime time.Time `json:"server_time"`
}

RawServerInfo describes a server. It is a transport-friendly format

func (RawServerInfo) FromTransport

func (r RawServerInfo) FromTransport() (*ServerInfo, error)

FromTransport converts from transport-friendly representation of server info

type Remote

type Remote interface {
	// Exec executes the command remotely on the specified node.
	Exec(ctx context.Context, addr string, command []string, stdout, stderr io.Writer) error
	// CheckPorts executes network test to test port availability.
	CheckPorts(context.Context, PingPongGame) (PingPongGameResults, error)
	// CheckBandwidth executes network bandwidth test.
	CheckBandwidth(context.Context, PingPongGame) (PingPongGameResults, error)
	// CheckDisks executes disk performance test on the specified node.
	CheckDisks(ctx context.Context, addr string, req *proto.CheckDisksRequest) (*proto.CheckDisksResponse, error)
	// Validate performs local checks on the specified node.
	Validate(ctx context.Context, addr string, config ValidateConfig) ([]*agentpb.Probe, error)
}

Remote defines an interface for validating remote nodes.

type Remoter

type Remoter struct {
	// contains filtered or unexported fields
}

Remoter executes remote commands and validates remote nodes.

Implements Remote.

func NewRemote

func NewRemote(agents rpc.AgentRepository) *Remoter

NewRemote creates a remote node validator from the provided agents repository.

func (*Remoter) CheckBandwidth

func (r *Remoter) CheckBandwidth(ctx context.Context, req PingPongGame) (PingPongGameResults, error)

CheckBandwidth executes network bandwidth test.

func (*Remoter) CheckDisks

func (r *Remoter) CheckDisks(ctx context.Context, addr string, req *proto.CheckDisksRequest) (*proto.CheckDisksResponse, error)

CheckDisks executes disk performance test.

func (*Remoter) CheckPorts

func (r *Remoter) CheckPorts(ctx context.Context, req PingPongGame) (PingPongGameResults, error)

CheckPorts executes network test to test port availability.

func (*Remoter) Exec

func (r *Remoter) Exec(ctx context.Context, addr string, command []string, stdout, stderr io.Writer) error

Exec executes the command remotely on the specified node.

The command's output is written to the provided writer.

func (*Remoter) Validate

func (r *Remoter) Validate(ctx context.Context, addr string, config ValidateConfig) ([]*agentpb.Probe, error)

Validate performs local checks on the specified node.

Returns a list of failed test results.

type Requirements

type Requirements struct {
	// CPU describes CPU requirements.
	CPU *schema.CPU
	// RAM describes RAM requirements.
	RAM *schema.RAM
	// OS describes OS requirements.
	OS []schema.OS
	// Network describes network requirements.
	Network Network
	// Volumes describes volume requirements.
	Volumes []schema.Volume
	// Docker describes Docker requirements.
	Docker storage.DockerConfig
}

Requirements defines a set of requirements for a node profile.

type Server

type Server struct {
	// Server defines a remote node
	storage.Server
	// ServerInfo describes the remote node environment
	ServerInfo
}

Server describes a remote node

func GetServer

func GetServer(ctx context.Context, rpc rpc.AgentRepository, server storage.Server) (*Server, error)

GetServer returns a check server by retrieving runtime information for the provided server.

func GetServers

func GetServers(ctx context.Context, rpc rpc.AgentRepository, servers []storage.Server) ([]Server, error)

GetServers returns a list of check servers by retrieving runtime information for each of the provided servers.

func (Server) String

func (r Server) String() string

String returns a textual representation of this server object.

type ServerInfo

type ServerInfo struct {
	// System describes the remote system.
	storage.System
	// RuntimeConfig is the server's runtime configuration.
	pb.RuntimeConfig
	// LocalTime is the local time at the point when this measurement
	// was taken.
	LocalTime time.Time `json:"local_time"`
	// ServerTime is the remote time.
	ServerTime time.Time `json:"server_time"`
}

ServerInfo describes a server

func GetServerInfo

func GetServerInfo(ctx context.Context, client rpcclient.Interface) (*ServerInfo, error)

GetServerInfo fetches remote server information

func (ServerInfo) Transport

func (r ServerInfo) Transport() (*RawServerInfo, error)

Transport returns transport-friendly representation of server info

type ServerInfos

type ServerInfos []ServerInfo

ServerInfos is a collection of system infos from all agents

func (ServerInfos) FindByIP

func (r ServerInfos) FindByIP(addr string) (*ServerInfo, error)

FindByIP returns the system information for the given IP

func (ServerInfos) Hostnames

func (r ServerInfos) Hostnames() (hostnames []string)

Hostnames returns the list of all hostnames

type ValidateConfig

type ValidateConfig struct {
	// Manifest is the manifest to validate against.
	Manifest schema.Manifest
	// Profile is the node profile name to validate against.
	Profile string
	// Docker is the Docker configuration to validate.
	Docker storage.DockerConfig
}

ValidateConfig specifies the data to validate a node against.

Directories

Path Synopsis

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL