health

package
v0.0.0-...-11620cc Latest Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: Jul 20, 2019 License: Apache-2.0 Imports: 8 Imported by: 0

Documentation

Overview

Package health provides support for application health checks.

Health check status can be `Green`, `Yellow`, or `Red`. A yellow status indicates that the health-checked aspect is still functional but may be under stress, experiencing degraded performance, close to resource constraints, etc.

When health checks are registered, they are scheduled to run on a periodic basis. The max number of health checks that can be run concurrently is configurable as a module option.

The health check is configured with a timeout. If the health check times out, then it is considered a `Red` failure. Health checks should be designed to run as fast as possible.

The latest health check results are cached. Interested parties can subscribe for the following health check events:

  • health check registrations
  • health check results
  • overall health status changes

TODO: 1. health check HTTP API 2. health check gRPC API

  • server streaming APIs for health check results and
Example
package main

import (
	"context"
	"github.com/oysterpack/andiamo/pkg/fx/health"
	"go.uber.org/fx"
	"log"
)

// main wires the health module into an fx application, registers two health
// checks, and logs the registration events and check results it receives.
func main() {
	// tags are ULIDs; this one is used to tag the database health check
	const mongoDBTag = "01DFRABQGWX6HB1HFX3K0GR15G"

	dbCheck := health.Check{
		ID: "01DFR9PN8BEMZFGBC49A698WTS",
		Description: `Performs the following:
1. creates a new session 
2. retrieves the session 
3. deletes the session    

Each operation should not take more than 50 msec.
Yellow -> operation took longer than 50 msec but less than 1 sec
Red -> SQL error or operation took longer than 1 sec
`,
		YellowImpact: "degraded performance",
		RedImpact:    "unacceptable user experience",
		Tags:         []string{mongoDBTag},
	}

	smokeTest := health.Check{
		ID:          "01DFRB8XAXMF9XJW2XYCSMN4VE",
		Description: "smoke test",
		RedImpact:   "application is non-functional",
	}

	// both example checks always report a healthy status
	alwaysGreen := func() (health.Status, error) {
		return health.Green, nil
	}

	// populated while the fx application is being built
	var (
		registrations health.RegisteredCheckSubscription
		smokeResults  health.CheckResultsSubscription
		allResults    health.CheckResults
	)

	app := fx.New(
		// install the health module with its default options
		health.Module(health.DefaultOpts()),
		fx.Invoke(
			// set up the subscribers first
			func(subscribe health.SubscribeForRegisteredChecks) {
				registrations = subscribe()
			},
			func(subscribe health.SubscribeForCheckResults) {
				// only results produced by the smoke test are of interest
				smokeResults = subscribe(func(r health.Result) bool {
					return r.ID == smokeTest.ID
				})
			},
			// then register the health checks
			func(register health.Register) error {
				return register(dbCheck, health.CheckerOpts{}, alwaysGreen)
			},
			func(register health.Register) error {
				return register(smokeTest, health.CheckerOpts{}, alwaysGreen)
			},
			// finally log everything that has been registered
			func(registeredChecks health.RegisteredChecks) {
				log.Print(<-registeredChecks())
			},
		),
		fx.Populate(&allResults),
	)

	// bail out if the app failed to initialize
	if err := app.Err(); err != nil {
		log.Panic(err)
	}

	ctx := context.Background()
	if err := app.Start(ctx); err != nil {
		log.Panic(err)
	}
	defer func() {
		if err := app.Stop(ctx); err != nil {
			log.Panic(err)
		}
	}()

	// two health checks were registered, so two registration events are read
	log.Println(<-registrations.Chan())
	log.Println(<-registrations.Chan())

	// the subscription filter only lets smoke test results through
	log.Println(<-smokeResults.Chan())
	// a nil filter is passed to get all check results
	log.Println(<-allResults(nil))
}
Output:

Index

Examples

Constants

View Source
const (
	// MaxTimeout is the max allowed health check run timeout.
	// Health checks should be designed to run fast.
	MaxTimeout = 10 * time.Second
	// MinRunInterval is the min allowed health check run interval.
	// Health checks should not be scheduled to run more frequently than every second.
	MinRunInterval = time.Second

	// MaxCheckParallelism is used to configure the max number of health checks that can run concurrently.
	MaxCheckParallelism uint8 = 1
)

checker constraints

View Source
// DefaultTimeout and DefaultRunInterval are the default health check timeout
// and the default health check run interval.
const (
	DefaultTimeout     = 5 * time.Second
	DefaultRunInterval = 15 * time.Second
)

checker defaults

Variables

View Source
var (
	// ErrServiceNotRunning indicates the health service is not running.
	ErrServiceNotRunning = errors.New("health service is not running")

	// ErrTimeout indicates a health check timed out.
	ErrTimeout = errors.New("health check timed out")

	// ErrContextTimout indicates a context timed out.
	//
	// NOTE(review): the identifier is misspelled ("Timout"). It is exported, so renaming it
	// would be a breaking change - consider adding a correctly spelled alias instead.
	ErrContextTimout = errors.New("context timed out")
)

package errors

View Source
var (
	// Check field validation errors: the ID and each tag must be a ULID, and the
	// Description and RedImpact fields must not be blank.
	ErrIDNotULID        = errors.New("`ID` must be a ULID")
	ErrBlankDescription = errors.New("`Description` must not be blank")
	ErrBlankRedImpact   = errors.New("`RedImpact` must not be blank")
	ErrTagNotULID       = errors.New("`Tags` must be ULIDs")

	// Checker and CheckerOpts validation errors.
	//
	// NOTE(review): the timeout and run interval messages are formatted once, using the package
	// level MaxTimeout and MinRunInterval constants. Presumably the limits that are enforced come
	// from Opts - if so, the limit reported in the message can differ from the configured one. Confirm.
	ErrNilChecker             = errors.New("`Checker` is required and must not be nil")
	ErrRunTimeoutTooHigh      = fmt.Errorf("health check run timeout is too high - max allowed timeout is %s", MaxTimeout)
	ErrRunIntervalTooFrequent = fmt.Errorf("health check run interval is too frequent - min allowed run interval is %s", MinRunInterval)
)

health check registration errors (validation errors)

Functions

func Module

func Module(opts Opts) fx.Option

Module provides the fx Module for the health module

Types

type Check

type Check struct {
	// ID format is ULID. Description must not be blank.
	ID          string
	Description string
	// RedImpact describes what the application impact is if the health check status is red.
	// RedImpact must not be blank.
	RedImpact string
	// YellowImpact describes what the application impact is if the health check status is yellow.
	// YellowImpact is optional because some health checks may not have a yellow state.
	YellowImpact string // optional
	// Tags are used to categorize related health checks.
	// Tags are ULIDs because naming is hard and we want to avoid accidental collision.
	Tags []string // optional
}

Check defines a health check

type CheckResults

type CheckResults func(filter func(result Result) bool) <-chan []Result

CheckResults returns all current health check results that match the specified filter

type CheckResultsSubscription

type CheckResultsSubscription struct {
	// contains filtered or unexported fields
}

CheckResultsSubscription wraps the channel used to notify subscribers

func (CheckResultsSubscription) Chan

func (s CheckResultsSubscription) Chan() <-chan Result

Chan returns the chan in read-only mode

type Checker

type Checker func() Result

Checker performs the health check.

NOTE: health checks must be designed to run as fast and as efficiently as possible.

type CheckerOpts

type CheckerOpts struct {
	// Timeout is the health check run timeout. A zero value implies that the default timeout is used.
	//
	// NOTE(review): presumably a zero Timeout is only invalid once defaults have been applied
	// during registration - confirm against the registration code.
	Timeout time.Duration
	// RunInterval is used to schedule health checks to be run on an interval.
	// A zero value implies that the default run interval is used.
	RunInterval time.Duration
}

CheckerOpts is used to configure how a Checker is run. Zero values imply using the system default values.

type MonitorOverallHealth

type MonitorOverallHealth func() OverallHealthMonitor

MonitorOverallHealth is used to subscribe to overall health status changes

type Opts

type Opts struct {
	// MinRunInterval is the min health check run interval and MaxTimeout is the max health check timeout.
	MinRunInterval time.Duration
	MaxTimeout     time.Duration

	// DefaultTimeout is the default health check timeout and DefaultRunInterval is the default
	// health check run interval.
	DefaultTimeout     time.Duration
	DefaultRunInterval time.Duration

	// MaxCheckParallelism is the max number of health checks that can run concurrently.
	MaxCheckParallelism uint8

	// FailFastOnStartup means the app will fail fast if any health checks fail to pass on app start up.
	// If true, then all registered health checks are run on application startup.
	//
	// default = false
	FailFastOnStartup bool
}

Opts are used to configure the fx module.

func DefaultOpts

func DefaultOpts() Opts

DefaultOpts constructs a new Opts using recommended default values.

func (Opts) SetDefaultRunInterval

func (o Opts) SetDefaultRunInterval(runInterval time.Duration) Opts

SetDefaultRunInterval sets the default health check run interval

func (Opts) SetDefaultTimeout

func (o Opts) SetDefaultTimeout(timeout time.Duration) Opts

SetDefaultTimeout sets the default health check timeout

func (Opts) SetFailFastOnStartup

func (o Opts) SetFailFastOnStartup(failFastOnStartup bool) Opts

SetFailFastOnStartup sets the fail fast on startup setting

func (Opts) SetMaxTimeout

func (o Opts) SetMaxTimeout(timeout time.Duration) Opts

SetMaxTimeout sets the max health check timeout

func (Opts) SetMinRunInterval

func (o Opts) SetMinRunInterval(runInterval time.Duration) Opts

SetMinRunInterval sets the min health check run interval

type OverallHealth

type OverallHealth func() Status

OverallHealth returns the overall health status.

  • `Green` if all health checks are `Green`
  • `Yellow` if there is at least 1 `Yellow` and no `Red`
  • `Red` if at least 1 health check has a `Red` status

type OverallHealthMonitor

type OverallHealthMonitor struct {
	// contains filtered or unexported fields
}

OverallHealthMonitor publishes overall health changes. When first created, it immediately sends the current status. From that point on, whenever the overall health status changes, it is published.

func (OverallHealthMonitor) Chan

func (m OverallHealthMonitor) Chan() <-chan Status

Chan returns the chan in read-only mode

type Register

type Register func(check Check, opts CheckerOpts, checker func() (Status, error)) error

Register is used to register health checks.

type RegisteredCheck

type RegisteredCheck struct {
	// Check is the health check definition. Its fields are trimmed during registration.
	Check
	// CheckerOpts are the run options. Defaults are applied to zero value fields during registration.
	CheckerOpts
	// Checker runs the health check. It is wrapped during registration to enforce the run timeout policy.
	Checker
}

RegisteredCheck represents a registered health check.

NOTE: when a health check is registered the following augmentations are applied:

  • Check fields are trimmed during registration
  • Checker function is wrapped when registered to enforce the run timeout policy.
  • defaults are applied to CheckerOpts zero value fields

type RegisteredCheckSubscription

type RegisteredCheckSubscription struct {
	// contains filtered or unexported fields
}

RegisteredCheckSubscription wraps the channel used to notify subscribers

func (RegisteredCheckSubscription) Chan

Chan returns the chan in read-only mode

type RegisteredChecks

type RegisteredChecks func() <-chan []RegisteredCheck

RegisteredChecks returns all registered Checks

type Result

type Result struct {
	// ID is the health check ID
	ID string

	// Status is the health check status
	Status Status
	// Err should be nil if the status is `Green`
	Err error

	// Time is when the health check was run (embedded field)
	time.Time
	// Duration is how long it took for the health check to run (embedded field)
	time.Duration
}

Result represents a health check Result

func (*Result) String

func (r *Result) String() string

type Status

type Status uint8

Status is used to define a health check status

const (
	// Green is the zero value. A result with a Green status is expected to have no error.
	Green Status = iota
	// Yellow indicates the health check is triggering a warning - usually to signal a degraded state.
	Yellow
	// Red indicates a health check failure, e.g. a health check that timed out is considered Red.
	Red
)

Status enum

func (Status) String

func (e Status) String() string

type SubscribeForCheckResults

type SubscribeForCheckResults func(filter func(result Result) bool) CheckResultsSubscription

SubscribeForCheckResults is used to subscribe to health check results that match the specified filter

type SubscribeForRegisteredChecks

type SubscribeForRegisteredChecks func() RegisteredCheckSubscription

SubscribeForRegisteredChecks is used to subscribe for health check registrations

Use Cases:

  • logging - log the registered health checks

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL