Documentation ¶
Overview ¶
Package health provides support for application health checks.
Health check status can be `Green`, `Yellow`, or `Red`. A `Yellow` status indicates that the aspect being checked is still functional but may be under stress, experiencing degraded performance, close to resource constraints, etc.
When health checks are registered, they are scheduled to run on a periodic basis. The max number of health checks that can be run concurrently is configurable as a module option.
The health check is configured with a timeout. If the health check times out, then it is considered a `Red` failure. Health checks should be designed to run as fast as possible.
The latest health check results are cached. Interested parties can subscribe to the following health check events:
- health check registrations
- health check results
- overall health status changes
TODO: 1. health check HTTP API 2. health check gRPC API
- server streaming APIs for health check results and
Example ¶
package main import ( "context" "github.com/oysterpack/andiamo/pkg/fx/health" "go.uber.org/fx" "log" ) func main() { MongoDB := "01DFRABQGWX6HB1HFX3K0GR15G" DatabaseHealthCheck := health.Check{ ID: "01DFR9PN8BEMZFGBC49A698WTS", Description: `Performs the following: 1. creates a new session 2. retrieves the session 3. deletes the session Each operation should not take more than 50 msec. Yellow -> operation took longer than 50 msec but less than 1 sec Red -> SQL error or operation took longer than 1 sec `, YellowImpact: "degraded performance", RedImpact: "unacceptable user experience", Tags: []string{MongoDB}, } SmokeTest := health.Check{ ID: "01DFRB8XAXMF9XJW2XYCSMN4VE", Description: "smoke test", RedImpact: "application is non-functional", } var registeredCheckSubscription health.RegisteredCheckSubscription var checkResultsSubscription health.CheckResultsSubscription var checkResults health.CheckResults app := fx.New( // install the health module using default options health.Module(health.DefaultOpts()), fx.Invoke( // initialize subscribers func(subscribe health.SubscribeForRegisteredChecks) { registeredCheckSubscription = subscribe() }, func(subscribe health.SubscribeForCheckResults) { checkResultsSubscription = subscribe(func(result health.Result) bool { return result.ID == SmokeTest.ID }) }, // register some health checks func(register health.Register) error { return register(DatabaseHealthCheck, health.CheckerOpts{}, func() (status health.Status, e error) { return health.Green, nil }) }, func(register health.Register) error { return register(SmokeTest, health.CheckerOpts{}, func() (status health.Status, e error) { return health.Green, nil }) }, func(registeredChecks health.RegisteredChecks) { log.Print(<-registeredChecks()) }, ), fx.Populate(&checkResults), ) // make sure the app initialized with no errors if app.Err() != nil { log.Panic(app.Err()) } if err := app.Start(context.Background()); err != nil { log.Panic(err) } defer func() { if err := 
app.Stop(context.Background()); err != nil { log.Panic(err) } }() // 2 health checks were registered log.Println(<-registeredCheckSubscription.Chan()) log.Println(<-registeredCheckSubscription.Chan()) // we subscribed to receive health check results for the smoke test log.Println(<-checkResultsSubscription.Chan()) // get all check results log.Println(<-checkResults(nil)) }
Output:
Index ¶
- Constants
- Variables
- func Module(opts Opts) fx.Option
- type Check
- type CheckResults
- type CheckResultsSubscription
- type Checker
- type CheckerOpts
- type MonitorOverallHealth
- type Opts
- func (o Opts) SetDefaultRunInterval(runInterval time.Duration) Opts
- func (o Opts) SetDefaultTimeout(timeout time.Duration) Opts
- func (o Opts) SetFailFastOnStartup(failFastOnStartup bool) Opts
- func (o Opts) SetMaxTimeout(timeout time.Duration) Opts
- func (o Opts) SetMinRunInterval(runInterval time.Duration) Opts
- type OverallHealth
- type OverallHealthMonitor
- type Register
- type RegisteredCheck
- type RegisteredCheckSubscription
- type RegisteredChecks
- type Result
- type Status
- type SubscribeForCheckResults
- type SubscribeForRegisteredChecks
Examples ¶
Constants ¶
const ( // Health checks should be designed to run fast MaxTimeout = 10 * time.Second // Health checks should not be scheduled to run more frequently than every second MinRunInterval = time.Second // MaxCheckParallelism is used to configure the max number of health checks that can run concurrently MaxCheckParallelism uint8 = 1 )
checker constraints
const ( DefaultTimeout = 5 * time.Second DefaultRunInterval = 15 * time.Second )
checker defaults
Variables ¶
var ( ErrServiceNotRunning = errors.New("health service is not running") // ErrTimeout indicates a health check timed out. ErrTimeout = errors.New("health check timed out") ErrContextTimout = errors.New("context timed out") )
package errors
var ( ErrIDNotULID = errors.New("`ID` must be a ULID") ErrBlankDescription = errors.New("`Description` must not be blank") ErrBlankRedImpact = errors.New("`RedImpact` must not be blank") ErrTagNotULID = errors.New("`Tags` must be ULIDs") ErrNilChecker = errors.New("`Checker` is required and must not be nil") ErrRunTimeoutTooHigh = fmt.Errorf("health check run timeout is too high - max allowed timeout is %s", MaxTimeout) ErrRunIntervalTooFrequent = fmt.Errorf("health check run interval is too frequent - min allowed run interval is %s", MinRunInterval) )
health check registration validation errors
Functions ¶
Types ¶
type Check ¶
type Check struct { // ID format is ULID ID string Description string // RedImpact describes what is the application impact of the health check status is red. RedImpact string // YellowImpact describes what is the application impact of the health check status is yellow. // YellowImpact is optional because some health checks may not have a yellow state. YellowImpact string // optional // Tags are used to categorize related health checks. // Tags are ULIDs because naming is hard and we want to avoid accidental collision. Tags []string // optional }
Check defines a health check
type CheckResults ¶
CheckResults returns all current health check results that match the specified filter
type CheckResultsSubscription ¶
type CheckResultsSubscription struct {
// contains filtered or unexported fields
}
CheckResultsSubscription wraps the channel used to notify subscribers
func (CheckResultsSubscription) Chan ¶
func (s CheckResultsSubscription) Chan() <-chan Result
Chan returns the chan in read-only mode
type Checker ¶
type Checker func() Result
Checker performs the health check.
NOTE: health checks must be designed to run as fast and as efficiently as possible.
type CheckerOpts ¶
type CheckerOpts struct { // Timeout must not be zero Timeout time.Duration // Used to schedule health checks to be run on an interval RunInterval time.Duration }
CheckerOpts is used to configure how a Checker is run. Zero values imply using the system default values.
type MonitorOverallHealth ¶
type MonitorOverallHealth func() OverallHealthMonitor
MonitorOverallHealth is used to subscribe to overall health status changes
type Opts ¶
type Opts struct { MinRunInterval time.Duration MaxTimeout time.Duration DefaultTimeout time.Duration DefaultRunInterval time.Duration MaxCheckParallelism uint8 // FailFastOnStartup means the app will fail fast if any health checks fail to pass on app start up. // If true, then all registered health checks are run on application startup. // // default = false FailFastOnStartup bool }
Opts are used to configure the fx module.
func DefaultOpts ¶
func DefaultOpts() Opts
DefaultOpts constructs a new Opts using recommended default values.
func (Opts) SetDefaultRunInterval ¶
SetDefaultRunInterval sets the default health check run interval
func (Opts) SetDefaultTimeout ¶
SetDefaultTimeout sets the default health check timeout
func (Opts) SetFailFastOnStartup ¶
SetFailFastOnStartup sets the fail fast on startup setting
func (Opts) SetMaxTimeout ¶
SetMaxTimeout sets the max health check timeout
type OverallHealth ¶
type OverallHealth func() Status
OverallHealth returns the overall health status.
- `Green` if all health checks are `Green`
- `Yellow` if there is at least 1 `Yellow` and no `Red`
- `Red` if at least 1 health check has a `Red` status
type OverallHealthMonitor ¶
type OverallHealthMonitor struct {
// contains filtered or unexported fields
}
OverallHealthMonitor publishes overall health changes. When first created, it immediately sends the current status. From that point on, whenever the overall health status changes, the new status is published.
func (OverallHealthMonitor) Chan ¶
func (m OverallHealthMonitor) Chan() <-chan Status
Chan returns the chan in read-only mode
type Register ¶
type Register func(check Check, opts CheckerOpts, checker func() (Status, error)) error
Register is used to register health checks.
type RegisteredCheck ¶
type RegisteredCheck struct { Check CheckerOpts Checker }
RegisteredCheck represents a registered health check.
NOTE: when a health check is registered the following augmentations are applied:
- Check fields are trimmed during registration
- Checker function is wrapped when registered to enforce the run timeout policy.
- defaults are applied to CheckerOpts zero value fields
type RegisteredCheckSubscription ¶
type RegisteredCheckSubscription struct {
// contains filtered or unexported fields
}
RegisteredCheckSubscription wraps the channel used to notify subscribers
func (RegisteredCheckSubscription) Chan ¶
func (s RegisteredCheckSubscription) Chan() <-chan RegisteredCheck
Chan returns the chan in read-only mode
type RegisteredChecks ¶
type RegisteredChecks func() <-chan []RegisteredCheck
RegisteredChecks returns all registered Checks
type Result ¶
type Result struct { // ID is the health check ID ID string Status Status // error should be nil if the status is `Green` Err error // Time is when the health check was run time.Time // Duration is how long it took for the health check to run time.Duration }
Result represents a health check Result
type Status ¶
type Status uint8
Status is used to define a health check status
type SubscribeForCheckResults ¶
type SubscribeForCheckResults func(filter func(result Result) bool) CheckResultsSubscription
SubscribeForCheckResults is used to subscribe to health check results that match the specified filter
type SubscribeForRegisteredChecks ¶
type SubscribeForRegisteredChecks func() RegisteredCheckSubscription
SubscribeForRegisteredChecks is used to subscribe to health check registrations
Use Cases:
- logging - log the registered health checks