Documentation ¶
Overview ¶
Reference: https://github.dev/slok/sloth/tree/main/internal/alert/window.go
Index ¶
- Constants
- Variables
- func ApplyFiltersToCortexEvents(seriesInfo *cortexadmin.SeriesInfoList) (*sloapi.MetricGroupList, error)
- func DetectActiveWindows(severity string, matrix *prommodel.Matrix) ([]*sloapi.AlertFiringWindows, error)
- func LeftJoinSlice[T comparable](arr1, arr2 []T) []T
- func LeftJoinSliceAbstract[T any, S comparable](arr1, arr2 []T, getId func(T) S) []T
- func MergeLabels(ms ...map[string]string) map[string]string
- func MergeRuleGroups(left rulefmt.RuleGroup, right *rulefmt.RuleGroup) *rulefmt.RuleGroup
- func NewWindowRange(sloPeriod string) []string
- func QuerySLOComponentByRawQuery(ctx context.Context, client cortexadmin.CortexAdminClient, rawQuery string, ...) (*model.Vector, error)
- func QuerySLOComponentByRawQueryRange(ctx context.Context, client cortexadmin.CortexAdminClient, rawQuery string, ...) (*model.Matrix, error)
- func QuerySLOComponentByRecordName(ctx context.Context, client cortexadmin.CortexAdminClient, recordName string, ...) (*model.Vector, error)
- func RegisterDatasource(datasource string, sloImpl SLOStore, serviceImpl ServiceBackend)
- func Scheme(ctx context.Context) meta.Scheme
- func TimeDurationToPromStr(t time.Duration) string
- func ToMatchingSubsetIdenticalMetric(goodEvents, totalEvents []*sloapi.Event) (good, total []*sloapi.Event)
- type Filter
- type FilterValue
- type IdentificationLabels
- type LabelPair
- type LabelPairs
- type Metric
- type MetricIds
- type MonitoringServiceBackend
- func (m MonitoringServiceBackend) ListEvents() (*sloapi.EventList, error)
- func (m MonitoringServiceBackend) ListMetrics() (*sloapi.MetricGroupList, error)
- func (m MonitoringServiceBackend) ListServices() (*sloapi.ServiceList, error)
- func (m *MonitoringServiceBackend) WithCurrentRequest(ctx context.Context, req proto.Message) ServiceBackend
- type Plugin
- func (p *Plugin) CloneSLO(ctx context.Context, ref *corev1.Reference) (*sloapi.SLOData, error)
- func (p *Plugin) CloneToClusters(ctx context.Context, req *sloapi.MultiClusterSLO) (*sloapi.MultiClusterFailures, error)
- func (p *Plugin) CreateSLO(ctx context.Context, slorequest *sloapi.CreateSLORequest) (*corev1.Reference, error)
- func (p *Plugin) DeleteSLO(ctx context.Context, req *corev1.Reference) (*emptypb.Empty, error)
- func (p *Plugin) GetSLO(ctx context.Context, ref *corev1.Reference) (*sloapi.SLOData, error)
- func (p *Plugin) ListEvents(ctx context.Context, req *sloapi.ListEventsRequest) (*sloapi.EventList, error)
- func (p *Plugin) ListMetrics(ctx context.Context, req *sloapi.ListMetricsRequest) (*sloapi.MetricGroupList, error)
- func (p *Plugin) ListSLOs(ctx context.Context, _ *emptypb.Empty) (*sloapi.ServiceLevelObjectiveList, error)
- func (p *Plugin) ListServices(ctx context.Context, req *sloapi.ListServicesRequest) (*sloapi.ServiceList, error)
- func (p *Plugin) Preview(ctx context.Context, req *sloapi.CreateSLORequest) (*sloapi.SLOPreviewResponse, error)
- func (p *Plugin) Status(ctx context.Context, ref *corev1.Reference) (*sloapi.SLOStatus, error)
- func (p *Plugin) UpdateSLO(ctx context.Context, req *sloapi.SLOData) (*emptypb.Empty, error)
- func (p *Plugin) UseAPIExtensions(intf system.ExtensionClientInterface)
- func (p *Plugin) UseKeyValueStore(client system.KeyValueStoreClient)
- func (p *Plugin) UseManagementAPI(client managementv1.ManagementClient)
- type Regexp
- type RequestBase
- type SLO
- func (s *SLO) AlertPageThreshold() float64
- func (s *SLO) ConstructAlertingRuleGroup(interval *time.Duration) rulefmt.RuleGroup
- func (s *SLO) ConstructCortexRules(interval *time.Duration) (sli, metadata, alerts rulefmt.RuleGroup)
- func (s *SLO) ConstructMetadataRules(interval *time.Duration) rulefmt.RuleGroup
- func (s *SLO) ConstructRawAlertQueries() (yaml.Node, yaml.Node)
- func (s *SLO) ConstructRecordingRuleGroup(interval *time.Duration) rulefmt.RuleGroup
- func (s *SLO) GetId() string
- func (s *SLO) GetName() string
- func (s *SLO) GetObjective() float64
- func (s *SLO) GetPeriod() string
- func (s *SLO) GetPrometheusRuleFilterByIdLabels() (string, error)
- func (s *SLO) RawBudgetRemainingQuery() string
- func (s *SLO) RawCurrentBurnRateQuery() string
- func (s *SLO) RawDashboardInfoQuery() string
- func (s *SLO) RawErrorBudgetQuery() string
- func (s *SLO) RawGoodEventsQuery(w string) (string, error)
- func (s *SLO) RawObjectiveQuery() string
- func (s *SLO) RawPeriodBurnRateQuery() string
- func (s *SLO) RawPeriodDurationQuery() string
- func (s *SLO) RawSLIQuery(w string) (string, error)
- func (s *SLO) RawTotalEventsQuery(w string) (string, error)
- func (s *SLO) SetId(id string)
- func (s *SLO) SetName(input string)
- type SLOLogging
- type SLOMonitoring
- func (s SLOMonitoring) Clone(clone *sloapi.SLOData) (*corev1.Reference, *sloapi.SLOData, error)
- func (s SLOMonitoring) Create() (*corev1.Reference, error)
- func (s SLOMonitoring) Delete(existing *sloapi.SLOData) error
- func (s SLOMonitoring) MultiClusterClone(base *sloapi.SLOData, inputClusters []*corev1.Reference, ...) ([]*corev1.Reference, []*sloapi.SLOData, []error)
- func (s SLOMonitoring) Preview(slo *SLO) (*sloapi.SLOPreviewResponse, error)
- func (s SLOMonitoring) Status(existing *sloapi.SLOData) (*sloapi.SLOStatus, error)
- func (s SLOMonitoring) Update(existing *sloapi.SLOData) (*sloapi.SLOData, error)
- func (s *SLOMonitoring) WithCurrentRequest(ctx context.Context, req proto.Message) SLOStore
- type SLOStore
- type Service
- type ServiceBackend
- type SliQueryInfo
- type SloFiltersInfo
- type StorageAPIs
- type UserLabels
- type Window
- type Windows
- func (w Windows) GetBurnRateFactor(totalWindow time.Duration, errorBudgetPercent float64, ...) float64
- func (w Windows) GetSpeedPageQuick() float64
- func (w Windows) GetSpeedPageSlow() float64
- func (w Windows) GetSpeedTicketQuick() float64
- func (w Windows) GetSpeedTicketSlow() float64
- func (w Windows) Validate() error
Constants ¶
const ( // alert rule names RecordingRuleSuffix = "-recording" MetadataRuleSuffix = "-metadata" AlertRuleSuffix = "-alerts" )
Variables ¶
var EnabledFilters = map[string]embed.FS{"metricgroups": MetricGroups, "servicegroups": ServiceGroups}
EnabledFilters is a map of directory names to their embed.FS.
var MetricGroups embed.FS
var ServiceGroups embed.FS
Functions ¶
func ApplyFiltersToCortexEvents ¶ added in v0.6.0
func ApplyFiltersToCortexEvents(seriesInfo *cortexadmin.SeriesInfoList) (*sloapi.MetricGroupList, error)
func DetectActiveWindows ¶ added in v0.6.0
func DetectActiveWindows(severity string, matrix *prommodel.Matrix) ([]*sloapi.AlertFiringWindows, error)
DetectActiveWindows
@warning The timestamps are expected to be ordered when traversing matrix --> sample streams --> []values, but this may not always be the case.
func LeftJoinSlice ¶ added in v0.6.0
func LeftJoinSlice[T comparable](arr1, arr2 []T) []T
func LeftJoinSliceAbstract ¶ added in v0.6.0
func LeftJoinSliceAbstract[T any, S comparable](arr1, arr2 []T, getId func(T) S) []T
func MergeRuleGroups ¶ added in v0.6.0
func NewWindowRange ¶ added in v0.6.0
func QuerySLOComponentByRawQuery ¶ added in v0.6.0
func QuerySLOComponentByRawQuery( ctx context.Context, client cortexadmin.CortexAdminClient, rawQuery string, clusterId string, ) (*model.Vector, error)
func QuerySLOComponentByRawQueryRange ¶ added in v0.6.0
func QuerySLOComponentByRecordName ¶ added in v0.6.0
func QuerySLOComponentByRecordName( ctx context.Context, client cortexadmin.CortexAdminClient, recordName string, clusterId string, ) (*model.Vector, error)
func RegisterDatasource ¶
func RegisterDatasource(datasource string, sloImpl SLOStore, serviceImpl ServiceBackend)
func TimeDurationToPromStr ¶ added in v0.6.0
Pretty simple duration formatting for Prometheus.
Types ¶
type Filter ¶ added in v0.6.0
type Filter struct { Name string `yaml:"name"` Filters []FilterValue `yaml:"filters"` Ignore []FilterValue `yaml:"ignore"` }
type FilterValue ¶ added in v0.6.0
type IdentificationLabels ¶ added in v0.6.0
type LabelPairs ¶ added in v0.6.0
type LabelPairs []LabelPair
func (LabelPairs) Construct ¶ added in v0.6.0
func (l LabelPairs) Construct() string
type MonitoringServiceBackend ¶
type MonitoringServiceBackend struct {
RequestBase
}
func (MonitoringServiceBackend) ListEvents ¶ added in v0.6.0
func (m MonitoringServiceBackend) ListEvents() (*sloapi.EventList, error)
func (MonitoringServiceBackend) ListMetrics ¶ added in v0.6.0
func (m MonitoringServiceBackend) ListMetrics() (*sloapi.MetricGroupList, error)
func (MonitoringServiceBackend) ListServices ¶ added in v0.6.0
func (m MonitoringServiceBackend) ListServices() (*sloapi.ServiceList, error)
func (*MonitoringServiceBackend) WithCurrentRequest ¶
func (m *MonitoringServiceBackend) WithCurrentRequest(ctx context.Context, req proto.Message) ServiceBackend
type Plugin ¶
type Plugin struct { slo.UnsafeSLOServer system.UnimplementedSystemPluginClient // contains filtered or unexported fields }
func (*Plugin) CloneToClusters ¶ added in v0.6.0
func (p *Plugin) CloneToClusters(ctx context.Context, req *sloapi.MultiClusterSLO) (*sloapi.MultiClusterFailures, error)
func (*Plugin) ListEvents ¶ added in v0.6.0
func (*Plugin) ListMetrics ¶
func (p *Plugin) ListMetrics(ctx context.Context, req *sloapi.ListMetricsRequest) (*sloapi.MetricGroupList, error)
func (*Plugin) ListServices ¶
func (p *Plugin) ListServices(ctx context.Context, req *sloapi.ListServicesRequest) (*sloapi.ServiceList, error)
func (*Plugin) Preview ¶ added in v0.6.0
func (p *Plugin) Preview(ctx context.Context, req *sloapi.CreateSLORequest) (*sloapi.SLOPreviewResponse, error)
func (*Plugin) UseAPIExtensions ¶
func (p *Plugin) UseAPIExtensions(intf system.ExtensionClientInterface)
func (*Plugin) UseKeyValueStore ¶
func (p *Plugin) UseKeyValueStore(client system.KeyValueStoreClient)
func (*Plugin) UseManagementAPI ¶
func (p *Plugin) UseManagementAPI(client managementv1.ManagementClient)
type Regexp ¶ added in v0.6.0
Regexp adds unmarshalling from json for regexp.Regexp
func (*Regexp) MarshalText ¶ added in v0.6.0
MarshalText marshals regexp.Regexp as string
func (*Regexp) UnmarshalText ¶ added in v0.6.0
UnmarshalText unmarshals json into a regexp.Regexp
type RequestBase ¶
type RequestBase struct {
// contains filtered or unexported fields
}
type SLO ¶ added in v0.6.0
type SLO struct {
// contains filtered or unexported fields
}
func CreateSLORequestToStruct ¶ added in v0.6.0
func CreateSLORequestToStruct(c *sloapi.CreateSLORequest) *SLO
func SLODataToStruct ¶ added in v0.6.0
func (*SLO) AlertPageThreshold ¶ added in v0.6.0
func (*SLO) ConstructAlertingRuleGroup ¶ added in v0.6.0
ConstructAlertingRuleGroup
Note: the first two are expected to be the recording rules.
Note: the second two are expected to be the alerting rules.
func (*SLO) ConstructCortexRules ¶ added in v0.6.0
func (*SLO) ConstructMetadataRules ¶ added in v0.6.0
func (*SLO) ConstructRawAlertQueries ¶ added in v0.6.0
func (s *SLO) ConstructRawAlertQueries() (yaml.Node, yaml.Node)
func (*SLO) ConstructRecordingRuleGroup ¶ added in v0.6.0
func (*SLO) GetObjective ¶ added in v0.6.0
func (*SLO) GetPrometheusRuleFilterByIdLabels ¶ added in v0.6.0
func (*SLO) RawBudgetRemainingQuery ¶ added in v0.6.0
func (*SLO) RawCurrentBurnRateQuery ¶ added in v0.6.0
RawCurrentBurnRateQuery ratioRate : slo:sli_error:ratio_rate<some-period-string>
func (*SLO) RawDashboardInfoQuery ¶ added in v0.6.0
func (*SLO) RawErrorBudgetQuery ¶ added in v0.6.0
func (*SLO) RawGoodEventsQuery ¶ added in v0.6.0
func (*SLO) RawObjectiveQuery ¶ added in v0.6.0
func (*SLO) RawPeriodBurnRateQuery ¶ added in v0.6.0
func (*SLO) RawPeriodDurationQuery ¶ added in v0.6.0
func (*SLO) RawTotalEventsQuery ¶ added in v0.6.0
type SLOLogging ¶
type SLOLogging struct {
RequestBase
}
type SLOMonitoring ¶
type SLOMonitoring struct {
RequestBase
}
func (SLOMonitoring) MultiClusterClone ¶ added in v0.6.0
func (SLOMonitoring) Preview ¶ added in v0.6.0
func (s SLOMonitoring) Preview(slo *SLO) (*sloapi.SLOPreviewResponse, error)
func (SLOMonitoring) Status ¶
Status
Only return errors here that should be considered severe InternalServerErrors.
- Check if enough time has passed to evaluate the rules.
- First, check if it has NoData.
- If it has data, check if it is within budget.
- If it is within budget, check if any alerts are firing.
func (*SLOMonitoring) WithCurrentRequest ¶
type SLOStore ¶
type SLOStore interface { // This method has to handle storage of the SLO in the KVStore itself // since there can be partial successes inside the method Create() (*corev1.Reference, error) Update(existing *sloapi.SLOData) (*sloapi.SLOData, error) Delete(existing *sloapi.SLOData) error Clone(clone *sloapi.SLOData) (*corev1.Reference, *sloapi.SLOData, error) MultiClusterClone( base *sloapi.SLOData, clusters []*corev1.Reference, svcBackend ServiceBackend, ) ([]*corev1.Reference, []*sloapi.SLOData, []error) Status(existing *sloapi.SLOData) (*sloapi.SLOStatus, error) Preview(s *SLO) (*sloapi.SLOPreviewResponse, error) WithCurrentRequest(ctx context.Context, req proto.Message) SLOStore }
type ServiceBackend ¶
type ServiceBackend interface { ListServices() (*sloapi.ServiceList, error) ListMetrics() (*sloapi.MetricGroupList, error) ListEvents() (*sloapi.EventList, error) WithCurrentRequest(ctx context.Context, req proto.Message) ServiceBackend }
func NewMonitoringServiceBackend ¶
func NewMonitoringServiceBackend(p *Plugin, lg *slog.Logger) ServiceBackend
type SliQueryInfo ¶ added in v0.6.0
SliQueryInfo used for filling sli query templates
type SloFiltersInfo ¶ added in v0.6.0
type StorageAPIs ¶
type StorageAPIs struct { SLOs storage.KeyValueStoreT[*slo.SLOData] Services storage.KeyValueStoreT[*slo.Service] Metrics storage.KeyValueStoreT[*slo.Metric] }
type UserLabels ¶ added in v0.6.0
type UserLabels []string
type Window ¶
type Window struct { // ErrorBudgetPercent is the error budget % consumed for a full time window. // Google gives us some defaults in its SRE workbook that work correctly most of the times: // - Page quick: 2% // - Page slow: 5% // - Ticket quick: 10% // - Ticket slow: 10% ErrorBudgetPercent float64 // ShortWindow is the small window used on the alerting part to stop alerting // during a long window because we consumed a lot of error budget but the problem // is already gone. ShortWindow time.Duration // LongWindow is the long window used to alert based on the errors happened on that // long window. LongWindow time.Duration }
type Windows ¶
type Windows struct { SLOPeriod time.Duration PageQuick Window PageSlow Window TicketQuick Window TicketSlow Window }
func GenerateGoogleWindows ¶
https://sre.google/workbook/alerting-on-slos/
The budgeting interval is the shortest interval to monitor in a window.
func WindowDefaults ¶
func (Windows) GetBurnRateFactor ¶
func (w Windows) GetBurnRateFactor(totalWindow time.Duration, errorBudgetPercent float64, consumptionWindow time.Duration) float64
GetBurnRateFactor calculates the burn rate factor (speed) needed to consume the given percentage of the available error budget within a specific time window, taking into account the total time window.
func (Windows) GetSpeedPageQuick ¶
Error budget speeds are based on a full time window; however, once we have the factor (speed), the value can be used with any time window.