diff --git a/pkg/ruler/compat.go b/pkg/ruler/compat.go index b27e60d47c..2da748b0bf 100644 --- a/pkg/ruler/compat.go +++ b/pkg/ruler/compat.go @@ -38,8 +38,8 @@ type Pusher interface { } type PusherAppender struct { - failedWrites prometheus.Counter - totalWrites prometheus.Counter + failedWrites *prometheus.CounterVec + totalWrites *prometheus.CounterVec ctx context.Context pusher Pusher @@ -91,7 +91,7 @@ func (a *PusherAppender) AppendHistogramCTZeroSample(storage.SeriesRef, labels.L } func (a *PusherAppender) Commit() error { - a.totalWrites.Inc() + a.totalWrites.WithLabelValues(a.userID).Inc() // Since a.pusher is distributor, client.ReuseSlice will be called in a.pusher.Push. // We shouldn't call client.ReuseSlice here. @@ -103,7 +103,7 @@ func (a *PusherAppender) Commit() error { // Don't report client errors, which are the same ones that would be reported with 4xx HTTP status code // (e.g. series limits, duplicate samples, out of order, etc.) if !mimirpb.IsClientError(err) { - a.failedWrites.Inc() + a.failedWrites.WithLabelValues(a.userID, "error").Inc() } } @@ -123,11 +123,11 @@ type PusherAppendable struct { pusher Pusher userID string - totalWrites prometheus.Counter - failedWrites prometheus.Counter + totalWrites *prometheus.CounterVec + failedWrites *prometheus.CounterVec } -func NewPusherAppendable(pusher Pusher, userID string, totalWrites, failedWrites prometheus.Counter) *PusherAppendable { +func NewPusherAppendable(pusher Pusher, userID string, totalWrites, failedWrites *prometheus.CounterVec) *PusherAppendable { return &PusherAppendable{ pusher: pusher, userID: userID, @@ -209,9 +209,9 @@ type RulesLimits interface { RulerMaxIndependentRuleEvaluationConcurrencyPerTenant(userID string) int64 } -func MetricsQueryFunc(qf rules.QueryFunc, queries, failedQueries prometheus.Counter, remoteQuerier bool) rules.QueryFunc { +func MetricsQueryFunc(qf rules.QueryFunc, userID string, queries, failedQueries *prometheus.CounterVec, remoteQuerier bool) rules.QueryFunc { return func(ctx context.Context, qs string, t time.Time) (promql.Vector, error) { - queries.Inc() + queries.WithLabelValues(userID).Inc() result, err := qf(ctx, qs, t) if err == nil { return result, nil @@ -233,7 +233,7 @@ func MetricsQueryFunc(qf rules.QueryFunc, queries, failedQueries prometheus.Coun // All errors will still be counted towards "evaluation failures" metrics and logged by Prometheus Ruler, // but we only want internal errors here. if _, ok := querier.TranslateToPromqlAPIError(origErr).(promql.ErrStorage); ok { - failedQueries.Inc() + failedQueries.WithLabelValues(userID, "error").Inc() } // Return unwrapped error. @@ -242,7 +242,7 @@ func MetricsQueryFunc(qf rules.QueryFunc, queries, failedQueries prometheus.Coun // When remote querier enabled, consider anything an error except those with 4xx status code. st, ok := grpcutil.ErrorToStatus(err) if !(ok && st.Code()/100 == 4) { - failedQueries.Inc() + failedQueries.WithLabelValues(userID, "error").Inc() } } return result, err @@ -329,23 +329,23 @@ func DefaultTenantManagerFactory( overrides RulesLimits, reg prometheus.Registerer, ) ManagerFactory { - totalWrites := promauto.With(reg).NewCounter(prometheus.CounterOpts{ + totalWrites := promauto.With(reg).NewCounterVec(prometheus.CounterOpts{ Name: "cortex_ruler_write_requests_total", Help: "Number of write requests to ingesters.", - }) - failedWrites := promauto.With(reg).NewCounter(prometheus.CounterOpts{ + }, []string{"user"}) + failedWrites := promauto.With(reg).NewCounterVec(prometheus.CounterOpts{ Name: "cortex_ruler_write_requests_failed_total", Help: "Number of failed write requests to ingesters.", - }) + }, []string{"user", "reason"}) - totalQueries := promauto.With(reg).NewCounter(prometheus.CounterOpts{ + totalQueries := promauto.With(reg).NewCounterVec(prometheus.CounterOpts{ Name: "cortex_ruler_queries_total", Help: "Number of queries executed by ruler.", - }) - failedQueries := promauto.With(reg).NewCounter(prometheus.CounterOpts{ + }, []string{"user"}) + failedQueries := promauto.With(reg).NewCounterVec(prometheus.CounterOpts{ Name: "cortex_ruler_queries_failed_total", Help: "Number of failed queries by ruler.", - }) + }, []string{"user", "reason"}) var rulerQuerySeconds *prometheus.CounterVec var zeroFetchedSeriesQueries *prometheus.CounterVec if cfg.EnableQueryStats { @@ -368,7 +368,7 @@ func DefaultTenantManagerFactory( // Wrap the query function with our custom logic. wrappedQueryFunc := WrapQueryFuncWithReadConsistency(queryFunc, logger) - wrappedQueryFunc = MetricsQueryFunc(wrappedQueryFunc, totalQueries, failedQueries, cfg.QueryFrontend.Address != "") + wrappedQueryFunc = MetricsQueryFunc(wrappedQueryFunc, userID, totalQueries, failedQueries, cfg.QueryFrontend.Address != "") wrappedQueryFunc = RecordAndReportRuleQueryMetrics(wrappedQueryFunc, queryTime, zeroFetchedSeriesCount, logger) // Wrap the queryable with our custom logic. diff --git a/pkg/ruler/compat_test.go b/pkg/ruler/compat_test.go index 35be76d9a5..bc313120b4 100644 --- a/pkg/ruler/compat_test.go +++ b/pkg/ruler/compat_test.go @@ -59,7 +59,12 @@ func (p *fakePusher) Push(_ context.Context, r *mimirpb.WriteRequest) (*mimirpb. func TestPusherAppendable(t *testing.T) { pusher := &fakePusher{} - pa := NewPusherAppendable(pusher, "user-1", promauto.With(nil).NewCounter(prometheus.CounterOpts{}), promauto.With(nil).NewCounter(prometheus.CounterOpts{})) + pa := NewPusherAppendable( + pusher, + "user-1", + promauto.With(nil).NewCounterVec(prometheus.CounterOpts{}, []string{"user"}), + promauto.With(nil).NewCounterVec(prometheus.CounterOpts{}, []string{"user", "reason"}), + ) type sample struct { series string @@ -262,8 +267,8 @@ func TestPusherErrors(t *testing.T) { pusher := &fakePusher{err: tc.returnedError, response: &mimirpb.WriteResponse{}} - writes := promauto.With(nil).NewCounter(prometheus.CounterOpts{}) - failures := promauto.With(nil).NewCounter(prometheus.CounterOpts{}) + writes := promauto.With(nil).NewCounterVec(prometheus.CounterOpts{}, []string{"user"}) + failures := promauto.With(nil).NewCounterVec(prometheus.CounterOpts{}, []string{"user", "reason"}) pa := NewPusherAppendable(pusher, "user-1", writes, failures) lbls, err := parser.ParseMetric("foo_bar") @@ -275,8 +280,8 @@ func TestPusherErrors(t *testing.T) { require.Equal(t, tc.returnedError, a.Commit()) - require.Equal(t, tc.expectedWrites, int(testutil.ToFloat64(writes))) - require.Equal(t, tc.expectedFailures, int(testutil.ToFloat64(failures))) + require.Equal(t, tc.expectedWrites, int(testutil.ToFloat64(writes.WithLabelValues("user-1")))) + require.Equal(t, tc.expectedFailures, int(testutil.ToFloat64(failures.WithLabelValues("user-1", "error")))) }) } } @@ -379,19 +384,19 @@ func TestMetricsQueryFuncErrors(t *testing.T) { } for name, tc := range allCases { t.Run(name, func(t *testing.T) { - queries := promauto.With(nil).NewCounter(prometheus.CounterOpts{}) - failures := promauto.With(nil).NewCounter(prometheus.CounterOpts{}) + queries := promauto.With(nil).NewCounterVec(prometheus.CounterOpts{}, []string{"user"}) + failures := promauto.With(nil).NewCounterVec(prometheus.CounterOpts{}, []string{"user", "reason"}) mockFunc := func(context.Context, string, time.Time) (promql.Vector, error) { return promql.Vector{}, tc.returnedError } - qf := MetricsQueryFunc(mockFunc, queries, failures, tc.remoteQuerier) + qf := MetricsQueryFunc(mockFunc, "user-1", queries, failures, tc.remoteQuerier) _, err := qf(context.Background(), "test", time.Now()) require.Equal(t, tc.expectedError, err) - require.Equal(t, tc.expectedQueries, int(testutil.ToFloat64(queries))) - require.Equal(t, tc.expectedFailedQueries, int(testutil.ToFloat64(failures))) + require.Equal(t, tc.expectedQueries, int(testutil.ToFloat64(queries.WithLabelValues("user-1")))) + require.Equal(t, tc.expectedFailedQueries, int(testutil.ToFloat64(failures.WithLabelValues("user-1", "error")))) }) } }