diff --git a/telemetry/prometheus/config.go b/telemetry/prometheus/config.go
index c04fcb7..f9f2c49 100644
--- a/telemetry/prometheus/config.go
+++ b/telemetry/prometheus/config.go
@@ -5,10 +5,19 @@ import (
 	"regexp"
 )
 
+const (
+	// DefaultBucketCount is the fallback bucket count; 1000 buckets can satisfy the precision of p99 for most histogram stats.
+	DefaultBucketCount = 1000
+)
+
 type Config struct {
-	Enabled   bool
-	Namespace string // optional
-	Subsystem string // optional
+	Enabled              bool
+	Namespace            string // optional
+	Subsystem            string // optional
+	HistogramBucketCount int    // Number of buckets for histograms; defaults to 1000.
+	// Number of buckets for timing histograms; defaults to 1000.
+	// Buckets are 0.01s (10ms) apart starting at 0, so the largest finite bound is 10ms * (TimeBucketCount - 1).
+	TimeBucketCount int
 }
 
 func (c *Config) Validate() error {
diff --git a/telemetry/prometheus/metrics.go b/telemetry/prometheus/metrics.go
index 65bc6ff..f3a2bf4 100644
--- a/telemetry/prometheus/metrics.go
+++ b/telemetry/prometheus/metrics.go
@@ -24,6 +24,7 @@ type metrics struct {
 
 // NewMetrics initializes a new instance of Prometheus metrics.
 func NewMetrics(cfg *Config) (*metrics, error) { //nolint:revive // only used as Metrics interface.
+	setDefaultCfg(cfg)
 	if err := cfg.Validate(); err != nil {
 		return nil, err
 	}
@@ -173,7 +174,8 @@ func (p *metrics) Histogram(name string, value float64, tags []string, rate floa
 			Namespace: p.cfg.Namespace,
 			Subsystem: p.cfg.Subsystem,
 			Help:      name + " histogram",
-			Buckets:   prometheus.LinearBuckets(0, rate, 10), // Adjust bucketing as necessary
+			// Bucket bounds run from 0 up to rate * (HistogramBucketCount - 1) in steps of rate.
+			Buckets: prometheus.LinearBuckets(0, rate, p.cfg.HistogramBucketCount),
 		}, labels)
 		prometheus.MustRegister(histogramVec)
 		p.histogramVecs[name] = histogramVec
@@ -196,13 +198,15 @@ func (p *metrics) Time(name string, value time.Duration, tags []string) {
 			Namespace: p.cfg.Namespace,
 			Subsystem: p.cfg.Subsystem,
 			Help:      name + " timing histogram",
-			Buckets:   prometheus.LinearBuckets(0, 1, 10), // Adjust bucketing as necessary
+			// Bounds are 0.01s (10ms) apart from 0, so the largest finite bound is 10ms * (TimeBucketCount - 1).
+			Buckets: prometheus.LinearBuckets(0, 0.01, p.cfg.TimeBucketCount),
 		}, labels)
 		prometheus.MustRegister(histogramVec)
 		p.histogramVecs[name] = histogramVec
 	}
 
 	// Convert time.Duration to seconds since Prometheus prefers base units
+	// see https://prometheus.io/docs/practices/naming/#base-units
 	histogramVec.WithLabelValues(labelValues...).Observe(value.Seconds())
 }
 
@@ -250,3 +254,17 @@ func forceValidName(name string) string {
 
 	return string(runes)
 }
+
+// setDefaultCfg replaces non-positive bucket counts in cfg with DefaultBucketCount.
+// A nil cfg is left untouched so the helper itself never dereferences a nil pointer.
+func setDefaultCfg(cfg *Config) {
+	if cfg == nil {
+		return
+	}
+	if cfg.HistogramBucketCount <= 0 {
+		cfg.HistogramBucketCount = DefaultBucketCount
+	}
+	if cfg.TimeBucketCount <= 0 {
+		cfg.TimeBucketCount = DefaultBucketCount
+	}
+}