From 3c4fdac5e80b515fba4d73fb9b9102b8042ca1a4 Mon Sep 17 00:00:00 2001 From: Gordon Date: Tue, 11 Jun 2024 14:55:17 -0400 Subject: [PATCH 1/7] Make time bucket count configurable --- telemetry/prometheus/config.go | 15 ++++++++++++--- telemetry/prometheus/metrics.go | 19 ++++++++++++++++--- 2 files changed, 28 insertions(+), 6 deletions(-) diff --git a/telemetry/prometheus/config.go b/telemetry/prometheus/config.go index c04fcb72..e2604321 100644 --- a/telemetry/prometheus/config.go +++ b/telemetry/prometheus/config.go @@ -5,10 +5,19 @@ import ( "regexp" ) +const ( + // Default bucket count 1000 can satisfy the precision of p99 for most histogram stats + DefaultBucketCount = 1000 +) + type Config struct { - Enabled bool - Namespace string // optional - Subsystem string // optional + Enabled bool + Namespace string // optional + Subsystem string // optional + HistogramBucketCount int // Number of buckets for histogram, default to 1000 + // Number of buckets for time buckets, default to 1000. + // The bucket size is 10ms, so the maximum covered time range is 10ms * TimeBucketCount + TimeBucketCount int } func (c *Config) Validate() error { diff --git a/telemetry/prometheus/metrics.go b/telemetry/prometheus/metrics.go index 65bc6ff3..6f17105b 100644 --- a/telemetry/prometheus/metrics.go +++ b/telemetry/prometheus/metrics.go @@ -24,6 +24,7 @@ type metrics struct { // NewMetrics initializes a new instance of Prometheus metrics. func NewMetrics(cfg *Config) (*metrics, error) { //nolint:revive // only used as Metrics interface. + setDefaultCfg(cfg) if err := cfg.Validate(); err != nil { return nil, err } @@ -173,7 +174,8 @@ func (p *metrics) Histogram(name string, value float64, tags []string, rate floa Namespace: p.cfg.Namespace, Subsystem: p.cfg.Subsystem, Help: name + " histogram", - Buckets: prometheus.LinearBuckets(0, rate, 10), // Adjust bucketing as necessary + // The maximum covered stats range is rate * HistogramBucketCount + Buckets: prometheus.LinearBuckets(0, rate, p.cfg.HistogramBucketCount), }, labels) prometheus.MustRegister(histogramVec) p.histogramVecs[name] = histogramVec @@ -196,14 +198,15 @@ func (p *metrics) Time(name string, value time.Duration, tags []string) { Namespace: p.cfg.Namespace, Subsystem: p.cfg.Subsystem, Help: name + " timing histogram", - Buckets: prometheus.LinearBuckets(0, 1, 10), // Adjust bucketing as necessary + // Given bucket=10ms, the maximum covered time range is 10ms * TimeBucketCount + Buckets: prometheus.LinearBuckets(0, 10, p.cfg.TimeBucketCount), }, labels) prometheus.MustRegister(histogramVec) p.histogramVecs[name] = histogramVec } // Convert time.Duration to seconds since Prometheus prefers base units - histogramVec.WithLabelValues(labelValues...).Observe(value.Seconds()) + histogramVec.WithLabelValues(labelValues...).Observe(float64(value.Milliseconds())) } // Latency is a helper function to measure the latency of a routine. @@ -250,3 +253,13 @@ func forceValidName(name string) string { return string(runes) } + +func setDefaultCfg(cfg *Config) { + // Set default values if not provided + if cfg.HistogramBucketCount == 0 { + cfg.HistogramBucketCount = DefaultBucketCount + } + if cfg.TimeBucketCount == 0 { + cfg.TimeBucketCount = DefaultBucketCount + } +} From a7123d1fa01a324b68f650b75993fef57fac30c4 Mon Sep 17 00:00:00 2001 From: Gordon Date: Tue, 11 Jun 2024 15:03:40 -0400 Subject: [PATCH 2/7] lint --- telemetry/prometheus/config.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/telemetry/prometheus/config.go b/telemetry/prometheus/config.go index e2604321..0d02acce 100644 --- a/telemetry/prometheus/config.go +++ b/telemetry/prometheus/config.go @@ -6,7 +6,7 @@ import ( ) const ( - // Default bucket count 1000 can satisfy the precision of p99 for most histogram stats + // Default bucket count 1000 can satisfy the precision of p99 for most histogram stats. DefaultBucketCount = 1000 ) @@ -16,7 +16,7 @@ type Config struct { Subsystem string // optional HistogramBucketCount int // Number of buckets for histogram, default to 1000 // Number of buckets for time buckets, default to 1000. - // The bucket size is 10ms, so the maximum covered time range is 10ms * TimeBucketCount + // The bucket size is 10ms, so the maximum covered time range is 10ms * TimeBucketCount. TimeBucketCount int } From 6b855985c2014b36298159b50d422c6eb27105c5 Mon Sep 17 00:00:00 2001 From: gordonbear <168457646+gordonbear@users.noreply.github.com> Date: Tue, 11 Jun 2024 15:16:57 -0400 Subject: [PATCH 3/7] Update telemetry/prometheus/metrics.go Co-authored-by: Cal Bera Signed-off-by: gordonbear <168457646+gordonbear@users.noreply.github.com> --- telemetry/prometheus/metrics.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/telemetry/prometheus/metrics.go b/telemetry/prometheus/metrics.go index 6f17105b..00246bee 100644 --- a/telemetry/prometheus/metrics.go +++ b/telemetry/prometheus/metrics.go @@ -256,10 +256,10 @@ func forceValidName(name string) string { func setDefaultCfg(cfg *Config) { // Set default values if not provided - if cfg.HistogramBucketCount == 0 { + if cfg.HistogramBucketCount <= 0 { cfg.HistogramBucketCount = DefaultBucketCount } - if cfg.TimeBucketCount == 0 { + if cfg.TimeBucketCount <= 0 { cfg.TimeBucketCount = DefaultBucketCount } } From e969e88d6ee5ce66844077ce86ebf9ed0145a270 Mon Sep 17 00:00:00 2001 From: Gordon Date: Tue, 11 Jun 2024 15:19:42 -0400 Subject: [PATCH 4/7] Update to use seconds --- telemetry/prometheus/config.go | 2 +- telemetry/prometheus/metrics.go | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/telemetry/prometheus/config.go b/telemetry/prometheus/config.go index 0d02acce..f9f2c49d 100644 --- a/telemetry/prometheus/config.go +++ b/telemetry/prometheus/config.go @@ -16,7 +16,7 @@ type Config struct { Subsystem string // optional HistogramBucketCount int // Number of buckets for histogram, default to 1000 // Number of buckets for time buckets, default to 1000. - // The bucket size is 10ms, so the maximum covered time range is 10ms * TimeBucketCount. + // The bucket size is 0.01s(10ms), so the maximum covered time range is 10ms * TimeBucketCount. TimeBucketCount int } diff --git a/telemetry/prometheus/metrics.go b/telemetry/prometheus/metrics.go index 00246bee..db797d3f 100644 --- a/telemetry/prometheus/metrics.go +++ b/telemetry/prometheus/metrics.go @@ -198,15 +198,15 @@ func (p *metrics) Time(name string, value time.Duration, tags []string) { Namespace: p.cfg.Namespace, Subsystem: p.cfg.Subsystem, Help: name + " timing histogram", - // Given bucket=10ms, the maximum covered time range is 10ms * TimeBucketCount - Buckets: prometheus.LinearBuckets(0, 10, p.cfg.TimeBucketCount), + // Given bucket=0.01s(10ms), the maximum covered time range is 10ms * TimeBucketCount + Buckets: prometheus.LinearBuckets(0, 0.01, p.cfg.TimeBucketCount), }, labels) prometheus.MustRegister(histogramVec) p.histogramVecs[name] = histogramVec } // Convert time.Duration to seconds since Prometheus prefers base units - histogramVec.WithLabelValues(labelValues...).Observe(float64(value.Milliseconds())) + histogramVec.WithLabelValues(labelValues...).Observe(float64(value.Seconds())) } // Latency is a helper function to measure the latency of a routine. From 693abbd6886946876857064b9d64bf252df7611a Mon Sep 17 00:00:00 2001 From: Gordon Date: Tue, 11 Jun 2024 15:30:54 -0400 Subject: [PATCH 5/7] more comments --- telemetry/prometheus/metrics.go | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/telemetry/prometheus/metrics.go b/telemetry/prometheus/metrics.go index db797d3f..3a195a35 100644 --- a/telemetry/prometheus/metrics.go +++ b/telemetry/prometheus/metrics.go @@ -206,6 +206,7 @@ func (p *metrics) Time(name string, value time.Duration, tags []string) { } // Convert time.Duration to seconds since Prometheus prefers base units + // see https://prometheus.io/docs/practices/naming/#base-units histogramVec.WithLabelValues(labelValues...).Observe(float64(value.Seconds())) } @@ -254,8 +255,8 @@ func forceValidName(name string) string { return string(runes) } +// Set default values if not provided func setDefaultCfg(cfg *Config) { - // Set default values if not provided if cfg.HistogramBucketCount <= 0 { cfg.HistogramBucketCount = DefaultBucketCount } From a0343be067a00dac373976d78c94ba297f68b98e Mon Sep 17 00:00:00 2001 From: Gordon Date: Tue, 11 Jun 2024 15:32:34 -0400 Subject: [PATCH 6/7] Remove unneeded conversion --- telemetry/prometheus/metrics.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/telemetry/prometheus/metrics.go b/telemetry/prometheus/metrics.go index 3a195a35..9d13ef98 100644 --- a/telemetry/prometheus/metrics.go +++ b/telemetry/prometheus/metrics.go @@ -207,7 +207,7 @@ func (p *metrics) Time(name string, value time.Duration, tags []string) { // Convert time.Duration to seconds since Prometheus prefers base units // see https://prometheus.io/docs/practices/naming/#base-units - histogramVec.WithLabelValues(labelValues...).Observe(float64(value.Seconds())) + histogramVec.WithLabelValues(labelValues...).Observe(value.Seconds()) } // Latency is a helper function to measure the latency of a routine. From f17e2522557a44a5c0c7e036da4cf5fd688baa3d Mon Sep 17 00:00:00 2001 From: Gordon Date: Tue, 11 Jun 2024 15:34:32 -0400 Subject: [PATCH 7/7] lint --- telemetry/prometheus/metrics.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/telemetry/prometheus/metrics.go b/telemetry/prometheus/metrics.go index 9d13ef98..f3a2bf44 100644 --- a/telemetry/prometheus/metrics.go +++ b/telemetry/prometheus/metrics.go @@ -255,7 +255,7 @@ func forceValidName(name string) string { return string(runes) } -// Set default values if not provided +// Set default values if not provided. func setDefaultCfg(cfg *Config) { if cfg.HistogramBucketCount <= 0 { cfg.HistogramBucketCount = DefaultBucketCount