diff --git a/DEPS.bzl b/DEPS.bzl index 7d72a0f069ee..aa71574845b9 100644 --- a/DEPS.bzl +++ b/DEPS.bzl @@ -4545,10 +4545,10 @@ def go_deps(): name = "com_github_guptarohit_asciigraph", build_file_proto_mode = "disable_global", importpath = "github.com/guptarohit/asciigraph", - sha256 = "c2b81da57a50425d313a684efd13d9741c4e9df4c3cca92dea34d562d34271a1", - strip_prefix = "github.com/guptarohit/asciigraph@v0.5.5", + sha256 = "ec30034bd6d082f3242a5410ae1d02d9a4d164504e735f8448766461207be5a5", + strip_prefix = "github.com/guptarohit/asciigraph@v0.7.3", urls = [ - "https://storage.googleapis.com/cockroach-godeps/gomod/github.com/guptarohit/asciigraph/com_github_guptarohit_asciigraph-v0.5.5.zip", + "https://storage.googleapis.com/cockroach-godeps/gomod/github.com/guptarohit/asciigraph/com_github_guptarohit_asciigraph-v0.7.3.zip", ], ) go_repository( diff --git a/build/bazelutil/distdir_files.bzl b/build/bazelutil/distdir_files.bzl index c0f44432a2c5..2c8c52001dc9 100644 --- a/build/bazelutil/distdir_files.bzl +++ b/build/bazelutil/distdir_files.bzl @@ -616,7 +616,7 @@ DISTDIR_FILES = { "https://storage.googleapis.com/cockroach-godeps/gomod/github.com/grpc-ecosystem/go-grpc-prometheus/com_github_grpc_ecosystem_go_grpc_prometheus-v1.2.0.zip": "124dfc63aa52611a2882417e685c0452d4d99d64c13836a6a6747675e911fc17", "https://storage.googleapis.com/cockroach-godeps/gomod/github.com/grpc-ecosystem/grpc-gateway/com_github_grpc_ecosystem_grpc_gateway-v1.16.0.zip": "377b03aef288b34ed894449d3ddba40d525dd7fb55de6e79045cdf499e7fe565", "https://storage.googleapis.com/cockroach-godeps/gomod/github.com/gsterjov/go-libsecret/com_github_gsterjov_go_libsecret-v0.0.0-20161001094733-a6f4afe4910c.zip": "cffe0a452fd3f00e4d07730caeb254417a720d907294b5b4a3428322655fb130", - "https://storage.googleapis.com/cockroach-godeps/gomod/github.com/guptarohit/asciigraph/com_github_guptarohit_asciigraph-v0.5.5.zip": "c2b81da57a50425d313a684efd13d9741c4e9df4c3cca92dea34d562d34271a1", + "https://storage.googleapis.com/cockroach-godeps/gomod/github.com/guptarohit/asciigraph/com_github_guptarohit_asciigraph-v0.7.3.zip": "ec30034bd6d082f3242a5410ae1d02d9a4d164504e735f8448766461207be5a5", "https://storage.googleapis.com/cockroach-godeps/gomod/github.com/hailocab/go-hostpool/com_github_hailocab_go_hostpool-v0.0.0-20160125115350-e80d13ce29ed.zip": "faf2b985681cda77ab928976b620b790585e364b6aff351483227d474db85e9a", "https://storage.googleapis.com/cockroach-godeps/gomod/github.com/hashicorp/consul/api/com_github_hashicorp_consul_api-v1.10.1.zip": "a84081dcb2361b540bb787871abedc0f9569c09637f5b5c40e973500a4402a82", "https://storage.googleapis.com/cockroach-godeps/gomod/github.com/hashicorp/consul/sdk/com_github_hashicorp_consul_sdk-v0.8.0.zip": "cf29fff6c000ee67eda1b8cacec9648d06944e3cdbb80e2e22dc0165708974c6", diff --git a/go.mod b/go.mod index fb4da92f5e4f..b3bafd62cbe3 100644 --- a/go.mod +++ b/go.mod @@ -176,7 +176,7 @@ require ( github.com/goware/modvendor v0.5.0 github.com/grafana/grafana-openapi-client-go v0.0.0-20240215164046-eb0e60d27cb7 github.com/grpc-ecosystem/grpc-gateway v1.16.0 - github.com/guptarohit/asciigraph v0.5.5 + github.com/guptarohit/asciigraph v0.7.3 github.com/influxdata/influxdb-client-go/v2 v2.3.1-0.20210518120617-5d1fff431040 github.com/irfansharif/recorder v0.0.0-20211218081646-a21b46510fd6 github.com/jackc/pgx/v5 v5.4.2 diff --git a/go.sum b/go.sum index d30ed64f38de..3d802c00803c 100644 --- a/go.sum +++ b/go.sum @@ -1325,8 +1325,8 @@ github.com/grpc-ecosystem/grpc-gateway v1.16.0 h1:gmcG1KaJ57LophUzW0Hy8NmPhnMZb4 
github.com/grpc-ecosystem/grpc-gateway v1.16.0/go.mod h1:BDjrQk3hbvj6Nolgz8mAMFbcEtjT1g+wF4CSlocrBnw= github.com/gsterjov/go-libsecret v0.0.0-20161001094733-a6f4afe4910c h1:6rhixN/i8ZofjG1Y75iExal34USq5p+wiN1tpie8IrU= github.com/gsterjov/go-libsecret v0.0.0-20161001094733-a6f4afe4910c/go.mod h1:NMPJylDgVpX0MLRlPy15sqSwOFv/U1GZ2m21JhFfek0= -github.com/guptarohit/asciigraph v0.5.5 h1:ccFnUF8xYIOUPPY3tmdvRyHqmn1MYI9iv1pLKX+/ZkQ= -github.com/guptarohit/asciigraph v0.5.5/go.mod h1:dYl5wwK4gNsnFf9Zp+l06rFiDZ5YtXM6x7SRWZ3KGag= +github.com/guptarohit/asciigraph v0.7.3 h1:p05XDDn7cBTWiBqWb30mrwxd6oU0claAjqeytllnsPY= +github.com/guptarohit/asciigraph v0.7.3/go.mod h1:dYl5wwK4gNsnFf9Zp+l06rFiDZ5YtXM6x7SRWZ3KGag= github.com/hailocab/go-hostpool v0.0.0-20160125115350-e80d13ce29ed/go.mod h1:tMWxXQ9wFIaZeTI9F+hmhFiGpFmhOHzyShyFUhRm0H4= github.com/hashicorp/consul/api v1.1.0/go.mod h1:VmuI/Lkw1nC05EYQWNKwWGbkg+FbDBtguAZLlVdkD9Q= github.com/hashicorp/consul/api v1.3.0/go.mod h1:MmDNSzIMUjNpY/mQ398R4bk2FnqQLoPndWW5VkKPlCE= diff --git a/pkg/sql/vecindex/BUILD.bazel b/pkg/sql/vecindex/BUILD.bazel index 3f477183a76e..057705eab834 100644 --- a/pkg/sql/vecindex/BUILD.bazel +++ b/pkg/sql/vecindex/BUILD.bazel @@ -13,6 +13,7 @@ go_library( "fixup_worker.go", "index_stats.go", "kmeans.go", + "pacer.go", "split_data.go", "vector_index.go", ], @@ -27,6 +28,7 @@ go_library( "//pkg/util/stop", "//pkg/util/syncutil", "//pkg/util/vector", + "@com_github_cockroachdb_crlib//crtime", "@com_github_cockroachdb_errors//:errors", "@org_gonum_v1_gonum//stat", ], @@ -38,6 +40,7 @@ go_test( "fixup_worker_test.go", "index_stats_test.go", "kmeans_test.go", + "pacer_test.go", "vector_index_test.go", ], data = glob(["testdata/**"]), @@ -52,8 +55,10 @@ go_test( "//pkg/util/num32", "//pkg/util/stop", "//pkg/util/vector", + "@com_github_cockroachdb_crlib//crtime", "@com_github_cockroachdb_datadriven//:datadriven", "@com_github_cockroachdb_errors//:errors", + "@com_github_guptarohit_asciigraph//:asciigraph", "@com_github_stretchr_testify//require", "@org_gonum_v1_gonum//floats/scalar", "@org_gonum_v1_gonum//stat", diff --git a/pkg/sql/vecindex/fixup_processor.go b/pkg/sql/vecindex/fixup_processor.go index 233faa6b7c83..a436e9f62c95 100644 --- a/pkg/sql/vecindex/fixup_processor.go +++ b/pkg/sql/vecindex/fixup_processor.go @@ -34,8 +34,8 @@ const ( // maxFixups specifies the maximum number of pending index fixups that can be // enqueued by foreground threads, waiting for processing. Hitting this limit -// indicates the background goroutines have fallen far behind. -const maxFixups = 100 +// indicates the background goroutine has fallen far behind. +const maxFixups = 200 // fixup describes an index fixup so that it can be enqueued for processing. // Each fixup type needs to have some subset of the fields defined. diff --git a/pkg/sql/vecindex/pacer.go b/pkg/sql/vecindex/pacer.go new file mode 100644 index 000000000000..0cd4b14178d6 --- /dev/null +++ b/pkg/sql/vecindex/pacer.go @@ -0,0 +1,281 @@ +// Copyright 2024 The Cockroach Authors. +// +// Use of this software is governed by the CockroachDB Software License +// included in the /LICENSE file. + +package vecindex + +import ( + "math" + "time" + + "github.com/cockroachdb/crlib/crtime" +) + +// targetQueuedFixups is the number of fixups that are allowed in the queue +// before throttling may begin. Note that even if the current queue size is +// below this threshold, throttling will still occur if the queue size is +// increasing at too high of a rate. 
Also, this is a "soft" target; as long as +// the size is reasonably close, the pacer won't do much. +const targetQueuedFixups = 5 + +// maxQueueSizeRate clamps the measured change in queue size over the course of +// one second, either positive or negative. This avoids pacer overreaction to +// brief bursts of change in small intervals. +const maxQueueSizeRate = 5 + +// gradualQueueSizeMax specifies the max rate of change when the fixup queue +// size needs to be reduced. For example, if the current fixup queue size is 50, +// this is much bigger than the allowed size of 5. However, rather than attempt +// to reduce the size from 50 to 5 in one step, this setting tries to reduce it +// in increments of 2 fixups per second. +const gradualQueueSizeMax = 2 + +// deltaFactor governs how quickly the pacer makes changes to the allowed +// ops/sec. A higher factor value makes the pacer more responsive to changes, +// but increases how much it will overshoot the point of equilibrium. +const deltaFactor = 2 + +// pacer limits the rate of foreground insert and delete operations in the +// vector index such that background split and merge operations can keep up. It +// does this by setting the allowed ops/sec and then delaying operations that +// would otherwise exceed this limit. +// +// During normal operation, the pacer sets ops/sec at a level that tries to +// maintain the fixup queue at its current size (i.e. change rate of zero). +// However, there are two cases in which it will target a non-zero change rate: +// +// 1. If the fixup queue is empty (or nearly so) and operations are being +// delayed, then the pacer will raise allowed ops/sec, since it might be set +// too low (as evidenced by a small queue). +// 2. If the fixup queue size is > targetQueuedFixups, then the pacer will +// reduce allowed ops/sec in an attempt to reduce queue size. It does this +// in increments, with the goal of reducing queue size over time rather than +// all at once. +// +// NOTE: The pacer is *not* thread-safe. It is the responsibility of the caller +// to do any needed locking. +type pacer struct { + // monoNow measures elapsed time and can be mocked for testing. + monoNow func() crtime.Mono + + // lastOpAt records the time of the last insert or delete operation that + // updated the token bucket. + lastOpAt crtime.Mono + + // lastUpdateAt records the time of the last update to allowed ops/sec. + lastUpdateAt crtime.Mono + + // lastQueuedFixups remembers the size of the fixup queue when the last + // insert or delete operation was executed. It's used to observe the delta + // in queue size since that time. + lastQueuedFixups int + + // queueSizeRate estimates how much the size of the fixup queue has changed + // over the last second. It is computed as an exponential moving average + // (EMA) and clamped to +-maxQueueSizeRate. + queueSizeRate float64 + + // allowedOpsPerSec is the maximum rate of insert or delete operations + // that is allowed by the pacer. + allowedOpsPerSec float64 + + // currentTokens tracks how many tokens are currently in the bucket. Each + // token represents one insert or delete operation. When currentTokens + // drops below zero, operations will be delayed. + currentTokens float64 + + // delayed is true if the pacer has delayed an insert or delete operation + // since the last update to allowed ops/sec. + delayed bool +} + +// Init sets up the pacer. "initialOpsPerSec" initializes the token bucket +// refill rate that governs how many insert or delete operations run per second. 
+// This value will automatically change over time, but a more accurate initial +// value can decrease "ramp up" time for the pacer as it learns the optimal +// pace. "initialFixups" specifies the initial number of fixups in the queue +// (used for testing). +func (p *pacer) Init(initialOpsPerSec int, initialFixups int, monoNow func() crtime.Mono) { + p.monoNow = monoNow + p.lastUpdateAt = monoNow() + p.allowedOpsPerSec = float64(initialOpsPerSec) + p.lastQueuedFixups = initialFixups +} + +// OnFixup is called when the size of the fixup queue has changed because a +// fixup has been added or removed to/from the queue by the vector index. +func (p *pacer) OnFixup(queuedFixups int) { + // Compute elapsed time since the last update to allowed ops/sec. + now := p.monoNow() + sinceUpdate := now.Sub(p.lastUpdateAt) + if sinceUpdate == 0 { + // Avoid division by zero. + sinceUpdate = 1 + } + p.lastUpdateAt = now + + p.updateOpsPerSec(sinceUpdate, queuedFixups) +} + +// OnInsertOrDelete is called when an insert or delete operation is about to be +// run by the vector index. It takes the current size of the fixup queue and +// based on that, returns how much time to delay before running the operation. +// This ensures that background index maintenance operations do not fall too far +// behind foreground operations. +func (p *pacer) OnInsertOrDelete(queuedFixups int) time.Duration { + // Fast path: if there are enough tokens in the bucket, no need for delay. + p.currentTokens-- + if p.currentTokens >= 0 { + return 0 + } + + // If it's been at least a second since allowed ops/sec was updated, do so + // now. This handles an edge case where ops/sec is being throttled so heavily + // (e.g. 1 op/sec) that fixups are rare, and it takes too long to increase + // allowed ops/sec. + now := p.monoNow() + sinceUpdate := now.Sub(p.lastUpdateAt) + if sinceUpdate >= time.Second { + p.lastUpdateAt = now + p.updateOpsPerSec(sinceUpdate, queuedFixups) + } + + // Compute elapsed time since the last insert or delete operation that + // updated the token bucket. + sinceOp := now.Sub(p.lastOpAt) + p.lastOpAt = now + + // Add tokens to the bucket based on elapsed time. Allow bucket to contain + // up to one second of excess tokens. + p.currentTokens += p.allowedOpsPerSec * sinceOp.Seconds() + if p.currentTokens > p.allowedOpsPerSec { + p.currentTokens = p.allowedOpsPerSec + } + + if p.currentTokens >= 0 { + // Enough tokens, no delay. + return 0 + } + + // The token bucket has gone into "debt", so return the pacing delay that + // enforces the allowed ops/sec. This is the inverse of allowed ops/sec, or + // the "time between operations". It is multiplied by the number of tokens + // of other waiting operations (i.e. the token debt). For example, if the + // allowed ops/sec is 1000, then operations should have an average 1 ms + // interval between them. If three operations arrive in immediate succession, + // then the first should have 0 ms delay, the second should have 1 ms delay, + // and the third should have 2 ms delay, and so on. + p.delayed = true + return time.Duration(float64(time.Second) * -p.currentTokens / p.allowedOpsPerSec) +} + +// OnInsertOrDeleteCanceled should be called when an insert or delete operation +// has its context canceled while waiting out its pacing delay. Because the +// operation was never completed, its token should not be consumed. Without +// this, a repeated sequence of cancellations could cause the token bucket to go +// increasingly negative, causing ever-increasing delays. 
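+//
+// A typical caller pattern might look like the following sketch (illustrative
+// only; the real call sites and any locking they require are not shown here):
+//
+//	delay := p.OnInsertOrDelete(queuedFixups)
+//	select {
+//	case <-time.After(delay):
+//		// Proceed with the insert or delete operation.
+//	case <-ctx.Done():
+//		p.OnInsertOrDeleteCanceled()
+//		return ctx.Err()
+//	}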
+func (p *pacer) OnInsertOrDeleteCanceled() { + p.currentTokens++ +} + +// updateOpsPerSec updates the allowed ops/sec based on the number of fixups in +// the queue. Updates are scaled by the amount of time that's elapsed since the +// last call to updateOpsPerSec. Allowing sub-second elapsed increments allows +// the pacer to be significantly more responsive. +func (p *pacer) updateOpsPerSec(elapsed time.Duration, queuedFixups int) { + // Remember if any operation was throttled since the last call to update. + delayed := p.delayed + p.delayed = false + + // Calculate the desired rate of change in the fixup queue size over the next + // second. + var desiredQueueSizeRate float64 + if queuedFixups > targetQueuedFixups { + // If the fixup queue is too large, reduce it at a gradual rate that's + // proportional to its distance from the target. Never reduce it more + // than gradualQueueSizeMax. + const gradualRateFactor = 10 + desiredQueueSizeRate = float64(targetQueuedFixups-queuedFixups) / gradualRateFactor + desiredQueueSizeRate = max(desiredQueueSizeRate, -gradualQueueSizeMax) + } else if queuedFixups <= 1 { + // If the fixup queue is empty or has just one fixup, then it could be + // that background fixups are happening fast enough. However, it's also + // possible that the fixup queue is small because the pacer is heavily + // throttling operations. Sharply increase allowed ops/sec, up to the + // target, in case that's true. + desiredQueueSizeRate = float64(targetQueuedFixups - queuedFixups) + } + + // Calculate the actual rate of change in the fixup queue size over the last + // second. + actualQueueSizeRate := p.calculateQueueSizeRate(elapsed, queuedFixups) + + // Calculate the net rate that's needed to match the desired rate. For + // example, if we desire to decrease the queue size by 2 fixups/sec, but the + // queue is actually growing at 2 fixups/sec, then we need a net decrease of + // 4 fixups/sec. + netQueueSizeRate := desiredQueueSizeRate - actualQueueSizeRate + netQueueSizeRate = max(min(netQueueSizeRate, maxQueueSizeRate), -maxQueueSizeRate) + + // Do not increase allowed ops/sec if operations are not being throttled. + // Otherwise, if there's little or no activity, the pacer would never stop + // increasing allowed ops/sec. + if netQueueSizeRate > 0 && !delayed { + return + } + + // Determine how much to change allowed ops/sec to achieve the net change in + // fixup queue size over the next second. When allowed ops/sec is small, + // allow it to ramp quickly by starting with a minimum delta of 10 ops/sec. + const minDeltaOpsPerSec = 10 + deltaOpsPerSec := max(p.allowedOpsPerSec, minDeltaOpsPerSec) + if netQueueSizeRate < 0 { + // Decrease ops/sec up to some % of its current value. For example, if + // deltaFactor is 2, then it can decrease by up to 50% of its current + // value. + deltaOpsPerSec = deltaOpsPerSec/deltaFactor - deltaOpsPerSec + } else { + // Increase ops/sec by some % of its current value. For example, if + // deltaFactor is 2, then it can increase by up to 100% of its current + // value. + deltaOpsPerSec = deltaOpsPerSec*deltaFactor - deltaOpsPerSec + } + + // Scale the change in ops/sec by the magnitude of desired change with respect + // to the max allowed change. + deltaOpsPerSec *= math.Abs(netQueueSizeRate) / maxQueueSizeRate + + // Scale the delta based on the elapsed time. For example, if we want to + // decrease ops/sec by 200, but it's been only 0.5 seconds since the last + // fixup, then we need to change ops/sec by -200 * 0.5 = -100. 
This allows + // for multiple micro-adjustments over the course of a second that add up to + // the full adjustment (if the trend doesn't change). + deltaOpsPerSec = deltaOpsPerSec * min(elapsed.Seconds(), 1) + + // Update allowed ops/sec, but don't let it fall below 1, even in case where, + // for example, fixups are somehow blocked. + p.allowedOpsPerSec = max(p.allowedOpsPerSec+deltaOpsPerSec, 1) +} + +// calculateQueueSizeRate calculates the exponential moving average (EMA) of the +// rate of change in the fixup queue size, over the last second. +func (p *pacer) calculateQueueSizeRate(elapsed time.Duration, queuedFixups int) float64 { + // Calculate the rate of change in the fixup queue size over the elapsed time + // period. + queueSizeRate := float64(queuedFixups-p.lastQueuedFixups) / elapsed.Seconds() + p.lastQueuedFixups = queuedFixups + + // Factor that sample into the EMA by weighting it according to the elapsed + // time (clamped to 1 second max). + alpha := min(elapsed.Seconds(), 1) + p.queueSizeRate = alpha*queueSizeRate + (1-alpha)*p.queueSizeRate + + // Clamp the overall rate of change in order to prevent anomalies when a large + // number of fixups are generated in a short amount of time (e.g. because of + // statistical clustering or a backlog of fixups that is suddenly added to + // the queue). + p.queueSizeRate = max(min(p.queueSizeRate, maxQueueSizeRate), -maxQueueSizeRate) + + return p.queueSizeRate +} diff --git a/pkg/sql/vecindex/pacer_test.go b/pkg/sql/vecindex/pacer_test.go new file mode 100644 index 000000000000..7193c60cc7e0 --- /dev/null +++ b/pkg/sql/vecindex/pacer_test.go @@ -0,0 +1,336 @@ +// Copyright 2024 The Cockroach Authors. +// +// Use of this software is governed by the CockroachDB Software License +// included in the /LICENSE file. + +package vecindex + +import ( + "fmt" + "math/rand" + "strconv" + "testing" + "time" + + "github.com/cockroachdb/cockroach/pkg/util/leaktest" + "github.com/cockroachdb/cockroach/pkg/util/log" + "github.com/cockroachdb/crlib/crtime" + "github.com/cockroachdb/datadriven" + "github.com/guptarohit/asciigraph" + "github.com/stretchr/testify/require" + "gonum.org/v1/gonum/floats/scalar" +) + +// TestPacer runs the pacer with simulated insert and delete operations and +// fixups. It prints ASCII plots of key metrics over the course of the run in +// order to evaluate its effectiveness. 
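+//
+// Each testdata file drives the simulation with "plot" directives; for
+// example, from testdata/pacer/basic.ddt:
+//
+//	plot ops-per-fixup=50 fixups-per-sec=20 show-queue-size
+//	----
+//	<ASCII plot of the fixup queue size over the run>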
+func TestPacer(t *testing.T) { + defer leaktest.AfterTest(t)() + defer log.Scope(t).Close(t) + + datadriven.Walk(t, "testdata/pacer", func(t *testing.T, path string) { + datadriven.RunTest(t, path, func(t *testing.T, d *datadriven.TestData) string { + switch d.Cmd { + case "plot": + return plotPacerDecisions(t, d) + } + + t.Fatalf("unknown cmd: %s", d.Cmd) + return "" + }) + }) +} + +func plotPacerDecisions(t *testing.T, d *datadriven.TestData) string { + const seed = 42 + + var err error + seconds := 10 + height := 10 + width := 90 + initialOpsPerSec := 500 + queuedFixups := 0 + opsPerFixup := 50 + fixupsPerSec := 10 + showActualOpsPerSec := false + showQueueSize := false + showQueueSizeRate := false + showDelayMillis := false + noise := 0.0 + + for _, arg := range d.CmdArgs { + switch arg.Key { + case "seconds": + require.Len(t, arg.Vals, 1) + seconds, err = strconv.Atoi(arg.Vals[0]) + require.NoError(t, err) + + case "height": + require.Len(t, arg.Vals, 1) + height, err = strconv.Atoi(arg.Vals[0]) + require.NoError(t, err) + + case "width": + require.Len(t, arg.Vals, 1) + width, err = strconv.Atoi(arg.Vals[0]) + require.NoError(t, err) + + case "initial-ops-per-sec": + require.Len(t, arg.Vals, 1) + initialOpsPerSec, err = strconv.Atoi(arg.Vals[0]) + require.NoError(t, err) + + case "initial-fixups": + require.Len(t, arg.Vals, 1) + queuedFixups, err = strconv.Atoi(arg.Vals[0]) + require.NoError(t, err) + + case "ops-per-fixup": + require.Len(t, arg.Vals, 1) + opsPerFixup, err = strconv.Atoi(arg.Vals[0]) + require.NoError(t, err) + + case "fixups-per-sec": + require.Len(t, arg.Vals, 1) + fixupsPerSec, err = strconv.Atoi(arg.Vals[0]) + require.NoError(t, err) + + case "show-actual-ops-per-sec": + require.Len(t, arg.Vals, 0) + showActualOpsPerSec = true + + case "show-queue-size": + require.Len(t, arg.Vals, 0) + showQueueSize = true + + case "show-queue-size-rate": + require.Len(t, arg.Vals, 0) + showQueueSizeRate = true + + case "show-delay-millis": + require.Len(t, arg.Vals, 0) + showDelayMillis = true + + case "noise": + require.Len(t, arg.Vals, 1) + noise, err = strconv.ParseFloat(arg.Vals[0], 64) + require.NoError(t, err) + } + } + + var now crtime.Mono + var p pacer + p.Init(initialOpsPerSec, queuedFixups, func() crtime.Mono { return now }) + + var allowedOpsPerSecPoints, actualOpsPerSecPoints []float64 + var queueSizePoints, delayPoints, queueSizeRatePoints []float64 + var opCountPoints []int + var nextOp, nextFixup, nextPoint crtime.Mono + var totalOpCount int + var delay time.Duration + + rng := rand.New(rand.NewSource(seed)) + + // Simulate run with deterministic clock. + end := crtime.Mono(time.Duration(seconds) * time.Second) + for now < end { + if now == nextOp { + noiseFactor := rng.Float64()*2*noise - noise + + totalOpCount++ + delay = p.OnInsertOrDelete(queuedFixups) + // Keep ops/sec <= 1000 (not counting noise). + interval := max(delay, time.Millisecond) + interval = time.Duration(float64(interval) * (1 + noiseFactor)) + nextOp = now + crtime.Mono(interval) + + if totalOpCount%opsPerFixup == 0 { + queuedFixups++ + p.OnFixup(queuedFixups) + } + } + + if now == nextFixup { + noiseFactor := rng.Float64()*2*noise - noise + + if queuedFixups > 0 { + queuedFixups-- + p.OnFixup(queuedFixups) + } + + fixupsInterval := crtime.Mono(time.Second / time.Duration(fixupsPerSec)) + fixupsInterval = max(crtime.Mono(float64(fixupsInterval)*(1+noiseFactor)), 1) + nextFixup = now + fixupsInterval + } + + if now == nextPoint { + // Compute running average of last second of actual ops/sec. 
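+			// Points are sampled once per simulated millisecond, so the last
+			// 1000 points span roughly one second; subtracting the oldest
+			// cumulative op count in that window from the newest approximates
+			// the actual ops/sec.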
+ opCountPoints = append(opCountPoints, totalOpCount) + startOffset := 0 + endOffset := len(opCountPoints) - 1 + if len(opCountPoints) > 1000 { + startOffset = endOffset - 1000 + } + + actualOpsPerSec := float64(opCountPoints[endOffset] - opCountPoints[startOffset]) + actualOpsPerSecPoints = append(actualOpsPerSecPoints, actualOpsPerSec) + + delayPoints = append(delayPoints, delay.Seconds()*1000) + allowedOpsPerSecPoints = append(allowedOpsPerSecPoints, p.allowedOpsPerSec) + queueSizePoints = append(queueSizePoints, float64(queuedFixups)) + queueSizeRatePoints = append(queueSizeRatePoints, p.queueSizeRate) + + nextPoint += crtime.Mono(time.Millisecond) + } + + now = min(nextOp, nextFixup, nextPoint) + } + + finalOffset := len(allowedOpsPerSecPoints) - 1 + if showQueueSize { + caption := fmt.Sprintf(" Fixup queue size = %0.2f fixups (avg), %v fixups (final)\n", + computePointAverage(queueSizePoints), queueSizePoints[finalOffset]) + return caption + asciigraph.Plot(queueSizePoints, + asciigraph.Width(width), + asciigraph.Height(height)) + } + + if showQueueSizeRate { + caption := fmt.Sprintf(" Fixup queue size rate = %0.2f fixups/sec (avg)\n", + computePointAverage(queueSizeRatePoints)) + return caption + asciigraph.Plot(queueSizeRatePoints, + asciigraph.Width(width), + asciigraph.Height(height)) + } + + if showDelayMillis { + caption := fmt.Sprintf(" Delay (ms) = %0.2f ms (avg), %0.2f ms (final)\n", + computePointAverage(delayPoints), delayPoints[finalOffset]) + return caption + asciigraph.Plot(delayPoints, + asciigraph.Width(width), + asciigraph.Height(height)) + } + + if showActualOpsPerSec { + caption := fmt.Sprintf(" Actual ops per second = %0.2f ops/sec (avg), %0.2f ops/sec (final)\n", + computePointAverage(actualOpsPerSecPoints), actualOpsPerSecPoints[finalOffset]) + return caption + asciigraph.Plot(actualOpsPerSecPoints, + asciigraph.Width(width), + asciigraph.Height(height)) + } + + caption := fmt.Sprintf(" Allowed ops per second = %0.2f ops/sec (avg), %0.2f ops/sec (final)\n", + computePointAverage(allowedOpsPerSecPoints), allowedOpsPerSecPoints[finalOffset]) + return caption + asciigraph.Plot(allowedOpsPerSecPoints, + asciigraph.Width(width), + asciigraph.Height(height)) +} + +func computePointAverage(points []float64) float64 { + sum := 0.0 + for _, p := range points { + sum += p + } + return sum / float64(len(points)) +} + +// TestMultipleArrival tests multiple operations arriving while still waiting +// out the delay for previous operations. +func TestMultipleArrival(t *testing.T) { + var p pacer + var now crtime.Mono + p.Init(500, 0, func() crtime.Mono { return now }) + + // Initial delay is 2 milliseconds. + delay := p.OnInsertOrDelete(5) + require.Equal(t, 2*time.Millisecond, delay) + + // After 1 millisecond has elapsed, another op arrives; it should be delayed + // by 3 milliseconds. + now += crtime.Mono(time.Millisecond) + delay = p.OnInsertOrDelete(5) + require.Equal(t, 3*time.Millisecond, delay) + + // Before any further waiting, yet another op arrives. + delay = p.OnInsertOrDelete(5) + require.Equal(t, 5*time.Millisecond, delay) +} + +func TestCancelOp(t *testing.T) { + var p pacer + var now crtime.Mono + p.Init(500, 0, func() crtime.Mono { return now }) + delay := p.OnInsertOrDelete(5) + require.Equal(t, 2*time.Millisecond, delay) + delay = p.OnInsertOrDelete(5) + require.Equal(t, 4*time.Millisecond, delay) + + // "Cancel" one of the operations and expect the next operation to get the + // same delay as the second operation rather than 6 ms. 
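+	// At 500 ops/sec each token is worth 2 ms of delay. The cancellation
+	// returns one token, so once the next operation takes its own token the
+	// bucket is again 2 tokens in debt, i.e. a 4 ms delay.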
+ p.OnInsertOrDeleteCanceled() + delay = p.OnInsertOrDelete(5) + require.Equal(t, 4*time.Millisecond, delay) +} + +// TestCalculateQueueSizeRate tests the queue size rate EMA. The "exact" field +// shows that the EMA is a good approximation of the true value. +func TestCalculateQueueSizeRate(t *testing.T) { + testCases := []struct { + desc string + elapsed time.Duration + queuedFixups int + expected float64 + exact float64 + }{ + { + desc: "EMA is exact when elapsed time is = 1 second", + elapsed: time.Second, + queuedFixups: 5, + expected: 5, + exact: 5, + }, + { + desc: "EMA is exact when elapsed time is > 1 second", + elapsed: 2 * time.Second, + queuedFixups: 8, + expected: 1.5, + exact: 1.5, + }, + { + desc: "rate increases over a smaller time period", + elapsed: time.Second / 2, + queuedFixups: 9, + expected: 1.75, + exact: 1, + }, + { + desc: "rate continues to increase", + elapsed: time.Second / 2, + queuedFixups: 10, + expected: 1.88, + exact: 2, + }, + { + desc: "rate begins to decrease over a smaller time period", + elapsed: time.Second / 4, + queuedFixups: 9, + expected: 0.41, + exact: 0, + }, + { + desc: "rate continues to decrease", + elapsed: time.Second / 4, + queuedFixups: 6, + expected: -2.7, + exact: -3, + }, + } + + var p pacer + for _, tc := range testCases { + t.Run(tc.desc, func(t *testing.T) { + actual := p.calculateQueueSizeRate(tc.elapsed, tc.queuedFixups) + require.Equal(t, tc.expected, scalar.Round(actual, 2)) + }) + } +} diff --git a/pkg/sql/vecindex/testdata/pacer/basic.ddt b/pkg/sql/vecindex/testdata/pacer/basic.ddt new file mode 100644 index 000000000000..b2beef6650fe --- /dev/null +++ b/pkg/sql/vecindex/testdata/pacer/basic.ddt @@ -0,0 +1,308 @@ +# ---------- +# Fixup processing keeps up with 1000 ops/sec test load. +# ---------- + +# At 50 ops per fixup, 20 fixups/sec must be processed to keep up with 1000 +# ops/sec. Therefore, no throttling should be needed. +plot ops-per-fixup=50 fixups-per-sec=20 +---- + Allowed ops per second = 1031.03 ops/sec (avg), 1068.02 ops/sec (final) + 1068 ┤ ╭─────────────────────────────────────────────────────────────────────────────── + 1011 ┤ ╭╯ + 954 ┤ │ + 898 ┤ ╭╯ + 841 ┤ ╭╯ + 784 ┤ ╭╯ + 727 ┤ ╭╯ + 670 ┤ ╭╯ + 614 ┤ ╭╯ + 557 ┤ ╭╯ + 500 ┼─╯ + +# Actual ops/sec should converge to exactly 1000 ops/sec (unthrottled rate). +plot ops-per-fixup=50 fixups-per-sec=20 show-actual-ops-per-sec +---- + Actual ops per second = 920.15 ops/sec (avg), 1000.00 ops/sec (final) + 1000 ┤ ╭────────────────────────────────────────────────────────────────────────── + 900 ┤ ╭──╯ + 800 ┤ ╭─╯ + 700 ┤ ╭╯ + 600 ┤ ╭╯ + 500 ┤ ╭╯ + 400 ┤ ╭╯ + 300 ┤ ╭─╯ + 200 ┤ ╭╯ + 100 ┤╭─╯ + 0 ┼╯ + +# Delay should quickly decrease to ~0. +plot ops-per-fixup=50 fixups-per-sec=20 show-delay-millis +---- + Delay (ms) = 0.15 ms (avg), 0.00 ms (final) + 2.00 ┼╮ + 1.80 ┤╰─╮ + 1.60 ┤ ╰─╮ + 1.40 ┤ ╰╮ + 1.20 ┤ ╰─╮ + 1.00 ┤ ╰─╮ + 0.80 ┤ │ + 0.60 ┤ │ + 0.40 ┤ │ + 0.20 ┤ │ + 0.00 ┤ ╰─────────────────────────────────────────────────────────────────────────────── + +# Fixup queue size should stay between 0 and 1. 
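+# At 1000 ops/sec and 50 ops per fixup, fixups are generated at the same
+# 20/sec rate at which they are processed, so at most one fixup should ever
+# be pending.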
+plot ops-per-fixup=50 fixups-per-sec=20 show-queue-size +---- + Fixup queue size = 0.08 fixups (avg), 1 fixups (final) + 1.00 ┤ ╭──╮ ╭╮ ╭╮ ╭╮ ╭ + 0.90 ┤ │ │ ││ ││ ││ │ + 0.80 ┤ │ │ ││ ││ ││ │ + 0.70 ┤ │ │ ││ ││ ││ │ + 0.60 ┤ ╭╮│ │ ││ ││ ││ │ + 0.50 ┤ │││ │ ││ ││ ││ │ + 0.40 ┤ │││ │ ││ ││ ││ ╭╮ ╭╮ │ + 0.30 ┤ │││ │ ││ ││ ││ ││ ││ │ + 0.20 ┤ │││ │ ││ ││ ││ ││ ││ │ + 0.10 ┤ │││ │ ││ ││ ││ ││ ││ │ + 0.00 ┼───╯╰╯ ╰──╯╰──╯╰──╯╰──╯╰───────────────────────────────────────────────────────────╯╰──╯ + +# ---------- +# Fixup processing is 10% below sustainable rate. +# ---------- + +# The test load requires 20 fixups/sec to keep up. At only 18 fixups/sec, the +# pacer needs to throttle traffic. Allowed ops/sec should converge to +# 50 * 18 = ~900. +plot ops-per-fixup=50 fixups-per-sec=18 +---- + Allowed ops per second = 890.32 ops/sec (avg), 890.39 ops/sec (final) + 1062 ┤ ╭────╮ + 1005 ┤ │ ╰───╮ + 949 ┤ ╭╯ ╰───╮ ╭╮ ╭╮ ╭╮ + 893 ┤ ╭╯ ╰───╮ ╭─────────────────╯╰───────────╯╰──────────╯╰───────── + 837 ┤ ╭╯ ╰─────────╯ + 781 ┤ │ + 725 ┤ ╭╯ + 668 ┤ ╭╯ + 612 ┤ ╭╯ + 556 ┤╭─╯ + 500 ┼╯ + +# This represents a delay of 1000 / (50 * 18) = ~1.1 ms. +plot ops-per-fixup=50 fixups-per-sec=18 show-delay-millis +---- + Delay (ms) = 0.98 ms (avg), 1.12 ms (final) + 2.00 ┼╮ + 1.80 ┤╰╮ + 1.60 ┤ ╰─╮ + 1.40 ┤ ╰─╮ + 1.20 ┤ ╰─╮ ╭───────────────────────╮ ╭───╮ ╭────╮╭───╮ ╭───╮ ╭──╮ ╭───╮ ╭─ + 1.00 ┤ ╰╮ │ ╰─╯ ╰──╯ ╰╯ ╰──╯ ╰─╯ ╰──╯ ╰─╯ + 0.80 ┤ │ │ + 0.60 ┤ │ │ + 0.40 ┤ │ │ + 0.20 ┤ │ │ + 0.00 ┤ ╰──────────────╯ + +# Ensure that actual ops/sec converges to ~900. +plot ops-per-fixup=50 fixups-per-sec=18 show-actual-ops-per-sec +---- + Actual ops per second = 845.16 ops/sec (avg), 900.00 ops/sec (final) + 1000 ┤ ╭───────────╮ + 900 ┤ ╭──╯ ╰──────╮ ╭──────────────────────────────────────────────────── + 800 ┤ ╭─╯ ╰──╯ + 700 ┤ ╭╯ + 600 ┤ ╭╯ + 500 ┤ ╭╯ + 400 ┤ ╭╯ + 300 ┤ ╭─╯ + 200 ┤ ╭╯ + 100 ┤╭─╯ + 0 ┼╯ + +# Fixup queue size should stay <= ~5. +plot ops-per-fixup=50 fixups-per-sec=18 show-queue-size +---- + Fixup queue size = 2.06 fixups (avg), 3 fixups (final) + 4.00 ┤ ╭────╮ + 3.60 ┤ │ ╰╮ + 3.20 ┤ ╭───╯ ╰───────────────╮ ╭─ + 2.80 ┤ │ │ │ + 2.40 ┤ │ ╰╮ ╭╯ + 2.00 ┤ ╭───╯ ╰────────────────────────────────────────╯ + 1.60 ┤ │ + 1.20 ┤ ╭───╯ + 0.80 ┤ │ + 0.40 ┤ │ + 0.00 ┼──────────╯ + +# ---------- +# Fixup processing is 50% below sustainable rate. +# ---------- + +# Allowed ops/sec should converge to 50 * 10 = ~500. +plot ops-per-fixup=50 fixups-per-sec=10 +---- + Allowed ops per second = 520.54 ops/sec (avg), 507.74 ops/sec (final) + 717 ┤ ╭───╮ + 691 ┤ │ ╰╮ + 665 ┤ ╭╯ ╰─╮ + 639 ┤ │ ╰─╮ + 613 ┤ ╭╯ ╰╮ + 586 ┤ ╭╯ ╰─╮ + 560 ┤ │ ╰──╮ + 534 ┤╭╯ ╰─╮ ╭────╮ ╭─╮ ╭──╮ ╭───╮ + 508 ┼╯ ╰╮ ╭─╯ ╰─────╮ ╭──╯ ╰────╮╭─╯ ╰─────╮╭──╯ ╰ + 482 ┤ ╰──╮╭────────────────╯ ╰─╯ ╰╯ ╰╯ + 456 ┤ ╰╯ + +# Delay should converge to 1000 / (50 * 10) = ~2 milliseconds. +plot ops-per-fixup=50 fixups-per-sec=10 show-delay-millis +---- + Delay (ms) = 1.94 ms (avg), 1.97 ms (final) + 2.19 ┤ ╭──────╮ ╭─╮ + 2.11 ┤ ╭─╯ ╰────────╯ ╰╮ ╭───╮ ╭─╮ ╭───╮ + 2.03 ┼╮ ╭╯ ╰─╮ ╭─────╯ ╰─╮ ╭───╯ ╰─╮ ╭───╯ ╰─╮ ╭ + 1.95 ┤╰╮ ╭─╯ ╰───╯ ╰─╯ ╰──╯ ╰───╯ + 1.87 ┤ │ ╭─╯ + 1.79 ┤ ╰╮ ╭──╯ + 1.71 ┤ ╰╮ ╭╯ + 1.63 ┤ │ ╭─╯ + 1.55 ┤ ╰╮ ╭─╯ + 1.47 ┤ ╰─╮╭─╯ + 1.39 ┤ ╰╯ + +# Actual ops/sec should converge to ~500. 
+plot ops-per-fixup=50 fixups-per-sec=10 show-actual-ops-per-sec +---- + Actual ops per second = 494.86 ops/sec (avg), 504.00 ops/sec (final) + 677 ┤ ╭─────╮ + 609 ┤ ╭╯ ╰────╮ + 542 ┤ ╭╯ ╰────╮ ╭───╮ + 474 ┤ ╭╯ ╰─────────────────────────╯ ╰───────────────────────────────── + 406 ┤ ╭╯ + 338 ┤ ╭╯ + 271 ┤ ╭╯ + 203 ┤ ╭╯ + 135 ┤ ╭╯ + 68 ┤╭╯ + 0 ┼╯ + +# Fixup queue size should stay <= ~5. +plot ops-per-fixup=50 fixups-per-sec=10 show-queue-size +---- + Fixup queue size = 3.91 fixups (avg), 5 fixups (final) + 6.00 ┤ ╭─╮ + 5.40 ┤ │ │ + 4.80 ┤ ╭──╮╭────╯ ╰───────╮ ╭─╮ ╭╮ ╭ + 4.20 ┤ ╭─╯ ╰╯ ╰─────╯ ╰──────────────╯╰───────────────────────────────╯ + 3.60 ┤ │ + 3.00 ┤ ╭╮╭─╯ + 2.40 ┤ │││ + 1.80 ┤ ╭─╯╰╯ + 1.20 ┤ ╭─╯ + 0.60 ┤ │ + 0.00 ┼───╯ + +# Rate of change in fixup queue size should initially be positive, then +# negative, and then stabilize at ~0. +plot ops-per-fixup=50 fixups-per-sec=10 show-queue-size-rate +---- + Fixup queue size rate = 0.40 fixups/sec (avg) + 2.90 ┤ ╭─╮ + 2.56 ┤ ╭╮ ╭─╯ ╰╮ + 2.22 ┤ ││╭╮│ │╭╮ ╭╮ + 1.87 ┤ ╭─╯││╰╯ ││╰─╮ │╰╮ + 1.53 ┤ │ ││ ││ ╰─╯ │ + 1.18 ┤ ╭─╯ ╰╯ ╰╯ │ ╭╮ ╭ + 0.84 ┤ │ ╰────╮ ╭╮ ││ │ + 0.49 ┤ │ ╰──╮ ││ ││ │ + 0.15 ┼───╯ │ ╭╯│ ╭────────╯╰───────────────────────────────╯ + -0.20 ┤ │ ╭──╯ ╰─────╯ + -0.54 ┤ ╰──╯ + +# ---------- +# Fixup processing is 75% below sustainable rate. +# ---------- + +# Allowed ops/sec should converge to 50 * 5 = ~250. +plot ops-per-fixup=50 fixups-per-sec=5 +---- + Allowed ops per second = 290.59 ops/sec (avg), 259.23 ops/sec (final) + 529 ┤╭╮ + 500 ┼╯╰──╮ + 470 ┤ ╰─╮ + 440 ┤ ╰─╮ + 410 ┤ ╰─╮ + 380 ┤ ╰──╮ + 350 ┤ ╰──╮ + 320 ┤ ╰─╮ + 290 ┤ ╰──╮ + 260 ┤ ╰────────╮ ╭──────╮ ╭────────╮ ╭─────── + 231 ┤ ╰─────────╯ ╰────────────╯ ╰───────────╯ + +# Delay should converge to 1000 / (50 * 5) = ~4 milliseconds. +plot height=15 ops-per-fixup=50 fixups-per-sec=5 show-delay-millis +---- + Delay (ms) = 3.62 ms (avg), 3.86 ms (final) + 4.40 ┤ ╭╮ ╭─╮ ╭─╮ + 4.23 ┤ │╰────╮ ╭─╯ ╰╮ ╭──╯ ╰─╮ + 4.06 ┤ ╭╯ ╰──╮ ╭───╯ ╰───╮ ╭─╯ ╰──╮ + 3.89 ┤ ╭─╯ ╰───╮ ╭╯ ╰───╮╭───╯ ╰───╮ ╭─ + 3.73 ┤ ╭────╯ ╰─╯ ╰╯ ╰─╯ + 3.56 ┤ ╭──╯ + 3.39 ┤ ╭─╯ + 3.23 ┤ ╭╯ + 3.06 ┤ ╭╯ + 2.89 ┤ ╭──╯ + 2.72 ┤ ╭╯ + 2.56 ┤ ╭─╯ + 2.39 ┤ ╭─╯ + 2.22 ┤ ╭──╯ + 2.06 ┼╮ ╭──╯ + 1.89 ┤╰─╯ + +# Actual ops/sec should converge to ~250. +plot height=15 ops-per-fixup=50 fixups-per-sec=5 show-actual-ops-per-sec +---- + Actual ops per second = 277.22 ops/sec (avg), 263.00 ops/sec (final) + 486 ┤ ╭─╮ + 453 ┤ ╭╯ ╰─╮ + 421 ┤ │ ╰──╮ + 389 ┤ ╭╯ ╰─╮ + 356 ┤ │ ╰──╮ + 324 ┤ ╭╯ ╰──╮ + 292 ┤ ╭╯ ╰─────╮ + 259 ┤ │ ╰────────╮ ╭───────────────╮ ╭──────────────╮ ╭────── + 227 ┤ ╭╯ ╰─╯ ╰────╯ ╰─────╯ + 194 ┤ │ + 162 ┤ ╭╯ + 130 ┤ ╭╯ + 97 ┤ │ + 65 ┤╭╯ + 32 ┤│ + 0 ┼╯ + +# Fixup queue size rises above limit. Because it's not too far from the target +# queue size, the pacer doesn't push to bring it down further towards the +# target. 
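+# With ~9 queued fixups and a target of 5, updateOpsPerSec asks for a
+# reduction of only (5 - 9) / 10 = -0.4 fixups/sec, so the downward pressure
+# is mild.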
+plot height=15 seconds=60 ops-per-fixup=50 fixups-per-sec=5 show-queue-size +---- + Fixup queue size = 7.88 fixups (avg), 9 fixups (final) + 9.00 ┤ ╭╮ ╭╮ ╭╮ ╭╮ ╭╮ ╭ + 8.40 ┤ ││ ││ ││ ││ ││ │ + 7.80 ┤ ╭───╯╰──────╯╰──────╯╰─────────╯╰─────────────────────────────────────────╯╰─────────╯ + 7.20 ┤ ╭╯ + 6.60 ┤ │ + 6.00 ┤ ╭╯ + 5.40 ┤ │ + 4.80 ┤ │ + 4.20 ┤ │ + 3.60 ┤ │ + 3.00 ┤╭╯ + 2.40 ┤│ + 1.80 ┤│ + 1.20 ┤│ + 0.60 ┤│ + 0.00 ┼╯ diff --git a/pkg/sql/vecindex/testdata/pacer/fixup-backlog.ddt b/pkg/sql/vecindex/testdata/pacer/fixup-backlog.ddt new file mode 100644 index 000000000000..c75b4c7b4ebc --- /dev/null +++ b/pkg/sql/vecindex/testdata/pacer/fixup-backlog.ddt @@ -0,0 +1,128 @@ +# ---------- +# Start pacer with backlog of 100 fixups. +# ---------- + +# Pacer takes several seconds to discover the optimal allowed ops/sec (~400) +# that will reduce the fixup queue by the target of ~2 fixups per second. While +# it's possible to reduce it faster by throttling ops/sec more aggressively, +# it's a better user experience to reduce the queue more gradually over time. +plot initial-fixups=100 ops-per-fixup=50 fixups-per-sec=10 +---- + Allowed ops per second = 419.39 ops/sec (avg), 407.68 ops/sec (final) + 500 ┼─╮ + 489 ┤ ╰────╮ + 478 ┤ ╰─╮ + 467 ┤ ╰─╮ + 456 ┤ ╰─╮ + 445 ┤ ╰╮ + 434 ┤ ╰─╮ + 423 ┤ ╰──╮ ╭──╮ ╭──╮╭────╮╭───╮╭───╮╭───╮╭───╮ ╭╮ ╭───╮ + 412 ┤ ╰──╮ ╭──────────╯ ╰─╯ ╰╯ ╰╯ ╰╯ ╰╯ ╰╯ ╰──╯╰──╯ ╰ + 401 ┤ ╰─────╮╭───╯ + 390 ┤ ╰╯ + +# Query delay should converge to 1000 / 400 = ~2.5 milliseconds during backlog +# reduction. +plot initial-fixups=100 ops-per-fixup=50 fixups-per-sec=10 show-delay-millis +---- + Delay (ms) = 2.39 ms (avg), 2.45 ms (final) + 2.59 ┤ ╭╮ + 2.53 ┤ ╭╯╰────╮ + 2.47 ┤ ╭──╯ ╰───╮ ╭╮ ╭╮ ╭╮ + 2.42 ┤ ╭─╯ ╰───╯╰──╯╰──────────╮ ╭───────╮ ╭──╮ ╭──╮ ╭─────╯╰───── + 2.36 ┤ ╭──╯ ╰──╯ ╰─╯ ╰─╯ ╰─╯ + 2.30 ┤ ╭╯ + 2.24 ┤ ╭─╯ + 2.18 ┤ ╭─╯ + 2.12 ┤ ╭─╯ + 2.06 ┤ ╭────╯ + 2.00 ┼──╯ + +# Fixup queue size should steadily decrease over the 10 second period. +plot initial-fixups=100 ops-per-fixup=50 fixups-per-sec=10 show-queue-size +---- + Fixup queue size = 91.66 fixups (avg), 83 fixups (final) + 99.00 ┼────────╮ + 97.40 ┤ ╰───────────────╮ + 95.80 ┤ │ + 94.20 ┤ ╰────────╮ + 92.60 ┤ ╰───────────╮ + 91.00 ┤ ╰──╮ + 89.40 ┤ ╰──────────╮ + 87.80 ┤ ╰────╮ + 86.20 ┤ ╰─────────╮ + 84.60 ┤ ╰────────────╮ + 83.00 ┤ ╰─ + +# Average queue size reduction should be ~2 fixups per second. +plot initial-fixups=100 ops-per-fixup=50 fixups-per-sec=10 show-queue-size-rate +---- + Fixup queue size rate = -1.55 fixups/sec (avg) + -0.37 ┤ ╭──╮ ╭╮ + -0.59 ┤ ╭──╯ │ ╭╮ ╭╯│ + -0.82 ┼──╯ │ ╭─╯│ ╭╯ │ ╭╮ ╭╮ ╭╮ + -1.04 ┤ │╭───╯ │ ╭╯ │ ││ ╭╮ ╭╯│ ╭╯│ + -1.26 ┤ ╰╯ │ ╭─╯ │ ╭╯│ ╭╮ ╭╮ ╭╯│ ╭╮ ╭╯ │ ╭╯ │ + -1.49 ┤ ╰─╯ │ ╭╯ │ ╭╯│ ╭╯│ ╭╯ │╭╯│ ╭╮ ╭╯ │╭╮ ╭╮ ╭╯ │ + -1.71 ┤ │ ╭─╯ ╰╮ ╭╯ │ ╭╯ │ ╭╮ ╭╯ ╰╯ │ ╭╯│ ╭╯ ╰╯│ ╭─╯│ ╭╯ │╭ + -1.93 ┤ │ ╭╯ │ ╭╯ ╰─╯ │╭╯│ ╭─╯ │ ╭╯ │ ╭╯ │ ╭╯ │╭╯ ╰╯ + -2.16 ┤ │╭╯ │ ╭╯ ╰╯ │╭╯ ╰─╯ ╰─╯ │╭╯ ╰╯ + -2.38 ┤ ╰╯ │╭╯ ╰╯ ╰╯ + -2.60 ┤ ╰╯ + +# Plot again, this time over 60 seconds to show point where fixup queue has been +# reduced below the allowed level. Ensure that allowed ops/sec rises back up to +# ~500 ops/sec that maintains a constant queue size. 
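+# (Once the backlog is drained, the sustainable rate is 50 ops per fixup * 10
+# fixups/sec = ~500 ops/sec.)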
+plot seconds=60 initial-fixups=100 ops-per-fixup=50 fixups-per-sec=10 +---- + Allowed ops per second = 426.90 ops/sec (avg), 477.04 ops/sec (final) + 500 ┼╮ + 489 ┤│ ╭─╮ + 478 ┤╰╮ ╭─╮ │ ╰ + 467 ┤ │ ╭╮╭╮│ ╰──╮│ + 456 ┤ │ ╭─╮╭─╯││╰╯ ╰╯ + 445 ┤ ╰╮ ╭╯ ╰╯ ╰╯ + 434 ┤ │ ╭──╯ + 424 ┤ │ ╭─╮ ╭╮ ╭╮ ╭──╯ + 413 ┤ ╰╮ ╭───────────────╮╭────────╮ ╭──╯ ╰────╮ ╭─╯╰─╮╭╯╰───╮╭╯ + 402 ┤ │╭───╯ ╰╯ ╰─╯ ╰───╯ ╰╯ ╰╯ + 391 ┤ ╰╯ + +# Over the same period, delay should decrease to 1000 / 500 = ~2 milliseconds +# from ~2.5 ms. +plot seconds=60 initial-fixups=100 ops-per-fixup=50 fixups-per-sec=10 show-delay-millis +---- + Delay (ms) = 2.35 ms (avg), 2.10 ms (final) + 2.59 ┤ ╭╮ + 2.53 ┤ ││ + 2.47 ┤ │╰╮╭╮ ╭╮ ╭╮ + 2.42 ┤ ╭╯ ╰╯╰╮╭╮╭────────╮╭────╮╭───────╯╰╮ ╭──╮╭─────╮ ╭╮ ╭───╯│ + 2.36 ┤ │ ╰╯╰╯ ╰╯ ╰╯ ╰───╯ ╰╯ ╰───╯╰─╯ ╰───╮ + 2.30 ┤ │ ╰─╮ + 2.24 ┤ ╭╯ ╰─╮ + 2.18 ┤ │ ╰──╮╭╮╭╮╭╮ ╭╮ + 2.12 ┤ │ ╰╯╰╯╰╯│ ╭──╯│ + 2.06 ┤╭╯ ╰─╯ │ ╭ + 2.00 ┼╯ ╰─╯ + +# Show the fixup queue over the 60 second interval. It should steadily drop down +# closer to the target. +plot seconds=60 height=15 initial-fixups=100 ops-per-fixup=50 fixups-per-sec=10 show-queue-size +---- + Fixup queue size = 49.91 fixups (avg), 12 fixups (final) + 99.00 ┼────╮ + 93.13 ┤ ╰───╮ + 87.27 ┤ ╰───╮ + 81.40 ┤ ╰────╮ + 75.53 ┤ ╰────╮ + 69.67 ┤ ╰─────╮ + 63.80 ┤ ╰────╮ + 57.93 ┤ ╰───╮ + 52.07 ┤ ╰─────╮ + 46.20 ┤ ╰───╮ + 40.33 ┤ ╰────╮ + 34.47 ┤ ╰────╮ + 28.60 ┤ ╰─────╮ + 22.73 ┤ ╰────╮ + 16.87 ┤ ╰──────────╮ + 11.00 ┤ ╰───────── diff --git a/pkg/sql/vecindex/testdata/pacer/fixups-per-sec.ddt b/pkg/sql/vecindex/testdata/pacer/fixups-per-sec.ddt new file mode 100644 index 000000000000..272439197910 --- /dev/null +++ b/pkg/sql/vecindex/testdata/pacer/fixups-per-sec.ddt @@ -0,0 +1,88 @@ +# ---------- +# Simulate edge case where fixups have slowed to only 1 per second. +# ---------- + +# Query rate starts high, which results in a rapidly increasing fixup queue. The +# pacer should react by increasing the delay until it has the fixup queue under +# control and it begins steadily decreasing. +plot seconds=60 ops-per-fixup=50 fixups-per-sec=1 +---- + Allowed ops per second = 55.89 ops/sec (avg), 42.93 ops/sec (final) + 500 ┼╮ + 451 ┤╰╮ + 403 ┤ │ + 354 ┤ ╰╮ + 306 ┤ │ + 257 ┤ │ + 209 ┤ ╰─╮ + 160 ┤ ╰╮ + 112 ┤ ╰──╮ + 63 ┤ ╰────────╮ ╭────────────────────────────────────────────────── + 15 ┤ ╰────────────────────╯ + +# Delay rises rapidly until fixup queue size starts decreasing and pacer can +# begin reducing delay. It should stabilize at 1000 / (50 * 1) = ~20 ms. +plot seconds=60 ops-per-fixup=50 fixups-per-sec=1 show-delay-millis +---- + Delay (ms) = 28.32 ms (avg), 23.29 ms (final) + 67.25 ┤ ╭──╮ + 60.73 ┤ ╭────╯ ╰──╮ + 54.20 ┤ │ ╰╮╭─╮ + 47.68 ┤ ╭──╯ ╰╯ ╰─╮ + 41.15 ┤ ╭╯ ╰──╮ + 34.63 ┤ ╭─╯ ╰╮╭╮ + 28.10 ┤ ╭──╯ ╰╯╰─╮ ╭─╮ ╭───╮ ╭─╮ ╭ + 21.58 ┤ ╭──╯ ╰──╮ ╭───╯ ╰──╮╭───╯ ╰───╮╭─────╯ ╰──────╯ + 15.05 ┤ ╭──╯ ╰─╯ ╰╯ ╰╯ + 8.53 ┤ ╭──╯ + 2.00 ┼──╯ + +# Fixup queue size grows rapidly and then is slowly reduced at 2 fixups / second +# until it stabilizes at something closer to 5 fixups. +plot seconds=60 ops-per-fixup=50 fixups-per-sec=1 show-queue-size +---- + Fixup queue size = 10.19 fixups (avg), 8 fixups (final) + 19.00 ┤ ╭╮ ╭╮ + 17.10 ┤ ╭───╯╰─╯╰─────╮ + 15.20 ┤ ╭─╯ ╰───╮╭╮ + 13.30 ┤ ╭╯ ╰╯╰──╮ + 11.40 ┤ │ ╰───╮╭╮ + 9.50 ┤ ╭╯ ╰╯╰───╮ + 7.60 ┤ │ ╰──────╮╭─╮╭───────╮╭──────────╮╭────────────────── + 5.70 ┤╭╯ ╰╯ ╰╯ ╰╯ ╰╯ + 3.80 ┤│ + 1.90 ┤│ + 0.00 ┼╯ + +# Fixup queue size increases at high rate and then goes negative once throttling +# kicks in. 
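+# (It is the rate of change that goes negative; the queue size itself stays
+# positive and simply shrinks.)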
+plot seconds=60 ops-per-fixup=50 fixups-per-sec=1 show-queue-size-rate +---- + Fixup queue size rate = 0.10 fixups/sec (avg) + 4.79 ┤╭─╮ + 4.21 ┤│ │╭╮ + 3.63 ┤│ ╰╯│ + 3.05 ┤│ ╰╮ + 2.47 ┤│ │ + 1.89 ┤│ │╭╮ + 1.32 ┤│ ╰╯│ ╭╮ ╭╮ ╭╮ ╭╮ ╭╮ ╭╮ ╭╮ ╭ + 0.74 ┤│ ╰╮││ ╭╮ ╭╮ ╭╮ ╭╮ ││ ││ ││ ││ ││ ││ │ + 0.16 ┼╯ ╰╯│ ││╭─╮ ╭─╮ │╰╮ ╭─╮ ││ ╭╮ ╭╮││╭╮╭─╮╭───────╮╭──╯│ ││╭──╮╭──╯╰─╯│ │╰────╯╰─╯ + -0.42 ┤ ╰─╯╰╯ │╭─╮│ │ │ │ │ │ │╰╮ ││ │││╰╯││ ││ ││ ╰─╯╰╯ ││ ╰─╯ + -1.00 ┤ ╰╯ ╰╯ ╰─╯ ╰──╯ ╰─╯ ╰─╯╰─╯╰╯ ╰╯ ╰╯ ╰╯ ╰╯ + +# Show actual ops/sec, which should average ~50. +plot seconds=60 ops-per-fixup=50 fixups-per-sec=1 show-actual-ops-per-sec +---- + Actual ops per second = 55.51 ops/sec (avg), 48.00 ops/sec (final) + 406 ┤ ╭╮ + 365 ┤ ││ + 325 ┤╭╯│ + 284 ┤│ ╰╮ + 244 ┤│ │ + 203 ┤│ ╰╮ + 162 ┤│ ╰╮ + 122 ┤│ ╰╮ + 81 ┤│ ╰──╮ ╭╮ ╭╮ + 41 ┤│ ╰───────────╮ ╭───────────────────────╯╰───────────╯╰────────────── + 0 ┼╯ ╰──────────────╯ diff --git a/pkg/sql/vecindex/testdata/pacer/initial-ops-per-sec.ddt b/pkg/sql/vecindex/testdata/pacer/initial-ops-per-sec.ddt new file mode 100644 index 000000000000..b8e585a58e59 --- /dev/null +++ b/pkg/sql/vecindex/testdata/pacer/initial-ops-per-sec.ddt @@ -0,0 +1,177 @@ +# ---------- +# Start pacer at an allowed rate of 1 ops/sec and ramp to 500 ops/sec. +# ---------- + +# Pacer should be able to quickly ramp up from minimum allowed ops/sec in < 10 +# seconds. +plot initial-ops-per-sec=1 ops-per-fixup=50 fixups-per-sec=10 +---- + Allowed ops per second = 229.05 ops/sec (avg), 491.71 ops/sec (final) + 629 ┤ ╭──╮ + 567 ┤ ╭──────╯ ╰──────╮ + 504 ┤ ╭╯ ╰─────── + 441 ┤ ╭─╯ + 378 ┤ ╭─╯ + 315 ┤ ╭──╯ + 252 ┤ ╭─╯ + 190 ┤ ╭────╯ + 127 ┤ ╭──────╯ + 64 ┤ ╭──────────────╯ + 1 ┼───────────────────────────╯ + +# Queue size should stabilize at ~5 fixups. +plot initial-ops-per-sec=1 ops-per-fixup=50 fixups-per-sec=10 show-queue-size +---- + Fixup queue size = 0.86 fixups (avg), 5 fixups (final) + 5.00 ┤ ╭ + 4.50 ┤ │ + 4.00 ┤ ╭─╮ ╭─────╯ + 3.50 ┤ │ │ │ + 3.00 ┤ ╭───╯ ╰─╯ + 2.50 ┤ │ + 2.00 ┤ ╭────╯ + 1.50 ┤ │ + 1.00 ┤ ╭╮ ╭╮ ╭╮ ╭╮ ╭╮ ╭──────╯ + 0.50 ┤ ││ ││ ││ ││ ││ ╭╮ │ + 0.00 ┼───────────────────────────╯╰─────────────╯╰──╯╰─╯╰───╯╰──╯╰──╯ + +# Show the query delay during ramp. It starts at 1 second, but should rapidly +# drop to ~2 ms. +plot initial-ops-per-sec=1 ops-per-fixup=50 fixups-per-sec=10 show-delay-millis +---- + Delay (ms) = 119.09 ms (avg), 2.03 ms (final) + 1000 ┼────────╮ + 900 ┤ │ + 800 ┤ │ + 700 ┤ │ + 601 ┤ │ + 501 ┤ │ + 401 ┤ │ + 301 ┤ │ + 201 ┤ │ + 101 ┤ ╰─────────╮ + 2 ┤ ╰────────────────────────────────────────────────────────────────────── + +# ---------- +# Start pacer at an allowed rate of 1000 ops/sec and drop to 500 ops/sec. +# ---------- + +# Pacer should be able to throttle ops/sec in < 10 seconds, without too much +# over-correction. +plot initial-ops-per-sec=1000 ops-per-fixup=50 fixups-per-sec=10 +---- + Allowed ops per second = 539.52 ops/sec (avg), 507.35 ops/sec (final) + 1019 ┼──╮ + 960 ┤ ╰─╮ + 901 ┤ ╰─╮ + 842 ┤ ╰╮ + 783 ┤ ╰╮ + 724 ┤ ╰─╮ + 665 ┤ ╰─╮ + 605 ┤ ╰─╮ + 546 ┤ ╰──╮ ╭────────────╮ ╭─────────╮ ╭ + 487 ┤ ╰──────╮ ╭─────╯ ╰──────────────╯ ╰─────────╯ + 428 ┤ ╰─────────╯ + +# Fixup queue size will rise above allowed level, and ops/sec will be further +# throttled until queue size drops below threshold. 
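+# The initial 1000 ops/sec generates 1000 / 50 = 20 fixups/sec, double the 10
+# fixups/sec that can be processed, so the queue grows until throttling brings
+# ops/sec back down toward ~500.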
+plot initial-ops-per-sec=1000 ops-per-fixup=50 fixups-per-sec=10 show-queue-size +---- + Fixup queue size = 7.99 fixups (avg), 8 fixups (final) + 10.00 ┤ ╭───╮╭──────╮ + 9.00 ┤ ╭─╯ ╰╯ ╰───────╮ ╭╮ ╭──╮ ╭╮ + 8.00 ┤ ╭──╯ ╰───────╮ ╭────╯╰────╯ ╰───────╮ ╭─────╮╭─────╯╰──────── + 7.00 ┤ ╭╯ ╰─╯ ╰─╯ ╰╯ + 6.00 ┤ ╭╯ + 5.00 ┤ ╭╯ + 4.00 ┤ ╭╯ + 3.00 ┤ ╭╯ + 2.00 ┤ ╭╯ + 1.00 ┤╭╯ + 0.00 ┼╯ + +# Delay should converge to the ~2 ms range. +plot initial-ops-per-sec=1000 ops-per-fixup=50 fixups-per-sec=10 show-delay-millis +---- + Delay (ms) = 1.90 ms (avg), 1.97 ms (final) + 2.34 ┤ ╭───────────╮ + 2.10 ┤ ╭─────╯ ╰────╮ ╭──────────────╮ ╭─────────╮ + 1.87 ┤ ╭───╯ ╰────────────╯ ╰─────────╯ ╰ + 1.64 ┤ ╭──╯ + 1.40 ┤ ╭──╯ + 1.17 ┤ ╭───╯ + 0.93 ┼╮ │ + 0.70 ┤│ │ + 0.47 ┤│ ╭╯ + 0.23 ┤│ │ + 0.00 ┤╰─╯ + +# ---------- +# Start pacer at an allowed rate of 2000 ops/sec that is significantly higher +# than the operation arrival rate. +# ---------- + +# Since the allowed rate of 2000 ops/sec is higher than actual ops/sec, this +# rate should not materially change over the course of the run. +plot initial-ops-per-sec=2000 ops-per-fixup=50 fixups-per-sec=40 +---- + Allowed ops per second = 2058.51 ops/sec (avg), 2058.80 ops/sec (final) + 2059 ┤╭──────────────────────────────────────────────────────────────────────────────────────── + 2053 ┤│ + 2047 ┤│ + 2041 ┤│ + 2035 ┤│ + 2029 ┤│ + 2024 ┤│ + 2018 ┤│ + 2012 ┤│ + 2006 ┤│ + 2000 ┼╯ + +# Actual ops/sec should quickly rise to the max rate of 1000. +plot initial-ops-per-sec=2000 ops-per-fixup=50 fixups-per-sec=40 show-actual-ops-per-sec +---- + Actual ops per second = 949.95 ops/sec (avg), 1000.00 ops/sec (final) + 1000 ┤ ╭──────────────────────────────────────────────────────────────────────────────── + 900 ┤ ╭╯ + 800 ┤ ╭╯ + 700 ┤ ╭╯ + 600 ┤ ╭╯ + 500 ┤ │ + 400 ┤ ╭╯ + 300 ┤ ╭╯ + 200 ┤ ╭╯ + 100 ┤╭╯ + 0 ┼╯ + +# Average fixup queue size should be very low. +plot initial-ops-per-sec=2000 ops-per-fixup=50 fixups-per-sec=40 show-queue-size +---- + Fixup queue size = 0.02 fixups (avg), 1 fixups (final) + 1.00 ┤ ╭ + 0.90 ┤ │ + 0.80 ┤ ╭╮ │ + 0.70 ┤ ││ │ + 0.60 ┤ ╭╮ ││ │ + 0.50 ┤ ││ ││ │ + 0.40 ┤ ││ ││ ╭╮ │ + 0.30 ┤ ││ ││ ││ │ + 0.20 ┤ ││ ││ ╭╮ ││ │ + 0.10 ┤ ││ ││ ││ ││ │ + 0.00 ┼───╯╰──╯╰──╯╰───────────────────────────────────────────────────────────────────────╯╰──╯ + +# Delay should quickly fall to zero. +plot initial-ops-per-sec=2000 ops-per-fixup=50 fixups-per-sec=40 show-delay-millis +---- + Delay (ms) = 0.00 ms (avg), 0.00 ms (final) + 0.50 ┼╮ + 0.45 ┤│ + 0.40 ┤│ + 0.35 ┤│ + 0.30 ┤│ + 0.25 ┤│ + 0.20 ┤│ + 0.15 ┤│ + 0.10 ┤│ + 0.05 ┤│ + 0.00 ┤╰──────────────────────────────────────────────────────────────────────────────────────── diff --git a/pkg/sql/vecindex/testdata/pacer/noise.ddt b/pkg/sql/vecindex/testdata/pacer/noise.ddt new file mode 100644 index 000000000000..4194f59ad6a2 --- /dev/null +++ b/pkg/sql/vecindex/testdata/pacer/noise.ddt @@ -0,0 +1,104 @@ +# ---------- +# Add 25% random noise to rate of operation arrival and fixup completion. +# ---------- + +# Expect allowed ops/sec to stay ~500 +- the noise. +plot ops-per-fixup=50 fixups-per-sec=10 noise=0.2 +---- + Allowed ops per second = 527.80 ops/sec (avg), 515.00 ops/sec (final) + 722 ┤ ╭──╮ + 694 ┤ │ ╰─╮ + 665 ┤ │ ╰╮ + 637 ┤ ╭╯ ╰─╮ + 608 ┤ │ ╰─╮ + 580 ┤ ╭╯ ╰─╮ ╭─╮ ╭─╮ + 551 ┤╭╯ ╰╮ ╭──╯ ╰──╯ ╰───╮ ╭──────╮ + 523 ┼╯ ╰─╮ ╭╯ ╰────────╮╭╮ ╭─╮ ╭───────╯ ╰─ + 494 ┤ ╰───╮ ╭─╯ ╰╯╰──╯ ╰──╯ + 466 ┤ ╰────╮╭───────╯ + 437 ┤ ╰╯ + +# Show actual ops/sec. Note that the scale starts at 0, so it appears smoother. 
+plot ops-per-fixup=50 fixups-per-sec=10 noise=0.2 show-actual-ops-per-sec +---- + Actual ops per second = 501.16 ops/sec (avg), 534.00 ops/sec (final) + 672 ┤ ╭─────╮ + 605 ┤ ╭╯ ╰───╮ + 538 ┤ │ ╰────╮ ╭─────────────────────╮ ╭─────────── + 470 ┤ ╭╯ ╰──────────────────╯ ╰────────────╯ + 403 ┤ ╭╯ + 336 ┤ ╭╯ + 269 ┤ ╭╯ + 202 ┤ ╭╯ + 134 ┤ ╭╯ + 67 ┤╭╯ + 0 ┼╯ + +# Fixup queue size will fluctuate a bit more with noise. +plot ops-per-fixup=50 fixups-per-sec=10 noise=0.2 show-queue-size +---- + Fixup queue size = 4.80 fixups (avg), 6 fixups (final) + 7.00 ┤ ╭╮ + 6.30 ┤ ╭────╮ ╭╮ ╭───────╮╭────╮ ╭──╮ ╭───╯╰── + 5.60 ┤ │ │ ││╭╮ │ ╰╯ │ │ │ │ + 4.90 ┤ ╭───╯ ╰──────╮ ╭──╮╭────╯╰╯╰──╯ ╰──╯ ╰──╯ + 4.20 ┤ ╭╮╭───╯ ╰───────╮╭───╯ ╰╯ + 3.50 ┤ │││ ││ + 2.80 ┤ ╭─╯╰╯ ╰╯ + 2.10 ┤ ╭╯ + 1.40 ┤ │ + 0.70 ┤╭────╯ + 0.00 ┼╯ + +# Delay will fluctuate a bit more with noise. +plot ops-per-fixup=50 fixups-per-sec=10 noise=0.2 show-delay-millis +---- + Delay (ms) = 1.95 ms (avg), 1.63 ms (final) + 2.53 ┤ ╭╮ ╭──╮ + 2.41 ┤ ╭╮ ╭╮││ │ ╰╮ ╭╮ + 2.29 ┤ ╭╯│ ││││╭╯ │╭╮ ╭╮ ╭╮ ╭╮╭╮ ││ + 2.17 ┤╭╮ ╭─╯ │╭╮ │││╰╯ │││ ││ ╭╮╭╮ ││ │╰╯│ ╭─╯│╭╮ ╭╮ + 2.05 ┤││ ╭╮ │ ││╰╮ │││ ╰╯│ ││╭─╮ ╭╮ ││││╭╯╰╮ ╭╯ ╰─╮ │ ╰╯╰╮ ││ + 1.93 ┼╯╰─╮ ││╭╯ ││ ╰─╯╰╯ ╰╮│││ ╰╮ ││ ╭╮ ╭╯││││ │╭╮│ │ │ │ ╭╯│ + 1.80 ┤ │ │╰╯ ││ ││││ │ ││╭╯│ ╭╯ ││╰╯ ╰╯╰╯ │ ╭╯ │ ╭╮│ │╭╮ + 1.68 ┤ │ ╭╮ │ ╰╯ ╰╯││ │ │╰╯ ╰─╮╭╯ ╰╯ ╰─╯ │╭╯││ ╰╯│ + 1.56 ┤ │ ╭─╮ ╭╯╰─╯ ╰╯ ╰╮│ ╰╯ ╰╯ ╰╯ ╰ + 1.44 ┤ ╰╮│ ╰╮│ ╰╯ + 1.32 ┤ ╰╯ ╰╯ + +# ---------- +# Add 60% random noise to rate of operation arrival and fixup completion. +# ---------- + +# Halve the fixups per second and show 20 seconds of data rather than 10. +# Allowed ops/sec should hover ~250. +plot seconds=20 ops-per-fixup=50 fixups-per-sec=5 noise=0.6 +---- + Allowed ops per second = 261.85 ops/sec (avg), 222.33 ops/sec (final) + 520 ┼──╮ + 488 ┤ ╰╮ + 456 ┤ ╰╮ + 424 ┤ ╰╮ + 392 ┤ ╰╮ + 360 ┤ │ + 329 ┤ ╰╮ + 297 ┤ ╰─╮ ╭────╮ ╭╮ ╭─────╮ + 265 ┤ ╰───╮ ╭───╯ ╰────────╯╰───────╮ ╭───╯ ╰──╮ + 233 ┤ ╰────────╯ ╰───────────────╯ ╰────╮ ╭── + 201 ┤ ╰──╯ + +# Show actual ops/sec. +plot seconds=20 ops-per-fixup=50 fixups-per-sec=5 noise=0.6 show-actual-ops-per-sec +---- + Actual ops per second = 256.47 ops/sec (avg), 210.00 ops/sec (final) + 472 ┤ ╭╮ + 425 ┤ ╭╯╰─╮ + 378 ┤ │ ╰╮ + 331 ┤ ╭╯ ╰─╮ + 283 ┤ │ ╰─╮ ╭──────────╮ ╭───╮ ╭───────╮ + 236 ┤ ╭╯ ╰─────────────╯ ╰───╯ ╰────────────────────────╯ ╰──────╮ + 189 ┤ │ ╰─── + 142 ┤ │ + 94 ┤╭╯ + 47 ┤│ + 0 ┼╯ diff --git a/pkg/sql/vecindex/vector_index_test.go b/pkg/sql/vecindex/vector_index_test.go index 1c92cc207915..f26421a8e080 100644 --- a/pkg/sql/vecindex/vector_index_test.go +++ b/pkg/sql/vecindex/vector_index_test.go @@ -10,6 +10,7 @@ import ( "cmp" "context" "fmt" + "regexp" "runtime" "sort" "strconv" @@ -31,7 +32,7 @@ import ( "github.com/stretchr/testify/require" ) -func TestDataDriven(t *testing.T) { +func TestVectorIndex(t *testing.T) { defer leaktest.AfterTest(t)() defer log.Scope(t).Close(t) @@ -40,6 +41,10 @@ func TestDataDriven(t *testing.T) { defer state.Stopper.Stop(ctx) datadriven.Walk(t, "testdata", func(t *testing.T, path string) { + if regexp.MustCompile("/.+/").MatchString(path) { + // Skip files that are in subdirs. + return + } if !strings.HasSuffix(path, ".ddt") { // Skip files that are not data-driven tests. return