From a82c9639e05d1c88a1031abe2a7c89a362a1e398 Mon Sep 17 00:00:00 2001 From: Rafi Shamim Date: Thu, 6 Feb 2025 17:30:11 +0000 Subject: [PATCH] logictest: raise TxnLivenessThreshold for multitenant configs Logic tests are flaky due to overload when running in multitenant mode. This patch increases the threshold for transaction heartbeat timeouts, which will make it less likely for foreground operations to be aborted by background jobs like the span config reconciler or the job registry loop to reclaim jobs from dead sessions. Release note: None --- pkg/sql/logictest/BUILD.bazel | 1 + pkg/sql/logictest/logic.go | 28 ++++++++++++++++++++++++++++ 2 files changed, 29 insertions(+) diff --git a/pkg/sql/logictest/BUILD.bazel b/pkg/sql/logictest/BUILD.bazel index a2c8f4a7510e..0f2360775efb 100644 --- a/pkg/sql/logictest/BUILD.bazel +++ b/pkg/sql/logictest/BUILD.bazel @@ -59,6 +59,7 @@ go_library( "//pkg/kv/kvclient/rangefeed", "//pkg/kv/kvserver", "//pkg/kv/kvserver/kvserverbase", + "//pkg/kv/kvserver/txnwait", "//pkg/multitenant/tenantcapabilities", "//pkg/security/username", "//pkg/server", diff --git a/pkg/sql/logictest/logic.go b/pkg/sql/logictest/logic.go index 0d8ffea26318..8dfec5ef67bd 100644 --- a/pkg/sql/logictest/logic.go +++ b/pkg/sql/logictest/logic.go @@ -42,6 +42,7 @@ import ( "github.com/cockroachdb/cockroach/pkg/kv/kvclient/rangefeed" "github.com/cockroachdb/cockroach/pkg/kv/kvserver" "github.com/cockroachdb/cockroach/pkg/kv/kvserver/kvserverbase" + "github.com/cockroachdb/cockroach/pkg/kv/kvserver/txnwait" "github.com/cockroachdb/cockroach/pkg/multitenant/tenantcapabilities" "github.com/cockroachdb/cockroach/pkg/security/username" "github.com/cockroachdb/cockroach/pkg/server" @@ -1709,6 +1710,21 @@ func (t *logicTest) newCluster( t.Fatal(err) } } + if _, err := conn.Exec( + "RESET CLUSTER SETTING kv.closed_timestamp.target_duration", + ); err != nil { + t.Fatal(err) + } + if _, err := conn.Exec( + "RESET CLUSTER SETTING 
kv.closed_timestamp.side_transport_interval", + ); err != nil { + t.Fatal(err) + } + if _, err := conn.Exec( + "RESET CLUSTER SETTING kv.rangefeed.closed_timestamp_refresh_interval", + ); err != nil { + t.Fatal(err) + } } capabilities := toa.capabilities @@ -4353,6 +4369,18 @@ func RunLogicTest( if *printErrorSummary { defer lt.printErrorSummary() } + if config.UseSecondaryTenant == logictestbase.Always { + // Under multitenant configs running in EngFlow, we have seen that logic + // tests can be flaky due to an overload condition where schema change + // transactions do not heartbeat quickly enough. This allows background jobs + // such as the spanconfig reconciler or the job registry "remove claims from + // dead sessions" loop to abort those transactions. + // See https://github.com/cockroachdb/cockroach/pull/140400#issuecomment-2634346278 + // and https://github.com/cockroachdb/cockroach/issues/140494#issuecomment-2640208187 + // for a detailed analysis of this issue. + cleanup := txnwait.TestingOverrideTxnLivenessThreshold(30 * time.Second) + defer cleanup() + } // Each test needs a copy because of Parallel serverArgsCopy := serverArgs serverArgsCopy.ForceProductionValues = serverArgs.ForceProductionValues || nonMetamorphicBatchSizes