Skip to content

Commit

Permalink
Merge pull request #138671 from fqazi/backport24.1-138385
Browse files Browse the repository at this point in the history
release-24.1: catalog/lease: handle transition to multi-region system database
  • Loading branch information
fqazi authored Jan 8, 2025
2 parents 650d44c + 64bdc57 commit 802e1cf
Showing 1 changed file with 39 additions and 2 deletions.
41 changes: 39 additions & 2 deletions pkg/sql/catalog/lease/count.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,8 @@ import (
"github.com/cockroachdb/cockroach/pkg/sql/catalog/systemschema"
"github.com/cockroachdb/cockroach/pkg/sql/enum"
"github.com/cockroachdb/cockroach/pkg/sql/isql"
"github.com/cockroachdb/cockroach/pkg/sql/pgwire/pgcode"
"github.com/cockroachdb/cockroach/pkg/sql/pgwire/pgerror"
"github.com/cockroachdb/cockroach/pkg/sql/regionliveness"
"github.com/cockroachdb/cockroach/pkg/sql/sem/tree"
"github.com/cockroachdb/cockroach/pkg/sql/sessiondata"
Expand Down Expand Up @@ -58,6 +60,30 @@ func CountLeases(
return detail.count, nil
}

// isRegionColumnError detects if a InvalidParameterValue or
// UndefinedFunction are observed because of the region column.
// This can happen because of the following reasons:
// 1. The currently leased system database is not multi-region, but the leased
// system.lease table is multi-region.
// 2. The currently leased system database is multi-region, but the system.lease
// descriptor we have is not multi-region.
//
// Both cases are transient and are a side effect of using a cache system
// database descriptor for checks.
func isTransientRegionColumnError(err error) bool {
// No error detected nothing else needs to be checked.
if err == nil {
return false
}
// Some unrelated error was observed, so this is not linked to the
// region column transitioning from bytes to crdb_region.
if pgerror.GetPGCode(err) != pgcode.UndefinedFunction &&
pgerror.GetPGCode(err) != pgcode.InvalidParameterValue {
return false
}
return strings.Contains(err.Error(), "crdb_internal_region")
}

func countLeasesWithDetail(
ctx context.Context,
db isql.DB,
Expand All @@ -83,6 +109,8 @@ func countLeasesWithDetail(
clusterversion.RemoveDevOffset(clusterversion.V24_1_SessionBasedLeasingUpgradeDescriptor.Version()))
leasingMode := readSessionBasedLeasingMode(ctx, settings)
whereClauses := make([][]string, 2)
forceMultiRegionQuery := false
useBytesOnRetry := false
for _, t := range versions {
versionClause := ""
if !forAnyVersion {
Expand Down Expand Up @@ -145,14 +173,23 @@ func countLeasesWithDetail(
err := txn.WithSyntheticDescriptors(descsToInject,
func() error {
var err error
if cachedDatabaseRegions != nil && cachedDatabaseRegions.IsMultiRegion() {
if (cachedDatabaseRegions != nil && cachedDatabaseRegions.IsMultiRegion()) ||
forceMultiRegionQuery {
// If we are injecting a raw leases descriptors, that will not have the enum
// type set, so convert the region to byte equivalent physical representation.
detail, err = countLeasesByRegion(ctx, txn, prober, regionMap, cachedDatabaseRegions,
len(descsToInject) > 0, at, whereClause, usesOldSchema[i])
len(descsToInject) > 0 || useBytesOnRetry, at, whereClause, usesOldSchema[i])
} else {
detail, err = countLeasesNonMultiRegion(ctx, txn, at, whereClause, usesOldSchema[i])
}
// If any transient region column errors occur then we should retry the count query.
if isTransientRegionColumnError(err) {
forceMultiRegionQuery = true
// If the query was already multi-region aware, then the system database is MR,
// but our lease descriptor has not been upgraded yet.
useBytesOnRetry = cachedDatabaseRegions != nil && cachedDatabaseRegions.IsMultiRegion()
return txn.KV().GenerateForcedRetryableErr(ctx, "forcing retry once with MR columns")
}
return err
})
if err != nil {
Expand Down

0 comments on commit 802e1cf

Please sign in to comment.