diff --git a/pkg/sql/catalog/lease/count.go b/pkg/sql/catalog/lease/count.go index 11e44a326f68..11c6bad3bd1b 100644 --- a/pkg/sql/catalog/lease/count.go +++ b/pkg/sql/catalog/lease/count.go @@ -59,6 +59,30 @@ func CountLeases( return detail.count, nil } +// isRegionColumnError detects if a InvalidParameterValue or +// UndefinedFunction are observed because of the region column. +// This can happen because of the following reasons: +// 1. The currently leased system database is not multi-region, but the leased +// system.lease table is multi-region. +// 2. The currently leased system database is multi-region, but the system.lease +// descriptor we have is not multi-region. +// +// Both cases are transient and are a side effect of using a cache system +// database descriptor for checks. +func isTransientRegionColumnError(err error) bool { + // No error detected nothing else needs to be checked. + if err == nil { + return false + } + // Some unrelated error was observed, so this is not linked to the + // region column transitioning from bytes to crdb_region. + if pgerror.GetPGCode(err) != pgcode.UndefinedFunction && + pgerror.GetPGCode(err) != pgcode.InvalidParameterValue { + return false + } + return strings.Contains(err.Error(), "crdb_internal_region") +} + func countLeasesWithDetail( ctx context.Context, db isql.DB, @@ -84,13 +108,10 @@ func countLeasesWithDetail( leasingDescIsSessionBased := systemDBVersion != nil leasingMode := readSessionBasedLeasingMode(ctx, settings) whereClauses := make([][]string, 2) - containsSystemDatabase := false forceMultiRegionQuery := false + useBytesOnRetry := false for _, t := range versions { versionClause := "" - if t.ID == keys.SystemDatabaseID { - containsSystemDatabase = true - } if !forAnyVersion { versionClause = fmt.Sprintf("AND version = %d", t.Version) } @@ -156,20 +177,17 @@ func countLeasesWithDetail( // If we are injecting a raw leases descriptors, that will not have the enum // type set, so convert the region to byte equivalent physical representation. detail, err = countLeasesByRegion(ctx, txn, prober, regionMap, cachedDatabaseRegions, - len(descsToInject) > 0, at, whereClause, usesOldSchema[i]) + len(descsToInject) > 0 || useBytesOnRetry, at, whereClause, usesOldSchema[i]) } else { detail, err = countLeasesNonMultiRegion(ctx, txn, at, whereClause, usesOldSchema[i]) - // It's possible that our cached database descriptor is stale relative to the - // visible lease table descriptor. Because the lease manager uses rangefeeds to - // keep track of new descriptor versions, when converting to a multi-region - // system database its possible for the lease table descriptor update to precede - // the system database descriptor update. - if err != nil && - pgerror.GetPGCode(err) == pgcode.UndefinedFunction && - containsSystemDatabase { - forceMultiRegionQuery = true - return txn.KV().GenerateForcedRetryableErr(ctx, "forcing retry once with MR columns") - } + } + // If any transient region column errors occur then we should retry the count query. + if isTransientRegionColumnError(err) { + forceMultiRegionQuery = true + // If the query was already multi-region aware, then the system database is MR, + // but our lease descriptor has not been upgraded yet. + useBytesOnRetry = cachedDatabaseRegions != nil && cachedDatabaseRegions.IsMultiRegion() + return txn.KV().GenerateForcedRetryableErr(ctx, "forcing retry once with MR columns") } return err })