diff --git a/fe/fe-core/src/main/java/org/apache/doris/catalog/TabletStatMgr.java b/fe/fe-core/src/main/java/org/apache/doris/catalog/TabletStatMgr.java index ee584bfe2d853b..4a00e0bf15a02a 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/catalog/TabletStatMgr.java +++ b/fe/fe-core/src/main/java/org/apache/doris/catalog/TabletStatMgr.java @@ -25,6 +25,7 @@ import org.apache.doris.common.Status; import org.apache.doris.common.ThreadPoolManager; import org.apache.doris.common.util.MasterDaemon; +import org.apache.doris.metric.MetricRepo; import org.apache.doris.system.Backend; import org.apache.doris.thrift.BackendService; import org.apache.doris.thrift.TNetworkAddress; @@ -244,7 +245,7 @@ protected void runAfterCatalogReady() { public void waitForTabletStatUpdate() { boolean ok = true; try { - if (!updateTabletStatsLatch.await(60, TimeUnit.SECONDS)) { + if (!updateTabletStatsLatch.await(600, TimeUnit.SECONDS)) { LOG.info("timeout waiting {} update tablet stats tasks finish after {} seconds.", - updateTabletStatsLatch.getCount(), 60); + updateTabletStatsLatch.getCount(), 600); ok = false; @@ -261,6 +262,9 @@ public void waitForTabletStatUpdate() { } LOG.warn("Failed to update tablet stats reason: {}, unfinished backends: {}", status.getErrorMsg(), unfinishedBackendIds); + if (MetricRepo.isInit) { + MetricRepo.COUNTER_UPDATE_TABLET_STAT_FAILED.increase(1L); + } } } diff --git a/fe/fe-core/src/main/java/org/apache/doris/metric/MetricRepo.java b/fe/fe-core/src/main/java/org/apache/doris/metric/MetricRepo.java index 2f87515509a764..9705e6a05426a1 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/metric/MetricRepo.java +++ b/fe/fe-core/src/main/java/org/apache/doris/metric/MetricRepo.java @@ -101,6 +101,8 @@ public final class MetricRepo { public static LongCounterMetric COUNTER_CACHE_HIT_SQL; public static LongCounterMetric COUNTER_CACHE_HIT_PARTITION; + public static LongCounterMetric COUNTER_UPDATE_TABLET_STAT_FAILED; + public static LongCounterMetric COUNTER_EDIT_LOG_WRITE; public static 
LongCounterMetric COUNTER_EDIT_LOG_READ; public static LongCounterMetric COUNTER_EDIT_LOG_CURRENT; @@ -495,6 +497,10 @@ public Long getValue() { "counter of failed transactions"); COUNTER_TXN_FAILED.addLabel(new MetricLabel("type", "failed")); DORIS_METRIC_REGISTER.addMetrics(COUNTER_TXN_FAILED); + COUNTER_UPDATE_TABLET_STAT_FAILED = new LongCounterMetric("update_tablet_stat_failed", MetricUnit.REQUESTS, + "counter of failed to update tablet stat"); + COUNTER_UPDATE_TABLET_STAT_FAILED.addLabel(new MetricLabel("type", "failed")); + DORIS_METRIC_REGISTER.addMetrics(COUNTER_UPDATE_TABLET_STAT_FAILED); HISTO_TXN_EXEC_LATENCY = METRIC_REGISTER.histogram( MetricRegistry.name("txn", "exec", "latency", "ms")); HISTO_TXN_PUBLISH_LATENCY = METRIC_REGISTER.histogram(