From dc9cd3dbdace23ac1f590aaee7966705a7e12825 Mon Sep 17 00:00:00 2001 From: Spade A <71589810+SpadeA-Tang@users.noreply.github.com> Date: Thu, 31 Oct 2024 10:12:39 +0800 Subject: [PATCH 01/86] In-memory engine: rearrange parts of metrics panel (#17733) ref tikv/tikv#16141 rearrange parts of metrics panel Signed-off-by: SpadeA-Tang Co-authored-by: ti-chi-bot[bot] <108142056+ti-chi-bot[bot]@users.noreply.github.com> --- metrics/grafana/tikv_details.dashboard.py | 8 +++ metrics/grafana/tikv_details.json | 60 +++++++++++------------ metrics/grafana/tikv_details.json.sha256 | 2 +- 3 files changed, 39 insertions(+), 31 deletions(-) diff --git a/metrics/grafana/tikv_details.dashboard.py b/metrics/grafana/tikv_details.dashboard.py index 402e7ab58b1..2794c863b9c 100644 --- a/metrics/grafana/tikv_details.dashboard.py +++ b/metrics/grafana/tikv_details.dashboard.py @@ -4503,6 +4503,10 @@ def InMemoryEngine() -> RowPanel: ), ], ), + ] + ) + layout.row( + [ graph_panel( title="GC Filter", description="Rang cache engine garbage collection information", @@ -4746,6 +4750,10 @@ def InMemoryEngine() -> RowPanel: ), ], ), + ] + ) + layout.row( + [ graph_panel( title="Auto GC SafePoint Gap", description="The gap between newest auto gc safe point and oldest auto gc safe point of regions cached in the in-memroy engine", diff --git a/metrics/grafana/tikv_details.json b/metrics/grafana/tikv_details.json index 584d399814e..2e48886a520 100644 --- a/metrics/grafana/tikv_details.json +++ b/metrics/grafana/tikv_details.json @@ -38226,7 +38226,7 @@ }, "gridPos": { "h": 7, - "w": 6, + "w": 12, "x": 0, "y": 14 }, @@ -38359,8 +38359,8 @@ }, "gridPos": { "h": 7, - "w": 6, - "x": 6, + "w": 12, + "x": 12, "y": 14 }, "height": null, @@ -38492,9 +38492,9 @@ }, "gridPos": { "h": 7, - "w": 6, - "x": 12, - "y": 14 + "w": 12, + "x": 0, + "y": 21 }, "height": null, "hideTimeOverride": false, @@ -38629,9 +38629,9 @@ }, "gridPos": { "h": 7, - "w": 6, - "x": 18, - "y": 14 + "w": 12, + "x": 12, + "y": 21 }, "heatmap": {}, "height": null, @@ -38736,7 +38736,7 @@ "h": 7, "w": 12, "x": 0, - "y": 21 + "y": 28 }, "heatmap": {}, "height": null, @@ -38837,7 +38837,7 @@ "h": 7, "w": 12, "x": 12, - "y": 21 + "y": 28 }, "height": null, "hideTimeOverride": false, @@ -38974,7 +38974,7 @@ "h": 7, "w": 12, "x": 0, - "y": 28 + "y": 35 }, "heatmap": {}, "height": null, @@ -39075,7 +39075,7 @@ "h": 7, "w": 12, "x": 12, - "y": 28 + "y": 35 }, "height": null, "hideTimeOverride": false, @@ -39212,7 +39212,7 @@ "h": 7, "w": 12, "x": 0, - "y": 35 + "y": 42 }, "heatmap": {}, "height": null, @@ -39313,7 +39313,7 @@ "h": 7, "w": 12, "x": 12, - "y": 35 + "y": 42 }, "height": null, "hideTimeOverride": false, @@ -39518,7 +39518,7 @@ "h": 7, "w": 12, "x": 0, - "y": 42 + "y": 49 }, "heatmap": {}, "height": null, @@ -39619,7 +39619,7 @@ "h": 7, "w": 12, "x": 12, - "y": 42 + "y": 49 }, "height": null, "hideTimeOverride": false, @@ -39820,7 +39820,7 @@ "h": 7, "w": 12, "x": 0, - "y": 49 + "y": 56 }, "height": null, "hideTimeOverride": false, @@ -40028,7 +40028,7 @@ "h": 7, "w": 12, "x": 12, - "y": 49 + "y": 56 }, "height": null, "hideTimeOverride": false, @@ -40204,9 +40204,9 @@ }, "gridPos": { "h": 7, - "w": 6, + "w": 12, "x": 0, - "y": 56 + "y": 63 }, "height": null, "hideTimeOverride": false, @@ -40337,9 +40337,9 @@ }, "gridPos": { "h": 7, - "w": 6, - "x": 6, - "y": 56 + "w": 12, + "x": 12, + "y": 63 }, "height": null, "hideTimeOverride": false, @@ -40470,9 +40470,9 @@ }, "gridPos": { "h": 7, - "w": 6, - "x": 12, - "y": 56 + "w": 12, + "x": 0, + "y": 70 }, 
"height": null, "hideTimeOverride": false, @@ -40603,9 +40603,9 @@ }, "gridPos": { "h": 7, - "w": 6, - "x": 18, - "y": 56 + "w": 12, + "x": 12, + "y": 70 }, "height": null, "hideTimeOverride": false, diff --git a/metrics/grafana/tikv_details.json.sha256 b/metrics/grafana/tikv_details.json.sha256 index 3817d376747..ecc7487be82 100644 --- a/metrics/grafana/tikv_details.json.sha256 +++ b/metrics/grafana/tikv_details.json.sha256 @@ -1 +1 @@ -a0f1b4d3924faf1a0eaef40179f4b05653e449b94d2f9659cd2ca61990530d0c ./metrics/grafana/tikv_details.json +489eb016896d5f733f2db9787b85de0c51a1cf8373affebe85045b779a560cd0 ./metrics/grafana/tikv_details.json From 5ee4eecde457f386f7f9a8dc68d18030f0acd983 Mon Sep 17 00:00:00 2001 From: Neil Shen Date: Thu, 31 Oct 2024 16:56:09 +0800 Subject: [PATCH 02/86] In-memory Engine: evaluate SkiplistEngine memory usage (#17746) ref tikv/tikv#16141 Add test to simulate insertion of 200MB (logical size) of TiDB unqiue index and secondary index records and measure SkiplistEngine memory usage. Test results: * For secondary index * The key-value encoding amplification is approximately 3.10 * SkiplistEngine amplification is approximately 7.66 * For unique index * The key-value encoding amplification is approximately 3.38 * SkiplistEngine amplification is approximately 8.19 Signed-off-by: Neil Shen --- Cargo.lock | 1 + components/in_memory_engine/Cargo.toml | 1 + components/in_memory_engine/src/lib.rs | 2 + .../in_memory_engine/src/memory_usage_test.rs | 202 ++++++++++++++++++ components/in_memory_engine/src/prop_test.rs | 62 +++--- components/tikv_alloc/src/jemalloc.rs | 6 +- 6 files changed, 246 insertions(+), 28 deletions(-) create mode 100644 components/in_memory_engine/src/memory_usage_test.rs diff --git a/Cargo.lock b/Cargo.lock index 0fea51bf3f1..b109e0255b3 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2596,6 +2596,7 @@ dependencies = [ "test_pd", "test_util", "thiserror", + "tikv_alloc", "tikv_util", "tokio", "txn_types", diff --git a/components/in_memory_engine/Cargo.toml b/components/in_memory_engine/Cargo.toml index a29ad0d6daf..966281d04a7 100644 --- a/components/in_memory_engine/Cargo.toml +++ b/components/in_memory_engine/Cargo.toml @@ -61,3 +61,4 @@ tempfile = "3.0" test_pd = { workspace = true } test_util = { workspace = true } proptest = "1.0.0" +tikv_alloc = { workspace = true, features = ["jemalloc"] } diff --git a/components/in_memory_engine/src/lib.rs b/components/in_memory_engine/src/lib.rs index 6de1a74d036..351907ce24a 100644 --- a/components/in_memory_engine/src/lib.rs +++ b/components/in_memory_engine/src/lib.rs @@ -19,6 +19,8 @@ mod cross_check; mod engine; mod keys; mod memory_controller; +#[cfg(test)] +mod memory_usage_test; mod metrics; mod perf_context; #[cfg(test)] diff --git a/components/in_memory_engine/src/memory_usage_test.rs b/components/in_memory_engine/src/memory_usage_test.rs new file mode 100644 index 00000000000..1258c04fa2e --- /dev/null +++ b/components/in_memory_engine/src/memory_usage_test.rs @@ -0,0 +1,202 @@ +// Copyright 2021 TiKV Project Authors. Licensed under Apache-2.0. + +/// These tests are heavy so we shouldn't run them daily. 
+/// Run them with the following command (recommending release mode) and see the +/// printed stats: +/// +/// ```text +/// RUST_TEST_THREADS=1 +/// cargo test --package in_memory_engine --lib --release \ +/// -- memory_usage_test::test_memory_usage --nocapture --ignored +/// ``` +use std::fmt::Write as _; + +use engine_traits::CF_WRITE; +use hex::FromHex; +use tikv_util::config::ReadableSize; +use txn_types::{Key, Write, WriteType}; + +use crate::prop_test::new_skiplist_engine_for_test; + +struct Case { + name: &'static str, + key: Vec, + value: Option>, + logical_size_pre_key: u64, + logical_size_in_total: u64, +} + +#[test] +#[ignore] +fn test_memory_usage_two_fields_secondary_index_two_fields_clustered_index() { + // Say there is a secondary index composed of 2 fields, a 5-bytes string, + // and an 8-bytes integer, and it maps to a clustered index with two + // 8-byes integers. + // + // ```text + // A raw key looks like: + // t{table_id}_i{index_id}\001{string}\003{integer} + // For example: + // t\200\000\000\000\000\000\000l_i\200\000\000\000\000\000\000\017\001Error\000\000\000\374\003\200\000\000\000\000\000\000\005\003\200\017\220\331\313 \210h\003\200\000\0001G\221\245\331 + // + // An encoded key looks like: + // BytesEncoder::encode_bytes(raw_key) + // For example: + // t\200\000\000\000\000\000\000\377l_i\200\000\000\000\000\377\000\000\017\001Erro\377r\000\000\000\374\003\200\000\377\000\000\000\000\000\005\003\200\377\017\220\331\313 \210h\003\377\200\000\0001G\221\245\331\377\000\000\000\000\000\000\000\000\367 + // + // A data key in the write CF is prefixed with 'z' and suffix a 8 bytes integer. + // z{encoded_key}{timestamp} + // For example: + // zt\200\000\000\000\000\000\000\377l_i\200\000\000\000\000\377\000\000\017\001Erro\377r\000\000\000\374\003\200\000\377\000\000\000\000\000\005\003\200\377\017\220\331\313 \210h\003\377\200\000\0001G\221\245\331\377\000\000\000\000\000\000\000\000\367\371\306\021\275x\177\377\377 + // ``` + // + // An encoded key (without timestamp) in hex format. + let key = Vec::from_hex( + "7480000000000000FF6C5F698000000000FF00000F014572726FFF72000000FC038000\ + FF0000000000050380FF0F90D9CB20886803FF800000314791A5D9FF00000000000000\ + 00F7", + ) + .unwrap(); + // For a secondary index record, it doesn't have a value. + let value = None; + + // Let's define the logical size of a secondary index record is 29 bytes + // which is the sum of the size of the two fields and the clustered index. + let logical_size_pre_key = 5 + 8 + 8 + 8; + let logical_size_in_total = ReadableSize::mb(200).0; + + let case = Case { + name: "two_fields_secondary_index_two_fields_clustered_index", + key, + value, + logical_size_pre_key, + logical_size_in_total, + }; + evaluate_memory_usage(case) +} + +#[test] +#[ignore] +fn test_memory_usage_two_fields_unique_index_two_fields_clustered_index() { + // Say there is a unique index composed of 2 fields, a 5-bytes string, + // and an 8-bytes integer, and it maps to a clustered index with two + // 8-byes integers. 
+ // + // ```text + // A raw key looks like: + // t{table_id}_i{index_id}\001{string}\003{integer} + // For example: + // t\200\000\000\000\000\000\000l_i\200\000\000\000\000\000\000\017\001Error\000\000\000\374\003\200\000\000\000\000\000\000\005 + // + // An encoded key looks like: + // BytesEncoder::encode_bytes(raw_key) + // For example: + // t\200\000\000\000\000\000\000\377l_i\200\000\000\000\000\377\000\000\017\001Erro\377r\000\000\000\374\003\200\000\377\000\000\000\000\000\005\000\000\375 + // + // A data key in the write CF is prefixed with 'z' and suffix a 8 bytes integer. + // z{encoded_key}{timestamp} + // For example: + // zt\200\000\000\000\000\000\000\377l_i\200\000\000\000\000\377\000\000\017\001Erro\377r\000\000\000\374\003\200\000\377\000\000\000\000\000\005\000\000\375\371\306\021\275x\177\377\377 + // ``` + // + // An encoded key (without timestamp) in hex format. + let key = Vec::from_hex( + "7A7480000000000000FF6C5F698000000000FF00000F014572726FFF72000000FC0380\ + 00FF0000000000050000FDF9C611BD787FFFFF", + ) + .unwrap(); + // For a unique index record, it has a value in the format: + // + // ```text + // Layout: TailLen | VersionFlag | Version | Options | [UntouchedFlag] + // Length: 1 | 1 | 1 | len(options) | 1 + // + // Where Options for common handle (aka clustered index) is: + // + // Layout: CHandle flag | CHandle Len | CHandle | + // Length: 1 | 2 | len(CHandle) | + // + // For example: + // 007D017F001203800162F749A43FAE038000000727DB8931 can be interpreted as: + // + // TailLen | VersionFlag | Version | Options | [UntouchedFlag] + // | CHandle flag | CHandle Len | CHandle | + // 00 | 7D | 01 | 7F | 00 12 | 03800162F749A43FAE 038000000727DB8931 + // ``` + // See: https://github.com/pingcap/tidb/blob/c201eb7335/table/tables/index.go#L134 + let value = Some(Vec::from_hex("007D017F001203800162F749A43FAE038000000727DB8931").unwrap()); + + // Let's define the logical size of a unique index record is 29 bytes + // which is the sum of the size of the two fields and the clustered index. + let logical_size_pre_key = 5 + 8 + 8 + 8; + let logical_size_in_total = ReadableSize::mb(200).0; + + let case = Case { + name: "two_fields_unique_index_two_fields_clustered_index", + key, + value, + logical_size_pre_key, + logical_size_in_total, + }; + evaluate_memory_usage(case) +} + +fn evaluate_memory_usage(case: Case) { + let Case { + name, + key, + value, + logical_size_pre_key, + logical_size_in_total, + } = case; + // 2024-03-27 00:21:00.348 +0800 CST + let commit_ts = 448651607500000000u64; + + // Preallocate 8KB buffer to avoid reallocation. + let mut log_buf = String::from_utf8(vec![b'0'; ReadableSize::kb(8).0 as _]).unwrap(); + log_buf.clear(); + writeln!(log_buf, "\nCase: {}", name).unwrap(); + + let (skiplist, skiplist_args) = new_skiplist_engine_for_test(); + let start = tikv_alloc::fetch_stats().unwrap().unwrap(); + for i in 0..=logical_size_in_total / logical_size_pre_key { + // Append a timestamp to the key to make it unique. 
+ let key = Key::from_encoded(key.clone()).append_ts(i.into()); + let value = Write::new(WriteType::Put, commit_ts.into(), value.clone()) + .as_ref() + .to_bytes(); + if i == 0 { + writeln!( + log_buf, + " Pre key amplification: {:.2}", + key.len() as f64 / logical_size_pre_key as f64, + ) + .unwrap(); + writeln!( + log_buf, + " Pre key value amplification: {:.2}", + (key.len() + value.len()) as f64 / logical_size_pre_key as f64, + ) + .unwrap(); + } + + let handle = skiplist.cf_handle(CF_WRITE); + let (key, value, guard) = skiplist_args(key.into_encoded(), Some(value)); + handle.insert(key, value.unwrap(), &guard) + } + let end = tikv_alloc::fetch_stats().unwrap().unwrap(); + + let resident_start = start.iter().find(|(k, _)| *k == "resident").unwrap(); + let resident_end = end.iter().find(|(k, _)| *k == "resident").unwrap(); + writeln!( + log_buf, + " SkiplistEngine amplification: {:.2}", + (resident_end.1 - resident_start.1) as f64 / logical_size_in_total as f64, + ) + .unwrap(); + for (i, (k, v)) in end.into_iter().enumerate() { + writeln!(log_buf, " {}: {}", k, v.saturating_sub(start[i].1)).unwrap(); + } + println!("{}", log_buf); + drop(skiplist); +} diff --git a/components/in_memory_engine/src/prop_test.rs b/components/in_memory_engine/src/prop_test.rs index f25d21e6733..be9912bec01 100644 --- a/components/in_memory_engine/src/prop_test.rs +++ b/components/in_memory_engine/src/prop_test.rs @@ -20,6 +20,33 @@ use crate::{ // This fixed mvcc suffix is used for CF_WRITE and CF_DEFAULT in prop test. const MVCC_SUFFIX: u64 = 10; +pub fn new_skiplist_engine_for_test() -> ( + SkiplistEngine, + Box, Option>) -> (InternalBytes, Option, epoch::Guard)>, +) { + let skiplist = SkiplistEngine::default(); + let mut cfg = InMemoryEngineConfig::default(); + cfg.evict_threshold = Some(ReadableSize::gb(1)); + cfg.capacity = Some(ReadableSize::gb(2)); + let controller = Arc::new(MemoryController::new( + Arc::new(VersionTrack::new(cfg)), + skiplist.clone(), + )); + + let skiplist_args = Box::new(move |k: Vec, v: Option>| { + let mut key = encode_key(&k, 0, crate::ValueType::Value); + key.set_memory_controller(controller.clone()); + let value = v.map(|v| { + let mut value = InternalBytes::from_vec(v); + value.set_memory_controller(controller.clone()); + value + }); + let guard = epoch::pin(); + (key, value, guard) + }); + (skiplist, skiplist_args) +} + #[derive(Clone)] enum Operation { Put(Vec, Vec), @@ -132,8 +159,6 @@ fn scan_skiplist( } fn test_rocksdb_skiplist_basic_operations(cf: CfName, operations: Vec) { - let skiplist = SkiplistEngine::default(); - let path_rocks = tempfile::tempdir().unwrap(); let db_rocks = new_engine( path_rocks.path().to_str().unwrap(), @@ -141,26 +166,7 @@ fn test_rocksdb_skiplist_basic_operations(cf: CfName, operations: Vec ) .unwrap(); - let mut cfg = InMemoryEngineConfig::default(); - cfg.evict_threshold = Some(ReadableSize::gb(1)); - cfg.capacity = Some(ReadableSize::gb(2)); - let controller = Arc::new(MemoryController::new( - Arc::new(VersionTrack::new(cfg)), - skiplist.clone(), - )); - - let skiplist_args = |k: Vec, v: Option>| { - let mut key = encode_key(&k, 0, crate::ValueType::Value); - key.set_memory_controller(controller.clone()); - let value = v.map(|v| { - let mut value = InternalBytes::from_vec(v); - value.set_memory_controller(controller.clone()); - value - }); - let guard = epoch::pin(); - let handle = skiplist.cf_handle(cf); - (handle, key, value, guard) - }; + let (skiplist, skiplist_args) = new_skiplist_engine_for_test(); // Delete range in SkiplistEngine 
considers MVCC suffix for CF_DEFAULT and // CF_WRITE, so we append the suffix for them. @@ -174,7 +180,8 @@ fn test_rocksdb_skiplist_basic_operations(cf: CfName, operations: Vec } db_rocks.put_cf(cf, &k, &v).unwrap(); - let (handle, key, value, guard) = skiplist_args(k, Some(v)); + let handle = skiplist.cf_handle(cf); + let (key, value, guard) = skiplist_args(k, Some(v)); handle.insert(key, value.unwrap(), &guard) } Operation::Get(mut k) => { @@ -184,7 +191,8 @@ fn test_rocksdb_skiplist_basic_operations(cf: CfName, operations: Vec .into_encoded(); } let res_rocks = db_rocks.get_value_cf(cf, &k).unwrap(); - let (handle, key, _value, guard) = skiplist_args(k, None); + let handle = skiplist.cf_handle(cf); + let (key, _value, guard) = skiplist_args(k, None); let res_skiplist = handle.get(&key, &guard); assert_eq!( res_rocks.as_deref(), @@ -199,7 +207,8 @@ fn test_rocksdb_skiplist_basic_operations(cf: CfName, operations: Vec } db_rocks.delete_cf(cf, &k).unwrap(); - let (handle, key, _value, guard) = skiplist_args(k, None); + let handle = skiplist.cf_handle(cf); + let (key, _value, guard) = skiplist_args(k, None); handle.remove(&key, &guard) } Operation::Scan(mut k, limit) => { @@ -209,7 +218,8 @@ fn test_rocksdb_skiplist_basic_operations(cf: CfName, operations: Vec .into_encoded(); } let res_rocks = scan_rocksdb(&db_rocks, cf, &k, limit); - let (handle, key, _value, _guard) = skiplist_args(k, None); + let handle = skiplist.cf_handle(cf); + let (key, _value, _guard) = skiplist_args(k, None); let res_titan = scan_skiplist(handle, &key, limit); assert_eq!(res_rocks, res_titan); } diff --git a/components/tikv_alloc/src/jemalloc.rs b/components/tikv_alloc/src/jemalloc.rs index b81c90a2af9..d504d4956c7 100644 --- a/components/tikv_alloc/src/jemalloc.rs +++ b/components/tikv_alloc/src/jemalloc.rs @@ -181,11 +181,13 @@ pub fn fetch_stats() -> Result, Error> { ("retained", stats::retained::read()?), ( "dirty", - stats::resident::read()? - stats::active::read()? - stats::metadata::read()?, + stats::resident::read()? + .saturating_sub(stats::active::read()?) + .saturating_sub(stats::metadata::read()?), ), ( "fragmentation", - stats::active::read()? - stats::allocated::read()?, + stats::active::read()?.saturating_sub(stats::allocated::read()?), ), ])) } From 254d1f0540fc916a4ef0fc6d1351bcd6e97cec1b Mon Sep 17 00:00:00 2001 From: ekexium Date: Thu, 31 Oct 2024 17:26:04 +0800 Subject: [PATCH 03/86] resolved-ts: track the number of locks of large txns in resolver (#17629) ref tikv/tikv#17459 Track the number of locks of large txns in resolver Signed-off-by: ekexium Co-authored-by: ti-chi-bot[bot] <108142056+ti-chi-bot[bot]@users.noreply.github.com> --- components/resolved_ts/src/resolver.rs | 96 ++++++++++++++++---------- 1 file changed, 58 insertions(+), 38 deletions(-) diff --git a/components/resolved_ts/src/resolver.rs b/components/resolved_ts/src/resolver.rs index 7c3f1d74976..f0bc30e3d05 100644 --- a/components/resolved_ts/src/resolver.rs +++ b/components/resolved_ts/src/resolver.rs @@ -2,7 +2,7 @@ use std::{cmp, collections::BTreeMap, sync::Arc, time::Duration}; -use collections::{HashMap, HashMapEntry, HashSet}; +use collections::{HashMap, HashMapEntry}; use raftstore::store::RegionReadProgress; use tikv::storage::txn::txn_status_cache::{TxnState, TxnStatusCache}; use tikv_util::{ @@ -81,9 +81,9 @@ pub struct Resolver { locks_by_key: HashMap, TimeStamp>, // start_ts -> locked keys. 
lock_ts_heap: BTreeMap, - // the start_ts of large transactions, which use a different tracking strategy with normal - // transactions. - large_txn_ts: HashSet, + // the start_ts and lock samples of large transactions, which use a different tracking strategy + // from normal transactions. + large_txns: HashMap, // each large transaction tracked by this resolver has a representative key tracked. So that // when the large transaction is rolled back, we can rely on this key to guarantee that // eventually there will be orphaned transactions. @@ -207,7 +207,7 @@ impl Resolver { resolved_ts: TimeStamp::zero(), locks_by_key: HashMap::default(), lock_ts_heap: BTreeMap::new(), - large_txn_ts: HashSet::::default(), + large_txns: Default::default(), large_txn_key_representative: HashMap::, TimeStamp>::default(), last_aggressive_shrink_time: Instant::now_coarse(), read_progress, @@ -275,9 +275,9 @@ impl Resolver { + self .large_txn_key_representative .keys() - .map(|k| k.len() + std::mem::size_of::()) + .map(|k| k.len() * 2 /* count for the key in TxnLocks */ + std::mem::size_of::()) .sum::() - + self.large_txn_ts.len() * std::mem::size_of::() + + self.large_txns.len() * (std::mem::size_of::() + std::mem::size_of::()) } fn lock_heap_size(&self, key: &[u8]) -> usize { @@ -388,8 +388,11 @@ impl Resolver { let shrink_ratio = 8; self.shrink_ratio(shrink_ratio); } else if let Some(start_ts) = self.large_txn_key_representative.remove(key) { - let is_new = self.large_txn_ts.remove(&start_ts); - debug_assert!(is_new, "large txn lock should be untracked only once"); + let entry = self.large_txns.remove(&start_ts); + debug_assert!( + entry.is_some(), + "large txn lock should be untracked only once" + ); debug!( "untrack lock {}@{}", &log_wrappers::Value::key(key), @@ -504,9 +507,9 @@ impl Resolver { fn log_min_large_txn(&self, lower_bound: TimeStamp) { let min_min_commit_ts_txn = self - .large_txn_ts + .large_txns .iter() - .filter_map(|&start_ts| { + .filter_map(|(&start_ts, _)| { self.lookup_min_commit_ts(start_ts) .map(|min_commit_ts| (start_ts, min_commit_ts)) }) @@ -541,15 +544,24 @@ impl Resolver { } } + // This may be inaccurate for large transactions. But it's just for monitoring + // and diagnosis. + // The inaccuracy comes from + // 1. Untracking large txn locks, because we do not know the ts when untracking + // a lock. + // 2. The same key written in multiple generations can also be counted multiple + // times. pub(crate) fn num_locks(&self) -> u64 { - // this is inaccurate, but it's just for monitoring. 
- // TODO: count the number of locks of large transactions, namely also track - // TxnLocks - (self.locks_by_key.len() + self.large_txn_ts.len()) as u64 + (self.locks_by_key.len() + + self + .large_txns + .values() + .map(|locks| locks.lock_count) + .sum::()) as u64 } pub(crate) fn num_transactions(&self) -> u64 { - (self.lock_ts_heap.len() + self.large_txn_ts.len()) as u64 + (self.lock_ts_heap.len() + self.large_txns.len()) as u64 } pub(crate) fn read_progress(&self) -> Option<&Arc> { @@ -566,20 +578,13 @@ impl Resolver { .map(|(ts, txn_locks)| (ts, txn_locks.clone())); let oldest_large_txn = self - .large_txn_ts + .large_txns .iter() - .filter_map(|start_ts| self.lookup_min_commit_ts(*start_ts)) - .min() - .map(|ts| { - ( - ts, - TxnLocks { - lock_count: 1, - // TODO: maybe fill this - sample_lock: None, - }, - ) - }); + .filter_map(|(start_ts, txn_locks)| { + self.lookup_min_commit_ts(*start_ts) + .map(|ts| (ts, txn_locks.clone())) + }) + .min_by_key(|(ts, _)| *ts); match (oldest_normal_txn, oldest_large_txn) { (Some((&ts1, txn_locks1)), Some((ts2, txn_locks2))) => { @@ -604,10 +609,18 @@ impl Resolver { start_ts: TimeStamp, key: Vec, ) -> Result<(), MemoryQuotaExceeded> { - let is_new = self.large_txn_ts.insert(start_ts); - if is_new { - self.large_txn_key_representative.insert(key, start_ts); - } + self.large_txns + .entry(start_ts) + .and_modify(|entry| entry.lock_count += 1) + .or_insert_with(|| { + self.large_txn_key_representative + .insert(key.clone(), start_ts); + TxnLocks { + lock_count: 1, + sample_lock: Some(key.into_boxed_slice().into()), + } + }); + Ok(()) } } @@ -864,18 +877,22 @@ mod tests { let memory_quota = Arc::new(MemoryQuota::new(std::usize::MAX)); let txn_status_cache = Arc::new(TxnStatusCache::new(100)); let mut resolver = Resolver::new(1, memory_quota, txn_status_cache.clone()); - let key: Vec = vec![1, 2, 3, 4]; + let key1: Vec = vec![1, 2, 3, 4]; let key2: Vec = vec![5, 6, 7, 8]; + let key3: Vec = vec![9, 10, 11, 12]; - // track 2 large txns - resolver.track_lock(1.into(), key.clone(), None, 1).unwrap(); + // track 2 large txns, T1{key1}, T2{key2, key3} + resolver + .track_lock(1.into(), key1.clone(), None, 1) + .unwrap(); resolver .track_lock(2.into(), key2.clone(), None, 1) .unwrap(); - assert_eq!(resolver.num_locks(), 2); + resolver.track_lock(2.into(), key3, None, 2).unwrap(); + assert_eq!(resolver.num_locks(), 3); assert_eq!(resolver.num_transactions(), 2); assert_eq!(resolver.locks_by_key.len(), 0); - assert_eq!(resolver.large_txn_ts.len(), 2); + assert_eq!(resolver.large_txns.len(), 2); assert_eq!(resolver.large_txn_key_representative.len(), 2); assert_eq!(resolver.resolved_ts(), TimeStamp::zero()); @@ -897,5 +914,8 @@ mod tests { ); assert_eq!(resolver.resolve(20.into(), None, TsSource::PdTso), 5.into()); + let oldest_txn = resolver.oldest_transaction().unwrap(); + assert_eq!(oldest_txn.0, 5.into()); + assert_eq!(oldest_txn.1.lock_count, 2); } } From b01e4adf3c3de1234aeba8ecb049ee1712b4bfff Mon Sep 17 00:00:00 2001 From: Bisheng Huang Date: Fri, 1 Nov 2024 07:53:22 +0800 Subject: [PATCH 04/86] raftstore: move snapshot generation out of the region worker (#17438) close tikv/tikv#12587, fix tikv/tikv#16001 To fix the issue where slow region destruction can block snapshot generation, this PR moves the snapshot generation logic out of the region worker. A new worker is added to handle snap gen requests but it reuses the existing snap generator pool, so the change doesn't introduce any new threads. 
This is a simpler approach than the earlier attempt because it doesn't deal with the interactions between snapshot apply and destroy. Since snapshot generation has always been an independent task handled by its own thread pool, this change does not add significant complexity. Signed-off-by: Bisheng Huang Co-authored-by: ti-chi-bot[bot] <108142056+ti-chi-bot[bot]@users.noreply.github.com> --- components/raftstore/src/store/fsm/apply.rs | 72 ++--- components/raftstore/src/store/fsm/peer.rs | 4 +- components/raftstore/src/store/fsm/store.rs | 34 ++- components/raftstore/src/store/mod.rs | 2 +- components/raftstore/src/store/peer.rs | 2 +- .../raftstore/src/store/peer_storage.rs | 111 +++---- components/raftstore/src/store/worker/mod.rs | 4 +- .../raftstore/src/store/worker/region.rs | 276 +++--------------- .../raftstore/src/store/worker/snap_gen.rs | 269 +++++++++++++++++ components/tikv_util/src/worker/pool.rs | 25 +- 10 files changed, 455 insertions(+), 344 deletions(-) create mode 100644 components/raftstore/src/store/worker/snap_gen.rs diff --git a/components/raftstore/src/store/fsm/apply.rs b/components/raftstore/src/store/fsm/apply.rs index c4002708655..eadb3103fda 100644 --- a/components/raftstore/src/store/fsm/apply.rs +++ b/components/raftstore/src/store/fsm/apply.rs @@ -95,7 +95,7 @@ use crate::{ self, admin_cmd_epoch_lookup, check_flashback_state, check_req_region_epoch, compare_region_epoch, ChangePeerI, ConfChangeKind, KeysInfoFormatter, }, - Config, RegionSnapshot, RegionTask, WriteCallback, + Config, RegionSnapshot, SnapGenTask, WriteCallback, }, Error, Result, }; @@ -397,7 +397,7 @@ where timer: Option, host: CoprocessorHost, importer: Arc>, - region_scheduler: Scheduler>, + snap_gen_scheduler: Scheduler>, router: ApplyRouter, notifier: Box>, engine: EK, @@ -482,7 +482,7 @@ where tag: String, host: CoprocessorHost, importer: Arc>, - region_scheduler: Scheduler>, + snap_gen_scheduler: Scheduler>, engine: EK, router: ApplyRouter, notifier: Box>, @@ -499,7 +499,7 @@ where timer: None, host, importer, - region_scheduler, + snap_gen_scheduler, engine, router, notifier, @@ -3709,14 +3709,14 @@ impl GenSnapTask { kv_snap: EK::Snapshot, last_applied_term: u64, last_applied_state: RaftApplyState, - region_sched: &Scheduler>, + snap_gen_sched: &Scheduler>, ) -> Result<()> where EK: KvEngine, { self.index .store(last_applied_state.applied_index, Ordering::SeqCst); - let snapshot = RegionTask::Gen { + let snapshot = SnapGenTask::Gen { region_id: self.region_id, notifier: self.snap_notifier, for_balance: self.for_balance, @@ -3728,7 +3728,7 @@ impl GenSnapTask { kv_snap, to_store_id: self.to_peer.store_id, }; - box_try!(region_sched.schedule(snapshot)); + box_try!(snap_gen_sched.schedule(snapshot)); Ok(()) } } @@ -4283,7 +4283,7 @@ where apply_ctx.engine.snapshot(), self.delegate.applied_term, self.delegate.apply_state.clone(), - &apply_ctx.region_scheduler, + &apply_ctx.snap_gen_scheduler, ) { error!( "schedule snapshot failed"; @@ -4786,7 +4786,7 @@ pub struct Builder { cfg: Arc>, coprocessor_host: CoprocessorHost, importer: Arc>, - region_scheduler: Scheduler::Snapshot>>, + snap_gen_scheduler: Scheduler>, engine: EK, sender: Box>, router: ApplyRouter, @@ -4805,7 +4805,7 @@ impl Builder { cfg: builder.cfg.clone(), coprocessor_host: builder.coprocessor_host.clone(), importer: builder.importer.clone(), - region_scheduler: builder.region_scheduler.clone(), + snap_gen_scheduler: builder.snap_gen_scheduler.clone(), engine: builder.engines.kv.clone(), sender, router, @@ -4829,7 +4829,7 @@ 
where self.tag.clone(), self.coprocessor_host.clone(), self.importer.clone(), - self.region_scheduler.clone(), + self.snap_gen_scheduler.clone(), self.engine.clone(), self.router.clone(), self.sender.clone_box(), @@ -4855,7 +4855,7 @@ where cfg: self.cfg.clone(), coprocessor_host: self.coprocessor_host.clone(), importer: self.importer.clone(), - region_scheduler: self.region_scheduler.clone(), + snap_gen_scheduler: self.snap_gen_scheduler.clone(), engine: self.engine.clone(), sender: self.sender.clone_box(), router: self.router.clone(), @@ -5180,7 +5180,7 @@ mod tests { msg::WriteResponse, peer_storage::RAFT_INIT_LOG_INDEX, simple_write::{SimpleWriteEncoder, SimpleWriteReqEncoder}, - Config, RegionTask, + Config, SnapGenTask, }, }; @@ -5503,7 +5503,7 @@ mod tests { let sender = Box::new(TestNotifier { tx }); let (_tmp, engine) = create_tmp_engine("apply-basic"); let (_dir, importer) = create_tmp_importer("apply-basic"); - let (region_scheduler, mut snapshot_rx) = dummy_scheduler(); + let (snap_gen_scheduler, mut snapshot_rx) = dummy_scheduler(); let cfg = Arc::new(VersionTrack::new(Config::default())); let (router, mut system) = create_apply_batch_system(&cfg.value(), None); let pending_create_peers = Arc::new(Mutex::new(HashMap::default())); @@ -5512,7 +5512,7 @@ mod tests { cfg, coprocessor_host: CoprocessorHost::::default(), importer, - region_scheduler, + snap_gen_scheduler, sender, engine, router: router.clone(), @@ -5605,7 +5605,7 @@ mod tests { }; let apply_state_key = keys::apply_state_key(2); let apply_state = match snapshot_rx.recv_timeout(Duration::from_secs(3)) { - Ok(Some(RegionTask::Gen { kv_snap, .. })) => kv_snap + Ok(Some(SnapGenTask::Gen { kv_snap, .. })) => kv_snap .get_msg_cf(CF_RAFT, &apply_state_key) .unwrap() .unwrap(), @@ -6072,7 +6072,7 @@ mod tests { .register_query_observer(1, BoxQueryObserver::new(obs.clone())); let (tx, rx) = mpsc::channel(); - let (region_scheduler, _) = dummy_scheduler(); + let (snap_gen_scheduler, _) = dummy_scheduler(); let sender = Box::new(TestNotifier { tx }); let cfg = Arc::new(VersionTrack::new(Config::default())); let (router, mut system) = create_apply_batch_system(&cfg.value(), None); @@ -6081,7 +6081,7 @@ mod tests { tag: "test-store".to_owned(), cfg, sender, - region_scheduler, + snap_gen_scheduler, coprocessor_host: host, importer: importer.clone(), engine: engine.clone(), @@ -6411,7 +6411,7 @@ mod tests { .register_query_observer(1, BoxQueryObserver::new(obs.clone())); let (tx, rx) = mpsc::channel(); - let (region_scheduler, _) = dummy_scheduler(); + let (snap_gen_scheduler, _) = dummy_scheduler(); let sender = Box::new(TestNotifier { tx }); let mut config = Config::default(); config.enable_v2_compatible_learner = true; @@ -6422,7 +6422,7 @@ mod tests { tag: "test-store".to_owned(), cfg, sender, - region_scheduler, + snap_gen_scheduler, coprocessor_host: host, importer: importer.clone(), engine: engine.clone(), @@ -6756,7 +6756,7 @@ mod tests { .register_query_observer(1, BoxQueryObserver::new(obs)); let (tx, rx) = mpsc::channel(); - let (region_scheduler, _) = dummy_scheduler(); + let (snap_gen_scheduler, _) = dummy_scheduler(); let sender = Box::new(TestNotifier { tx }); let cfg = Arc::new(VersionTrack::new(Config::default())); let (router, mut system) = create_apply_batch_system(&cfg.value(), None); @@ -6765,7 +6765,7 @@ mod tests { tag: "test-store".to_owned(), cfg: cfg.clone(), sender, - region_scheduler, + snap_gen_scheduler, coprocessor_host: host, importer, engine, @@ -6842,7 +6842,7 @@ mod tests { 
.register_query_observer(1, BoxQueryObserver::new(obs)); let (tx, rx) = mpsc::channel(); - let (region_scheduler, _) = dummy_scheduler(); + let (snap_gen_scheduler, _) = dummy_scheduler(); let sender = Box::new(TestNotifier { tx }); let cfg = { let mut cfg = Config::default(); @@ -6856,7 +6856,7 @@ mod tests { tag: "test-store".to_owned(), cfg, sender, - region_scheduler, + snap_gen_scheduler, coprocessor_host: host, importer: importer.clone(), engine: engine.clone(), @@ -7025,7 +7025,7 @@ mod tests { .register_query_observer(1, BoxQueryObserver::new(obs)); let (tx, rx) = mpsc::channel(); - let (region_scheduler, _) = dummy_scheduler(); + let (snap_gen_scheduler, _) = dummy_scheduler(); let sender = Box::new(TestNotifier { tx }); let cfg = { let mut cfg = Config::default(); @@ -7039,7 +7039,7 @@ mod tests { tag: "test-store".to_owned(), cfg, sender, - region_scheduler, + snap_gen_scheduler, coprocessor_host: host, importer, engine, @@ -7123,7 +7123,7 @@ mod tests { .register_query_observer(1, BoxQueryObserver::new(obs.clone())); let (tx, rx) = mpsc::channel(); - let (region_scheduler, _) = dummy_scheduler(); + let (snap_gen_scheduler, _) = dummy_scheduler(); let sender = Box::new(TestNotifier { tx }); let cfg = Config::default(); let (router, mut system) = create_apply_batch_system(&cfg, None); @@ -7132,7 +7132,7 @@ mod tests { tag: "test-exec-observer".to_owned(), cfg: Arc::new(VersionTrack::new(cfg)), sender, - region_scheduler, + snap_gen_scheduler, coprocessor_host: host, importer: importer.clone(), engine: engine.clone(), @@ -7348,7 +7348,7 @@ mod tests { .register_cmd_observer(1, BoxCmdObserver::new(obs)); let (tx, rx) = mpsc::channel(); - let (region_scheduler, _) = dummy_scheduler(); + let (snap_gen_scheduler, _) = dummy_scheduler(); let sender = Box::new(TestNotifier { tx }); let cfg = Config::default(); let (router, mut system) = create_apply_batch_system(&cfg, None); @@ -7357,7 +7357,7 @@ mod tests { tag: "test-store".to_owned(), cfg: Arc::new(VersionTrack::new(cfg)), sender, - region_scheduler, + snap_gen_scheduler, coprocessor_host: host, importer, engine, @@ -7629,7 +7629,7 @@ mod tests { obs.cmd_sink = Some(Arc::new(Mutex::new(sink))); host.registry .register_cmd_observer(1, BoxCmdObserver::new(obs)); - let (region_scheduler, _) = dummy_scheduler(); + let (snap_gen_scheduler, _) = dummy_scheduler(); let cfg = Arc::new(VersionTrack::new(Config::default())); let (router, mut system) = create_apply_batch_system(&cfg.value(), None); let pending_create_peers = Arc::new(Mutex::new(HashMap::default())); @@ -7638,7 +7638,7 @@ mod tests { cfg, sender, importer, - region_scheduler, + snap_gen_scheduler, coprocessor_host: host, engine: engine.clone(), router: router.clone(), @@ -7849,7 +7849,7 @@ mod tests { let (tx, apply_res_rx) = mpsc::channel(); let sender = Box::new(TestNotifier { tx }); let coprocessor_host = CoprocessorHost::::default(); - let (region_scheduler, _) = dummy_scheduler(); + let (snap_gen_scheduler, _) = dummy_scheduler(); let cfg = Arc::new(VersionTrack::new(Config::default())); let (router, mut system) = create_apply_batch_system(&cfg.value(), None); let pending_create_peers = Arc::new(Mutex::new(HashMap::default())); @@ -7858,7 +7858,7 @@ mod tests { cfg, sender, importer, - region_scheduler, + snap_gen_scheduler, coprocessor_host, engine: engine.clone(), router: router.clone(), @@ -7973,7 +7973,7 @@ mod tests { .register_query_observer(1, BoxQueryObserver::new(ApplyObserver::default())); let (tx, rx) = mpsc::channel(); - let (region_scheduler, _) = 
dummy_scheduler(); + let (snap_gen_scheduler, _) = dummy_scheduler(); let sender = Box::new(TestNotifier { tx }); let cfg = Arc::new(VersionTrack::new(Config::default())); let (router, mut system) = create_apply_batch_system(&cfg.value(), None); @@ -7982,7 +7982,7 @@ mod tests { tag: "flashback_need_to_be_applied".to_owned(), cfg, sender, - region_scheduler, + snap_gen_scheduler, coprocessor_host: host, importer, engine: engine.clone(), diff --git a/components/raftstore/src/store/fsm/peer.rs b/components/raftstore/src/store/fsm/peer.rs index 5f5a9f6b773..42f8873bf36 100644 --- a/components/raftstore/src/store/fsm/peer.rs +++ b/components/raftstore/src/store/fsm/peer.rs @@ -255,7 +255,7 @@ where pub fn create( store_id: u64, cfg: &Config, - region_scheduler: Scheduler>, + region_scheduler: Scheduler, raftlog_fetch_scheduler: Scheduler>, engines: Engines, region: &metapb::Region, @@ -317,7 +317,7 @@ where pub fn replicate( store_id: u64, cfg: &Config, - region_scheduler: Scheduler>, + region_scheduler: Scheduler, raftlog_fetch_scheduler: Scheduler>, engines: Engines, region_id: u64, diff --git a/components/raftstore/src/store/fsm/store.rs b/components/raftstore/src/store/fsm/store.rs index 8da5743fecb..965b5d38fbb 100644 --- a/components/raftstore/src/store/fsm/store.rs +++ b/components/raftstore/src/store/fsm/store.rs @@ -69,7 +69,7 @@ use tikv_util::{ time::{duration_to_sec, monotonic_raw_now, Instant as TiInstant, SlowTimer}, timer::SteadyTimer, warn, - worker::{LazyWorker, Scheduler, Worker}, + worker::{Builder as WorkerBuilder, LazyWorker, Scheduler, Worker}, yatp_pool::FuturePool, Either, RingQueue, }; @@ -107,7 +107,7 @@ use crate::{ CompactRunner, CompactTask, ConsistencyCheckRunner, ConsistencyCheckTask, GcSnapshotRunner, GcSnapshotTask, PdRunner, RaftlogGcRunner, RaftlogGcTask, ReadDelegate, RefreshConfigRunner, RefreshConfigTask, RegionRunner, RegionTask, - SplitCheckTask, + SnapGenRunner, SnapGenTask, SplitCheckTask, SNAP_GENERATOR_MAX_POOL_SIZE, }, worker_metrics::PROCESS_STAT_CPU_USAGE, Callback, CasualMessage, CompactThreshold, FullCompactController, GlobalReplicationState, @@ -563,7 +563,7 @@ where pub cleanup_scheduler: Scheduler, pub raftlog_gc_scheduler: Scheduler, pub raftlog_fetch_scheduler: Scheduler>, - pub region_scheduler: Scheduler>, + pub region_scheduler: Scheduler, pub apply_router: ApplyRouter, pub router: RaftRouter, pub importer: Arc>, @@ -1255,7 +1255,8 @@ pub struct RaftPollerBuilder { cleanup_scheduler: Scheduler, raftlog_gc_scheduler: Scheduler, raftlog_fetch_scheduler: Scheduler>, - pub region_scheduler: Scheduler>, + pub snap_gen_scheduler: Scheduler>, + pub region_scheduler: Scheduler, apply_router: ApplyRouter, pub router: RaftRouter, pub importer: Arc>, @@ -1568,6 +1569,7 @@ where cleanup_scheduler: self.cleanup_scheduler.clone(), raftlog_gc_scheduler: self.raftlog_gc_scheduler.clone(), raftlog_fetch_scheduler: self.raftlog_fetch_scheduler.clone(), + snap_gen_scheduler: self.snap_gen_scheduler.clone(), region_scheduler: self.region_scheduler.clone(), apply_router: self.apply_router.clone(), router: self.router.clone(), @@ -1596,6 +1598,8 @@ struct Workers { // background_workers. This is because the underlying compact_range call is a // blocking operation, which can take an extensive amount of time. cleanup_worker: Worker, + // The worker dedicated to handling snapshot generation tasks. + snap_gen_worker: Worker, region_worker: Worker, // Used for calling `manual_purge` if the specific engine implementation requires it // (`need_manual_purge`). 
@@ -1691,10 +1695,15 @@ impl RaftBatchSystem { None }; let bgworker_remote = background_worker.remote(); + let snap_gen_worker = WorkerBuilder::new("snap-generator") + .thread_count(cfg.value().snap_generator_pool_size) + .thread_count_limits(1, SNAP_GENERATOR_MAX_POOL_SIZE) + .create(); let workers = Workers { pd_worker, background_worker, cleanup_worker: Worker::new("cleanup-worker"), + snap_gen_worker, region_worker: Worker::new("region-worker"), purge_worker, raftlog_fetch_worker: Worker::new("raftlog-fetch-worker"), @@ -1702,19 +1711,29 @@ impl RaftBatchSystem { refresh_config_worker: LazyWorker::new("refreash-config-worker"), }; mgr.init()?; + + let snap_gen_runner = SnapGenRunner::new( + engines.kv.clone(), + mgr.clone(), + self.router(), + Some(Arc::clone(&pd_client)), + workers.snap_gen_worker.pool(), // Reuse the worker's FuturePool + ); + let region_runner = RegionRunner::new( engines.kv.clone(), mgr.clone(), cfg.clone(), workers.coprocessor_host.clone(), self.router(), - Some(Arc::clone(&pd_client)), ); - let snap_generator_pool = region_runner.snap_generator_pool(); + let snap_generator_pool = workers.snap_gen_worker.pool(); + let snap_gen_scheduler: Scheduler::Snapshot>> = workers + .snap_gen_worker + .start("snap-generator", snap_gen_runner); let region_scheduler = workers .region_worker .start_with_timer("region-worker", region_runner); - let raftlog_gc_runner = RaftlogGcRunner::new( engines.clone(), cfg.value().raft_log_compact_sync_interval.0, @@ -1767,6 +1786,7 @@ impl RaftBatchSystem { router: self.router.clone(), split_check_scheduler, region_scheduler, + snap_gen_scheduler, pd_scheduler: workers.pd_worker.scheduler(), consistency_check_scheduler, cleanup_scheduler, diff --git a/components/raftstore/src/store/mod.rs b/components/raftstore/src/store/mod.rs index 2c1f69d8eb4..9e8e66b7522 100644 --- a/components/raftstore/src/store/mod.rs +++ b/components/raftstore/src/store/mod.rs @@ -91,7 +91,7 @@ pub use self::{ CompactThreshold, FlowStatistics, FlowStatsReporter, FullCompactController, KeyEntry, LocalReadContext, LocalReader, LocalReaderCore, PdStatsMonitor, PdTask, ReadDelegate, ReadExecutor, ReadExecutorProvider, ReadProgress, ReadStats, RefreshConfigTask, RegionTask, - SplitCheckRunner, SplitCheckTask, SplitConfig, SplitConfigManager, SplitInfo, + SnapGenTask, SplitCheckRunner, SplitCheckTask, SplitConfig, SplitConfigManager, SplitInfo, StoreMetaDelegate, StoreStatsReporter, TrackVer, WriteStats, WriterContoller, BIG_REGION_CPU_OVERLOAD_THRESHOLD_RATIO, DEFAULT_BIG_REGION_BYTE_THRESHOLD, DEFAULT_BIG_REGION_QPS_THRESHOLD, DEFAULT_BYTE_THRESHOLD, DEFAULT_QPS_THRESHOLD, diff --git a/components/raftstore/src/store/peer.rs b/components/raftstore/src/store/peer.rs index 0f4f25950ff..07595d4a620 100644 --- a/components/raftstore/src/store/peer.rs +++ b/components/raftstore/src/store/peer.rs @@ -935,7 +935,7 @@ where pub fn new( store_id: u64, cfg: &Config, - region_scheduler: Scheduler>, + region_scheduler: Scheduler, raftlog_fetch_scheduler: Scheduler>, engines: Engines, region: &metapb::Region, diff --git a/components/raftstore/src/store/peer_storage.rs b/components/raftstore/src/store/peer_storage.rs index 06072c4a05f..aa404d8cf2d 100644 --- a/components/raftstore/src/store/peer_storage.rs +++ b/components/raftstore/src/store/peer_storage.rs @@ -234,7 +234,7 @@ where snap_state: RefCell, gen_snap_task: RefCell>, - region_scheduler: Scheduler>, + region_scheduler: Scheduler, snap_tried_cnt: RefCell, entry_storage: EntryStorage, @@ -304,7 +304,7 @@ where pub fn new( 
engines: Engines, region: &metapb::Region, - region_scheduler: Scheduler>, + region_scheduler: Scheduler, raftlog_fetch_scheduler: Scheduler>, peer_id: u64, tag: String, @@ -1268,21 +1268,18 @@ pub mod tests { }; use super::*; - use crate::{ - coprocessor::CoprocessorHost, - store::{ - async_io::{read::ReadRunner, write::write_to_db_for_test}, - bootstrap_store, - entry_storage::tests::validate_cache, - fsm::apply::compact_raft_log, - initial_region, prepare_bootstrap_cluster, - worker::{make_region_worker_raftstore_cfg, RegionRunner, RegionTask}, - AsyncReadNotifier, FetchedLogs, GenSnapRes, - }, + use crate::store::{ + async_io::{read::ReadRunner, write::write_to_db_for_test}, + bootstrap_store, + entry_storage::tests::validate_cache, + fsm::apply::compact_raft_log, + initial_region, prepare_bootstrap_cluster, + worker::{RegionTask, SnapGenRunner, SnapGenTask}, + AsyncReadNotifier, FetchedLogs, GenSnapRes, }; fn new_storage( - region_scheduler: Scheduler>, + region_scheduler: Scheduler, raftlog_fetch_scheduler: Scheduler>, path: &TempDir, ) -> PeerStorage { @@ -1315,7 +1312,7 @@ pub mod tests { } pub fn new_storage_from_ents( - region_scheduler: Scheduler>, + region_scheduler: Scheduler, raftlog_fetch_scheduler: Scheduler>, path: &TempDir, ents: &[Entry], @@ -1647,7 +1644,7 @@ pub mod tests { fn generate_and_schedule_snapshot( gen_task: GenSnapTask, engines: &Engines, - sched: &Scheduler>, + sched: &Scheduler>, ) -> Result<()> { let apply_state: RaftApplyState = engines .kv @@ -1687,21 +1684,22 @@ pub mod tests { let snap_dir = Builder::new().prefix("snap_dir").tempdir().unwrap(); let mgr = SnapManager::new(snap_dir.path().to_str().unwrap()); mgr.init().unwrap(); - let mut worker = Worker::new("region-worker").lazy_build("region-worker"); - let sched = worker.scheduler(); + let (sched, _) = dummy_scheduler(); let (dummy_scheduler, _) = dummy_scheduler(); let mut s = new_storage_from_ents(sched.clone(), dummy_scheduler, &td, &ents); let (router, _) = mpsc::sync_channel(100); - let cfg = make_region_worker_raftstore_cfg(true); - let runner = RegionRunner::new( + + let mut snap_gen_worker = LazyWorker::new("snap-generator"); + let snap_gen_sched = snap_gen_worker.scheduler(); + let snap_gen_runner = SnapGenRunner::new( s.engines.kv.clone(), mgr, - cfg, - CoprocessorHost::::default(), router, Option::>::None, + snap_gen_worker.pool(), ); - worker.start_with_timer(runner); + snap_gen_worker.start(snap_gen_runner); + let to_peer_id = s.peer_id; let unavailable = RaftError::Store(StorageError::SnapshotTemporarilyUnavailable); @@ -1723,7 +1721,7 @@ pub mod tests { assert_eq!(*s.snap_tried_cnt.borrow(), 1); let gen_task = s.gen_snap_task.borrow_mut().take().unwrap(); - generate_and_schedule_snapshot(gen_task, &s.engines, &sched).unwrap(); + generate_and_schedule_snapshot(gen_task, &s.engines, &snap_gen_sched).unwrap(); let snap = match *s.snap_state.borrow() { SnapState::Generating { ref receiver, .. } => { receiver.recv_timeout(Duration::from_secs(3)).unwrap() @@ -1791,11 +1789,11 @@ pub mod tests { assert_eq!(*s.snap_tried_cnt.borrow(), 1); let gen_task = s.gen_snap_task.borrow_mut().take().unwrap(); - generate_and_schedule_snapshot(gen_task, &s.engines, &sched).unwrap(); + generate_and_schedule_snapshot(gen_task, &s.engines, &snap_gen_sched).unwrap(); match *s.snap_state.borrow() { SnapState::Generating { ref receiver, .. 
} => { receiver.recv_timeout(Duration::from_secs(3)).unwrap(); - worker.stop(); + snap_gen_worker.stop(); match receiver.recv_timeout(Duration::from_secs(3)) { Err(RecvTimeoutError::Disconnected) => {} res => panic!("unexpected result: {:?}", res), @@ -1806,7 +1804,7 @@ pub mod tests { // Disconnected channel should trigger another try. assert_eq!(s.snapshot(0, to_peer_id).unwrap_err(), unavailable); let gen_task = s.gen_snap_task.borrow_mut().take().unwrap(); - generate_and_schedule_snapshot(gen_task, &s.engines, &sched).unwrap_err(); + generate_and_schedule_snapshot(gen_task, &s.engines, &snap_gen_sched).unwrap_err(); assert_eq!(*s.snap_tried_cnt.borrow(), 2); for cnt in 2..super::MAX_SNAP_TRY_CNT + 10 { @@ -1821,7 +1819,7 @@ pub mod tests { // Scheduled job failed should trigger . assert_eq!(s.snapshot(0, to_peer_id).unwrap_err(), unavailable); let gen_task = s.gen_snap_task.borrow_mut().take().unwrap(); - generate_and_schedule_snapshot(gen_task, &s.engines, &sched).unwrap_err(); + generate_and_schedule_snapshot(gen_task, &s.engines, &snap_gen_sched).unwrap_err(); } // When retry too many times, it should report a different error. @@ -1842,10 +1840,9 @@ pub mod tests { mgr.init().unwrap(); mgr.set_enable_multi_snapshot_files(true); mgr.set_max_per_file_size(500); - let mut worker = Worker::new("region-worker").lazy_build("region-worker"); - let sched = worker.scheduler(); + let (sched, _) = dummy_scheduler(); let (dummy_scheduler, _) = dummy_scheduler(); - let s = new_storage_from_ents(sched.clone(), dummy_scheduler, &td, &ents); + let s = new_storage_from_ents(sched, dummy_scheduler, &td, &ents); let (router, _) = mpsc::sync_channel(100); let mut pd_client = TestPdClient::new(); let labels = vec![StoreLabel { @@ -1856,22 +1853,24 @@ pub mod tests { let store = new_store(1, labels); pd_client.add_store(store); let pd_mock = Arc::new(pd_client); - let cfg = make_region_worker_raftstore_cfg(true); - let runner = RegionRunner::new( + + let mut snap_gen_worker = LazyWorker::new("snap-generator"); + let snap_gen_sched = snap_gen_worker.scheduler(); + let snap_gen_runner = SnapGenRunner::new( s.engines.kv.clone(), mgr, - cfg, - CoprocessorHost::::default(), router, Some(pd_mock), + snap_gen_worker.pool(), ); - worker.start_with_timer(runner); + snap_gen_worker.start(snap_gen_runner); + let snap = s.snapshot(0, 1); let unavailable = RaftError::Store(StorageError::SnapshotTemporarilyUnavailable); assert_eq!(snap.unwrap_err(), unavailable); assert_eq!(*s.snap_tried_cnt.borrow(), 1); let gen_task = s.gen_snap_task.borrow_mut().take().unwrap(); - generate_and_schedule_snapshot(gen_task, &s.engines, &sched).unwrap(); + generate_and_schedule_snapshot(gen_task, &s.engines, &snap_gen_sched).unwrap(); let snap = match *s.snap_state.borrow() { SnapState::Generating { ref receiver, .. 
} => { receiver.recv_timeout(Duration::from_secs(3)).unwrap() @@ -1912,21 +1911,22 @@ pub mod tests { let snap_dir = Builder::new().prefix("snap_dir").tempdir().unwrap(); let mgr = SnapManager::new(snap_dir.path().to_str().unwrap()); mgr.init().unwrap(); - let mut worker = Worker::new("region-worker").lazy_build("region-worker"); - let sched = worker.scheduler(); + + let (sched, _) = dummy_scheduler(); let (dummy_scheduler, _) = dummy_scheduler(); - let mut s = new_storage_from_ents(sched.clone(), dummy_scheduler, &td, &ents); - let cfg = make_region_worker_raftstore_cfg(true); + let mut s = new_storage_from_ents(sched, dummy_scheduler, &td, &ents); let (router, _) = mpsc::sync_channel(100); - let runner = RegionRunner::new( + + let mut snap_gen_worker = LazyWorker::new("snap-generator"); + let snap_gen_sched = snap_gen_worker.scheduler(); + let snap_gen_runner = SnapGenRunner::new( s.engines.kv.clone(), mgr, - cfg, - CoprocessorHost::::default(), router, Option::>::None, + snap_gen_worker.pool(), ); - worker.start_with_timer(runner); + snap_gen_worker.start(snap_gen_runner); let mut r = s.region().clone(); r.mut_peers().push(new_peer(2, 2)); @@ -1945,7 +1945,7 @@ pub mod tests { assert_eq!(snap.unwrap_err(), unavailable); assert_eq!(*s.snap_tried_cnt.borrow(), 1); let gen_task = s.gen_snap_task.borrow_mut().take().unwrap(); - generate_and_schedule_snapshot(gen_task, &s.engines, &sched).unwrap(); + generate_and_schedule_snapshot(gen_task, &s.engines, &snap_gen_sched).unwrap(); let snap = match *s.snap_state.borrow() { SnapState::Generating { ref receiver, .. } => { receiver.recv_timeout(Duration::from_secs(3)).unwrap() @@ -1992,24 +1992,25 @@ pub mod tests { let snap_dir = Builder::new().prefix("snap").tempdir().unwrap(); let mgr = SnapManager::new(snap_dir.path().to_str().unwrap()); mgr.init().unwrap(); - let mut worker = LazyWorker::new("snap-manager"); - let sched = worker.scheduler(); + let (sched, _) = dummy_scheduler(); let (dummy_scheduler, _) = dummy_scheduler(); let s1 = new_storage_from_ents(sched.clone(), dummy_scheduler.clone(), &td1, &ents); let (router, _) = mpsc::sync_channel(100); - let cfg = make_region_worker_raftstore_cfg(true); - let runner = RegionRunner::new( + + let mut snap_gen_worker = LazyWorker::new("snap-generator"); + let snap_gen_sched = snap_gen_worker.scheduler(); + let snap_gen_runner = SnapGenRunner::new( s1.engines.kv.clone(), mgr, - cfg, - CoprocessorHost::::default(), router, Option::>::None, + snap_gen_worker.pool(), ); - worker.start(runner); + snap_gen_worker.start(snap_gen_runner); + s1.snapshot(0, 1).unwrap_err(); let gen_task = s1.gen_snap_task.borrow_mut().take().unwrap(); - generate_and_schedule_snapshot(gen_task, &s1.engines, &sched).unwrap(); + generate_and_schedule_snapshot(gen_task, &s1.engines, &snap_gen_sched).unwrap(); let snap1 = match *s1.snap_state.borrow() { SnapState::Generating { ref receiver, .. 
} => { @@ -2019,7 +2020,7 @@ pub mod tests { }; assert_eq!(s1.truncated_index(), 3); assert_eq!(s1.truncated_term(), 3); - worker.stop(); + snap_gen_worker.stop(); let td2 = Builder::new().prefix("tikv-store-test").tempdir().unwrap(); let mut s2 = new_storage(sched.clone(), dummy_scheduler.clone(), &td2); diff --git a/components/raftstore/src/store/worker/mod.rs b/components/raftstore/src/store/worker/mod.rs index c47461d62ff..af620bdef6e 100644 --- a/components/raftstore/src/store/worker/mod.rs +++ b/components/raftstore/src/store/worker/mod.rs @@ -12,12 +12,11 @@ mod raftlog_gc; mod read; mod refresh_config; mod region; +mod snap_gen; mod split_check; mod split_config; mod split_controller; -#[cfg(test)] -pub use self::region::tests::make_raftstore_cfg as make_region_worker_raftstore_cfg; pub use self::{ check_leader::{Runner as CheckLeaderRunner, Task as CheckLeaderTask}, cleanup::{Runner as CleanupRunner, Task as CleanupTask}, @@ -44,6 +43,7 @@ pub use self::{ Task as RefreshConfigTask, WriterContoller, }, region::{Runner as RegionRunner, Task as RegionTask}, + snap_gen::{Runner as SnapGenRunner, Task as SnapGenTask, SNAP_GENERATOR_MAX_POOL_SIZE}, split_check::{ Bucket, BucketRange, BucketStatsInfo, KeyEntry, Runner as SplitCheckRunner, Task as SplitCheckTask, diff --git a/components/raftstore/src/store/worker/region.rs b/components/raftstore/src/store/worker/region.rs index 4af19c53035..3e7a1e7848f 100644 --- a/components/raftstore/src/store/worker/region.rs +++ b/components/raftstore/src/store/worker/region.rs @@ -8,38 +8,32 @@ use std::{ }, fmt::{self, Display, Formatter}, sync::{ - atomic::{AtomicBool, AtomicUsize, Ordering}, - mpsc::SyncSender, + atomic::{AtomicUsize, Ordering}, Arc, }, time::Duration, u64, }; -use collections::HashMap; use engine_traits::{ DeleteStrategy, KvEngine, Mutable, Range, WriteBatch, WriteOptions, CF_LOCK, CF_RAFT, }; use fail::fail_point; -use file_system::{IoType, WithIoType}; use kvproto::raft_serverpb::{PeerState, RaftApplyState, RegionLocalState}; -use pd_client::PdClient; -use raft::eraftpb::Snapshot as RaftSnapshot; use tikv_util::{ box_err, box_try, config::VersionTrack, defer, error, info, - time::{Instant, UnixSecs}, + time::Instant, warn, worker::{Runnable, RunnableWithTimer}, - yatp_pool::{DefaultTicker, FuturePool, YatpPoolBuilder}, }; use super::metrics::*; use crate::{ coprocessor::CoprocessorHost, store::{ - self, check_abort, + check_abort, peer_storage::{ JOB_STATUS_CANCELLED, JOB_STATUS_CANCELLING, JOB_STATUS_FAILED, JOB_STATUS_FINISHED, JOB_STATUS_PENDING, JOB_STATUS_RUNNING, @@ -51,24 +45,10 @@ use crate::{ }; const CLEANUP_MAX_REGION_COUNT: usize = 64; -const SNAP_GENERATOR_MAX_POOL_SIZE: usize = 16; - -const TIFLASH: &str = "tiflash"; -const ENGINE: &str = "engine"; /// Region related task #[derive(Debug)] -pub enum Task { - Gen { - region_id: u64, - last_applied_term: u64, - last_applied_state: RaftApplyState, - kv_snap: S, - canceled: Arc, - notifier: SyncSender, - for_balance: bool, - to_store_id: u64, - }, +pub enum Task { Apply { region_id: u64, status: Arc, @@ -86,8 +66,8 @@ pub enum Task { }, } -impl Task { - pub fn destroy(region_id: u64, start_key: Vec, end_key: Vec) -> Task { +impl Task { + pub fn destroy(region_id: u64, start_key: Vec, end_key: Vec) -> Task { Task::Destroy { region_id, start_key, @@ -96,10 +76,9 @@ impl Task { } } -impl Display for Task { +impl Display for Task { fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { match *self { - Task::Gen { region_id, .. 
} => write!(f, "Snap gen for {}", region_id), Task::Apply { region_id, .. } => write!(f, "Snap apply for {}", region_id), Task::Destroy { region_id, @@ -239,112 +218,9 @@ impl PendingDeleteRanges { } } -struct SnapGenContext { - engine: EK, - mgr: SnapManager, - router: R, - start: UnixSecs, -} - -impl SnapGenContext +pub struct Runner where EK: KvEngine, - R: CasualRouter, -{ - /// Generates the snapshot of the Region. - fn generate_snap( - &self, - region_id: u64, - last_applied_term: u64, - last_applied_state: RaftApplyState, - kv_snap: EK::Snapshot, - notifier: SyncSender, - for_balance: bool, - allow_multi_files_snapshot: bool, - ) -> Result<()> { - // do we need to check leader here? - let snap = box_try!(store::do_snapshot::( - self.mgr.clone(), - &self.engine, - kv_snap, - region_id, - last_applied_term, - last_applied_state, - for_balance, - allow_multi_files_snapshot, - self.start - )); - // Only enable the fail point when the region id is equal to 1, which is - // the id of bootstrapped region in tests. - fail_point!("region_gen_snap", region_id == 1, |_| Ok(())); - if let Err(e) = notifier.try_send(snap) { - info!( - "failed to notify snap result, leadership may have changed, ignore error"; - "region_id" => region_id, - "err" => %e, - ); - } - // The error can be ignored as snapshot will be sent in next heartbeat in the - // end. - let _ = self - .router - .send(region_id, CasualMessage::SnapshotGenerated); - Ok(()) - } - - /// Handles the task of generating snapshot of the Region. It calls - /// `generate_snap` to do the actual work. - fn handle_gen( - &self, - region_id: u64, - last_applied_term: u64, - last_applied_state: RaftApplyState, - kv_snap: EK::Snapshot, - canceled: Arc, - notifier: SyncSender, - for_balance: bool, - allow_multi_files_snapshot: bool, - ) { - fail_point!("before_region_gen_snap", |_| ()); - SNAP_COUNTER.generate.start.inc(); - if canceled.load(Ordering::Relaxed) { - info!("generate snap is canceled"; "region_id" => region_id); - SNAP_COUNTER.generate.abort.inc(); - return; - } - - let start = Instant::now(); - let _io_type_guard = WithIoType::new(if for_balance { - IoType::LoadBalance - } else { - IoType::Replication - }); - - if let Err(e) = self.generate_snap( - region_id, - last_applied_term, - last_applied_state, - kv_snap, - notifier, - for_balance, - allow_multi_files_snapshot, - ) { - error!(%e; "failed to generate snap!!!"; "region_id" => region_id,); - SNAP_COUNTER.generate.fail.inc(); - return; - } - - SNAP_COUNTER.generate.success.inc(); - SNAP_HISTOGRAM - .generate - .observe(start.saturating_elapsed_secs()); - } -} - -pub struct Runner -where - EK: KvEngine, - T: PdClient + 'static, { batch_size: usize, use_delete_range: bool, @@ -353,10 +229,9 @@ where clean_stale_check_interval: Duration, clean_stale_ranges_tick: usize, - tiflash_stores: HashMap, // we may delay some apply tasks if level 0 files to write stall threshold, // pending_applies records all delayed apply task, and will check again later - pending_applies: VecDeque>, + pending_applies: VecDeque, // Ranges that have been logically destroyed at a specific sequence number. We can // assume there will be no reader (engine snapshot) newer than that sequence number. 
Therefore, // they can be physically deleted with `DeleteFiles` when we're sure there is no older @@ -371,15 +246,12 @@ where mgr: SnapManager, coprocessor_host: CoprocessorHost, router: R, - pd_client: Option>, - pool: FuturePool, } -impl Runner +impl Runner where EK: KvEngine, R: CasualRouter, - T: PdClient + 'static, { pub fn new( engine: EK, @@ -387,8 +259,7 @@ where cfg: Arc>, coprocessor_host: CoprocessorHost, router: R, - pd_client: Option>, - ) -> Runner { + ) -> Runner { Runner { batch_size: cfg.value().snap_apply_batch_size.0 as usize, use_delete_range: cfg.value().use_delete_range, @@ -398,29 +269,15 @@ where cfg.value().region_worker_tick_interval.as_millis(), ), clean_stale_ranges_tick: cfg.value().clean_stale_ranges_tick, - tiflash_stores: HashMap::default(), pending_applies: VecDeque::new(), pending_delete_ranges: PendingDeleteRanges::default(), engine, mgr, coprocessor_host, router, - pd_client, - pool: YatpPoolBuilder::new(DefaultTicker::default()) - .name_prefix("snap-generator") - .thread_count( - 1, - cfg.value().snap_generator_pool_size, - SNAP_GENERATOR_MAX_POOL_SIZE, - ) - .build_future_pool(), } } - pub fn snap_generator_pool(&self) -> FuturePool { - self.pool.clone() - } - fn region_state(&self, region_id: u64) -> Result { let region_key = keys::region_state_key(region_id); let region_state: RegionLocalState = @@ -763,7 +620,7 @@ where /// Calls observer `pre_apply_snapshot` for every task. /// Multiple task can be `pre_apply_snapshot` at the same time. - fn pre_apply_snapshot(&self, task: &Task) -> Result<()> { + fn pre_apply_snapshot(&self, task: &Task) -> Result<()> { let (region_id, abort, peer_id) = match task { Task::Apply { region_id, @@ -845,76 +702,15 @@ where } } -impl Runnable for Runner +impl Runnable for Runner where EK: KvEngine, R: CasualRouter + Send + Clone + 'static, - T: PdClient, { - type Task = Task; + type Task = Task; - fn run(&mut self, task: Task) { + fn run(&mut self, task: Task) { match task { - Task::Gen { - region_id, - last_applied_term, - last_applied_state, - kv_snap, - canceled, - notifier, - for_balance, - to_store_id, - } => { - // It is safe for now to handle generating and applying snapshot concurrently, - // but it may not when merge is implemented. 
- let mut allow_multi_files_snapshot = false; - // if to_store_id is 0, it means the to_store_id cannot be found - if to_store_id != 0 { - if let Some(is_tiflash) = self.tiflash_stores.get(&to_store_id) { - allow_multi_files_snapshot = !is_tiflash; - } else { - let is_tiflash = self.pd_client.as_ref().map_or(false, |pd_client| { - if let Ok(s) = pd_client.get_store(to_store_id) { - return s.get_labels().iter().any(|label| { - label.get_key().to_lowercase() == ENGINE - && label.get_value().to_lowercase() == TIFLASH - }); - } - true - }); - self.tiflash_stores.insert(to_store_id, is_tiflash); - allow_multi_files_snapshot = !is_tiflash; - } - } - SNAP_COUNTER.generate.all.inc(); - let ctx = SnapGenContext { - engine: self.engine.clone(), - mgr: self.mgr.clone(), - router: self.router.clone(), - start: UnixSecs::now(), - }; - let scheduled_time = Instant::now_coarse(); - self.pool.spawn(async move { - SNAP_GEN_WAIT_DURATION_HISTOGRAM - .observe(scheduled_time.saturating_elapsed_secs()); - - ctx.handle_gen( - region_id, - last_applied_term, - last_applied_state, - kv_snap, - canceled, - notifier, - for_balance, - allow_multi_files_snapshot, - ); - }).unwrap_or_else( - |e| { - error!("failed to generate snapshot"; "region_id" => region_id, "err" => ?e); - SNAP_COUNTER.generate.fail.inc(); - }, - ); - } task @ Task::Apply { .. } => { fail_point!("on_region_worker_apply", true, |_| {}); if self.coprocessor_host.should_pre_apply_snapshot() { @@ -945,11 +741,10 @@ where } } -impl RunnableWithTimer for Runner +impl RunnableWithTimer for Runner where EK: KvEngine, R: CasualRouter + Send + Clone + 'static, - T: PdClient + 'static, { fn on_timeout(&mut self) { self.handle_pending_applies(true); @@ -969,15 +764,15 @@ where pub(crate) mod tests { use std::{ io, - sync::{atomic::AtomicUsize, mpsc, Arc}, + sync::{ + atomic::{AtomicBool, AtomicUsize}, + mpsc, Arc, + }, thread, time::Duration, }; - use engine_test::{ - ctor::CfOptions, - kv::{KvTestEngine, KvTestSnapshot}, - }; + use engine_test::{ctor::CfOptions, kv::KvTestEngine}; use engine_traits::{ CompactExt, FlowControlFactorsExt, KvEngine, MiscExt, Mutable, Peekable, RaftEngineReadOnly, SyncMutable, WriteBatch, WriteBatchExt, CF_DEFAULT, CF_WRITE, @@ -999,8 +794,10 @@ pub(crate) mod tests { ObserverContext, }, store::{ - peer_storage::JOB_STATUS_PENDING, snap::tests::get_test_db_for_regions, - worker::RegionRunner, CasualMessage, SnapKey, SnapManager, + peer_storage::JOB_STATUS_PENDING, + snap::tests::get_test_db_for_regions, + worker::{RegionRunner, SnapGenRunner, SnapGenTask}, + CasualMessage, SnapKey, SnapManager, }, }; @@ -1109,7 +906,7 @@ pub(crate) mod tests { let snap_dir = Builder::new().prefix("snap_dir").tempdir().unwrap(); let mgr = SnapManager::new(snap_dir.path().to_str().unwrap()); let bg_worker = Worker::new("region-worker"); - let mut worker: LazyWorker> = bg_worker.lazy_build("region-worker"); + let mut worker: LazyWorker = bg_worker.lazy_build("region-worker"); let sched = worker.scheduler(); let (router, _) = mpsc::sync_channel(11); let cfg = make_raftstore_cfg(false); @@ -1119,7 +916,6 @@ pub(crate) mod tests { cfg, CoprocessorHost::::default(), router, - Option::>::None, ); runner.clean_stale_check_interval = Duration::from_millis(100); @@ -1223,13 +1019,23 @@ pub(crate) mod tests { let cfg = make_raftstore_cfg(true); let runner = RegionRunner::new( engine.kv.clone(), - mgr, - cfg, + mgr.clone(), + cfg.clone(), host, + router.clone(), + ); + worker.start_with_timer(runner); + + let mut snap_gen_worker = 
LazyWorker::new("snap-generator"); + let snap_gen_sched = snap_gen_worker.scheduler(); + let snap_gen_runner = SnapGenRunner::new( + engine.kv.clone(), + mgr, router, Option::>::None, + snap_gen_worker.pool(), ); - worker.start_with_timer(runner); + snap_gen_worker.start(snap_gen_runner); let gen_and_apply_snap = |id: u64| { // construct snapshot @@ -1241,8 +1047,8 @@ pub(crate) mod tests { .unwrap(); let idx = apply_state.get_applied_index(); let entry = engine.raft.get_entry(id, idx).unwrap().unwrap(); - sched - .schedule(Task::Gen { + snap_gen_sched + .schedule(SnapGenTask::Gen { region_id: id, kv_snap: engine.kv.snapshot(), last_applied_term: entry.get_term(), diff --git a/components/raftstore/src/store/worker/snap_gen.rs b/components/raftstore/src/store/worker/snap_gen.rs new file mode 100644 index 00000000000..b791ff6e9e1 --- /dev/null +++ b/components/raftstore/src/store/worker/snap_gen.rs @@ -0,0 +1,269 @@ +// Copyright 2024 TiKV Project Authors. Licensed under Apache-2.0. + +use std::{ + fmt::{self, Display, Formatter}, + sync::{ + atomic::{AtomicBool, Ordering}, + mpsc::SyncSender, + Arc, + }, + u64, +}; + +use collections::HashMap; +use engine_traits::KvEngine; +use fail::fail_point; +use file_system::{IoType, WithIoType}; +use kvproto::raft_serverpb::RaftApplyState; +use pd_client::PdClient; +use raft::eraftpb::Snapshot as RaftSnapshot; +use tikv_util::{ + box_try, error, info, + time::{Instant, UnixSecs}, + worker::Runnable, + yatp_pool::FuturePool, +}; + +use super::metrics::*; +use crate::store::{self, snap::Result, transport::CasualRouter, CasualMessage, SnapManager}; + +pub const SNAP_GENERATOR_MAX_POOL_SIZE: usize = 16; + +const TIFLASH: &str = "tiflash"; +const ENGINE: &str = "engine"; + +/// Defines the snapshot generation task. +#[derive(Debug)] +pub enum Task { + Gen { + region_id: u64, + last_applied_term: u64, + last_applied_state: RaftApplyState, + kv_snap: S, + canceled: Arc, + notifier: SyncSender, + for_balance: bool, + to_store_id: u64, + }, +} + +impl Display for Task { + fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { + match *self { + Task::Gen { region_id, .. } => write!(f, "Snap gen for {}", region_id), + } + } +} + +struct SnapGenContext { + engine: EK, + mgr: SnapManager, + router: R, + start: UnixSecs, +} + +impl SnapGenContext +where + EK: KvEngine, + R: CasualRouter, +{ + /// Generates the snapshot of the Region. + fn generate_snap( + &self, + region_id: u64, + last_applied_term: u64, + last_applied_state: RaftApplyState, + kv_snap: EK::Snapshot, + notifier: SyncSender, + for_balance: bool, + allow_multi_files_snapshot: bool, + ) -> Result<()> { + // do we need to check leader here? + let snap = box_try!(store::do_snapshot::( + self.mgr.clone(), + &self.engine, + kv_snap, + region_id, + last_applied_term, + last_applied_state, + for_balance, + allow_multi_files_snapshot, + self.start + )); + // Only enable the fail point when the region id is equal to 1, which is + // the id of bootstrapped region in tests. + fail_point!("region_gen_snap", region_id == 1, |_| Ok(())); + if let Err(e) = notifier.try_send(snap) { + info!( + "failed to notify snap result, leadership may have changed, ignore error"; + "region_id" => region_id, + "err" => %e, + ); + } + // The error can be ignored as snapshot will be sent in next heartbeat in the + // end. + let _ = self + .router + .send(region_id, CasualMessage::SnapshotGenerated); + Ok(()) + } + + /// Handles the task of generating snapshot of the Region. It calls + /// `generate_snap` to do the actual work. 
+ fn handle_gen( + &self, + region_id: u64, + last_applied_term: u64, + last_applied_state: RaftApplyState, + kv_snap: EK::Snapshot, + canceled: Arc, + notifier: SyncSender, + for_balance: bool, + allow_multi_files_snapshot: bool, + ) { + fail_point!("before_region_gen_snap", |_| ()); + SNAP_COUNTER.generate.start.inc(); + if canceled.load(Ordering::Relaxed) { + info!("generate snap is canceled"; "region_id" => region_id); + SNAP_COUNTER.generate.abort.inc(); + return; + } + + let start = Instant::now(); + let _io_type_guard = WithIoType::new(if for_balance { + IoType::LoadBalance + } else { + IoType::Replication + }); + + if let Err(e) = self.generate_snap( + region_id, + last_applied_term, + last_applied_state, + kv_snap, + notifier, + for_balance, + allow_multi_files_snapshot, + ) { + error!(%e; "failed to generate snap!!!"; "region_id" => region_id,); + SNAP_COUNTER.generate.fail.inc(); + return; + } + + SNAP_COUNTER.generate.success.inc(); + SNAP_HISTOGRAM + .generate + .observe(start.saturating_elapsed_secs()); + } +} + +pub struct Runner +where + EK: KvEngine, + T: PdClient + 'static, +{ + tiflash_stores: HashMap, + + engine: EK, + mgr: SnapManager, + router: R, + pd_client: Option>, + pool: FuturePool, +} + +impl Runner +where + EK: KvEngine, + R: CasualRouter, + T: PdClient + 'static, +{ + pub fn new( + engine: EK, + mgr: SnapManager, + router: R, + pd_client: Option>, + pool: FuturePool, + ) -> Runner { + Runner { + tiflash_stores: HashMap::default(), + engine, + mgr, + router, + pd_client, + pool, + } + } +} + +impl Runnable for Runner +where + EK: KvEngine, + R: CasualRouter + Send + Clone + 'static, + T: PdClient, +{ + type Task = Task; + + fn run(&mut self, task: Task) { + match task { + Task::Gen { + region_id, + last_applied_term, + last_applied_state, + kv_snap, + canceled, + notifier, + for_balance, + to_store_id, + } => { + let mut allow_multi_files_snapshot = false; + // if to_store_id is 0, it means the to_store_id cannot be found + if to_store_id != 0 { + if let Some(is_tiflash) = self.tiflash_stores.get(&to_store_id) { + allow_multi_files_snapshot = !is_tiflash; + } else { + let is_tiflash = self.pd_client.as_ref().map_or(false, |pd_client| { + if let Ok(s) = pd_client.get_store(to_store_id) { + return s.get_labels().iter().any(|label| { + label.get_key().to_lowercase() == ENGINE + && label.get_value().to_lowercase() == TIFLASH + }); + } + true + }); + self.tiflash_stores.insert(to_store_id, is_tiflash); + allow_multi_files_snapshot = !is_tiflash; + } + } + SNAP_COUNTER.generate.all.inc(); + let ctx = SnapGenContext { + engine: self.engine.clone(), + mgr: self.mgr.clone(), + router: self.router.clone(), + start: UnixSecs::now(), + }; + + let scheduled_time = Instant::now_coarse(); + self.pool.spawn(async move { + SNAP_GEN_WAIT_DURATION_HISTOGRAM + .observe(scheduled_time.saturating_elapsed_secs()); + + ctx.handle_gen( + region_id, + last_applied_term, + last_applied_state, + kv_snap, + canceled, + notifier, + for_balance, + allow_multi_files_snapshot, + ); + }).unwrap_or_else( + |e| { + error!("failed to generate snapshot"; "region_id" => region_id, "err" => ?e); + SNAP_COUNTER.generate.fail.inc(); + }, + ); + } + } + } +} diff --git a/components/tikv_util/src/worker/pool.rs b/components/tikv_util/src/worker/pool.rs index a5c9e1b323a..72cab2ff0d6 100644 --- a/components/tikv_util/src/worker/pool.rs +++ b/components/tikv_util/src/worker/pool.rs @@ -300,7 +300,9 @@ pub fn dummy_scheduler() -> (Scheduler, ReceiverWrapper #[derive(Copy, Clone)] pub struct Builder> { 
name: S, - thread_count: usize, + core_thread_count: usize, + min_thread_count: Option, + max_thread_count: Option, pending_capacity: usize, } @@ -308,7 +310,9 @@ impl> Builder { pub fn new(name: S) -> Self { Builder { name, - thread_count: 1, + core_thread_count: 1, + min_thread_count: None, + max_thread_count: None, pending_capacity: usize::MAX, } } @@ -322,21 +326,32 @@ impl> Builder { #[must_use] pub fn thread_count(mut self, thread_count: usize) -> Self { - self.thread_count = thread_count; + self.core_thread_count = thread_count; + self + } + + #[must_use] + pub fn thread_count_limits(mut self, min_thread_count: usize, max_thread_count: usize) -> Self { + self.min_thread_count = Some(min_thread_count); + self.max_thread_count = Some(max_thread_count); self } pub fn create(self) -> Worker { let pool = YatpPoolBuilder::new(DefaultTicker::default()) .name_prefix(self.name) - .thread_count(self.thread_count, self.thread_count, self.thread_count) + .thread_count( + self.min_thread_count.unwrap_or(self.core_thread_count), + self.core_thread_count, + self.max_thread_count.unwrap_or(self.core_thread_count), + ) .build_future_pool(); Worker { stop: Arc::new(AtomicBool::new(false)), pool, counter: Arc::new(AtomicUsize::new(0)), pending_capacity: self.pending_capacity, - thread_count: self.thread_count, + thread_count: self.core_thread_count, } } } From f51e8489e208650f91c1434f430fabe20df70cac Mon Sep 17 00:00:00 2001 From: Connor Date: Fri, 1 Nov 2024 15:28:15 +0800 Subject: [PATCH 05/86] batch_system: Add fsm schedule related metrics (#17723) ref tikv/tikv#15990 Add fsm schedule related metrics Signed-off-by: Connor Signed-off-by: Connor1996 Co-authored-by: Bisheng Huang --- Cargo.lock | 1 + components/batch-system/Cargo.toml | 1 + components/batch-system/src/batch.rs | 82 +- components/batch-system/src/fsm.rs | 4 +- components/batch-system/src/lib.rs | 1 + components/batch-system/src/metrics.rs | 78 + components/batch-system/src/scheduler.rs | 15 +- components/batch-system/src/test_runner.rs | 2 + components/raftstore-v2/src/fsm/peer.rs | 2 + components/raftstore-v2/src/fsm/store.rs | 2 + components/raftstore/src/store/fsm/apply.rs | 11 +- components/raftstore/src/store/fsm/peer.rs | 35 +- components/raftstore/src/store/fsm/store.rs | 6 +- components/raftstore/src/store/metrics.rs | 10 +- metrics/grafana/tikv_details.dashboard.py | 90 + metrics/grafana/tikv_details.json | 1890 +++++++++++++++---- metrics/grafana/tikv_details.json.sha256 | 2 +- 17 files changed, 1778 insertions(+), 454 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index b109e0255b3..0504f5dc74e 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -545,6 +545,7 @@ dependencies = [ "lazy_static", "online_config", "prometheus", + "prometheus-static-metric", "resource_control", "serde", "slog", diff --git a/components/batch-system/Cargo.toml b/components/batch-system/Cargo.toml index 09607aa8fe2..5f6cf792c6d 100644 --- a/components/batch-system/Cargo.toml +++ b/components/batch-system/Cargo.toml @@ -18,6 +18,7 @@ kvproto = { workspace = true } lazy_static = "1.3" online_config = { workspace = true } prometheus = { version = "0.13", default-features = false, features = ["nightly"] } +prometheus-static-metric = "0.5" resource_control = { workspace = true } serde = { version = "1.0", features = ["derive"] } slog = { workspace = true } diff --git a/components/batch-system/src/batch.rs b/components/batch-system/src/batch.rs index c09b63b33cf..8cc873bd4b6 100644 --- a/components/batch-system/src/batch.rs +++ 
b/components/batch-system/src/batch.rs @@ -30,35 +30,61 @@ use crate::{ config::Config, fsm::{Fsm, FsmScheduler, Priority}, mailbox::BasicMailbox, + metrics::*, router::Router, scheduler::{ControlScheduler, NormalScheduler}, }; /// A unify type for FSMs so that they can be sent to channel easily. pub enum FsmTypes { - Normal(Box), - Control(Box), + Normal((Box, Instant)), + Control((Box, Instant)), // Used as a signal that scheduler should be shutdown. Empty, } -pub struct NormalFsm { + +struct MetricsCollector { + timer: Instant, // time since polled + round: usize, // how many round the fsm has been continuously polled + _phantom: std::marker::PhantomData, +} + +impl MetricsCollector { + fn new() -> MetricsCollector { + MetricsCollector { + timer: Instant::now_coarse(), + round: 0, + _phantom: std::marker::PhantomData, + } + } +} + +impl Drop for MetricsCollector { + fn drop(&mut self) { + FSM_POLL_ROUND.get(N::FSM_TYPE).observe(self.round as f64); + FSM_POLL_DURATION + .get(N::FSM_TYPE) + .observe(self.timer.saturating_elapsed_secs()); + } +} + +pub struct NormalFsm { fsm: Box, - timer: Instant, + metrics: MetricsCollector, policy: Option, } -impl NormalFsm { +impl NormalFsm { #[inline] fn new(fsm: Box) -> NormalFsm { NormalFsm { fsm, - timer: Instant::now_coarse(), + metrics: MetricsCollector::::new(), policy: None, } } } - -impl Deref for NormalFsm { +impl Deref for NormalFsm { type Target = N; #[inline] @@ -67,7 +93,7 @@ impl Deref for NormalFsm { } } -impl DerefMut for NormalFsm { +impl DerefMut for NormalFsm { #[inline] fn deref_mut(&mut self) -> &mut N { &mut self.fsm @@ -76,7 +102,7 @@ impl DerefMut for NormalFsm { /// A basic struct for a round of polling. #[allow(clippy::vec_box)] -pub struct Batch { +pub struct Batch { normals: Vec>>, control: Option>, } @@ -90,12 +116,27 @@ impl Batch { } } + fn tick_round(&mut self) { + FSM_COUNT_PER_POLL + .get(N::FSM_TYPE) + .observe(self.normals.len() as f64); + for f in self.normals.iter_mut().filter_map(Option::as_mut) { + f.metrics.round += 1; + } + } + fn push(&mut self, fsm: FsmTypes) -> bool { match fsm { - FsmTypes::Normal(n) => { + FsmTypes::Normal((n, schedule_time)) => { + FSM_SCHEDULE_WAIT_DURATION + .get(N::FSM_TYPE) + .observe(schedule_time.saturating_elapsed_secs()); self.normals.push(Some(NormalFsm::new(n))); } - FsmTypes::Control(c) => { + FsmTypes::Control((c, schedule_time)) => { + FSM_SCHEDULE_WAIT_DURATION + .get(C::FSM_TYPE) + .observe(schedule_time.saturating_elapsed_secs()); assert!(self.control.is_none()); self.control = Some(c); } @@ -167,6 +208,7 @@ impl Batch { Some(ReschedulePolicy::Release(l)) => self.release(to_schedule, l), Some(ReschedulePolicy::Remove) => self.remove(to_schedule), Some(ReschedulePolicy::Schedule) => { + FSM_RESCHEDULE_COUNTER.get(N::FSM_TYPE).inc(); router.normal_scheduler.schedule(to_schedule.fsm); None } @@ -354,15 +396,9 @@ impl> Poller { // hungry if some regions are hot points. let mut max_batch_size = std::cmp::max(self.max_batch_size, batch.normals.len()); // Update some online config if needed. - { - // TODO: rust 2018 does not support capture disjoint field within a closure. - // See https://github.com/rust-lang/rust/issues/53488 for more details. - // We can remove this once we upgrade to rust 2021 or later edition. 
- let batch_size = &mut self.max_batch_size; - self.handler.begin(max_batch_size, |cfg| { - *batch_size = cfg.max_batch_size(); - }); - } + self.handler.begin(max_batch_size, |cfg| { + self.max_batch_size = cfg.max_batch_size(); + }); max_batch_size = std::cmp::max(self.max_batch_size, batch.normals.len()); if batch.control.is_some() { @@ -385,7 +421,7 @@ impl> Poller { p.policy = Some(ReschedulePolicy::Schedule); reschedule_fsms.push(i); } else { - if p.timer.saturating_elapsed() >= self.reschedule_duration { + if p.metrics.timer.saturating_elapsed() >= self.reschedule_duration { hot_fsm_count += 1; // We should only reschedule a half of the hot regions, otherwise, // it's possible all the hot regions are fetched in a batch the @@ -439,6 +475,8 @@ impl> Poller { to_skip_end.clear(); self.handler.end(&mut batch.normals); + // Update round times for metrics. + batch.tick_round(); // Iterate larger index first, so that `swap_reclaim` won't affect other FSMs // in the list. for index in reschedule_fsms.iter().rev() { diff --git a/components/batch-system/src/fsm.rs b/components/batch-system/src/fsm.rs index 148550760c4..9002e16e0a2 100644 --- a/components/batch-system/src/fsm.rs +++ b/components/batch-system/src/fsm.rs @@ -12,7 +12,7 @@ use std::{ use resource_control::ResourceMetered; -use crate::mailbox::BasicMailbox; +use crate::{mailbox::BasicMailbox, metrics::FsmType}; #[derive(Clone, Copy, Debug, PartialEq)] pub enum Priority { @@ -41,6 +41,8 @@ pub trait FsmScheduler { pub trait Fsm: Send + 'static { type Message: Send + ResourceMetered; + const FSM_TYPE: FsmType; + fn is_stopped(&self) -> bool; /// Set a mailbox to FSM, which should be used to send message to itself. diff --git a/components/batch-system/src/lib.rs b/components/batch-system/src/lib.rs index 2e59d42808c..1a45f2c5cdf 100644 --- a/components/batch-system/src/lib.rs +++ b/components/batch-system/src/lib.rs @@ -19,5 +19,6 @@ pub use self::{ config::Config, fsm::{Fsm, FsmScheduler, Priority}, mailbox::{BasicMailbox, Mailbox}, + metrics::FsmType, router::Router, }; diff --git a/components/batch-system/src/metrics.rs b/components/batch-system/src/metrics.rs index a4728f32ad7..f5a1e0ec616 100644 --- a/components/batch-system/src/metrics.rs +++ b/components/batch-system/src/metrics.rs @@ -2,6 +2,35 @@ use lazy_static::lazy_static; use prometheus::*; +use prometheus_static_metric::*; + +make_auto_flush_static_metric! { + pub label_enum FsmType { + store, + apply, + } + + pub struct FsmRescheduleCounterVec: LocalIntCounter { + "type" => FsmType, + } + + pub struct FsmScheduleWaitDurationVec: LocalHistogram { + "type" => FsmType, + } + + pub struct FsmPollDurationVec: LocalHistogram { + "type" => FsmType, + } + + pub struct FsmPollRoundVec: LocalHistogram { + "type" => FsmType, + } + + pub struct FsmCountPerPollVec: LocalHistogram { + "type" => FsmType, + } + +} lazy_static! { pub static ref CHANNEL_FULL_COUNTER_VEC: IntCounterVec = register_int_counter_vec!( @@ -11,6 +40,55 @@ lazy_static! 
{ ) .unwrap(); + pub static ref FSM_RESCHEDULE_COUNTER_VEC: IntCounterVec = register_int_counter_vec!( + "tikv_batch_system_fsm_reschedule_total", + "Total number of fsm reschedule.", + &["type"] + ) + .unwrap(); + pub static ref FSM_RESCHEDULE_COUNTER: FsmRescheduleCounterVec = + auto_flush_from!(FSM_RESCHEDULE_COUNTER_VEC, FsmRescheduleCounterVec); + + pub static ref FSM_SCHEDULE_WAIT_DURATION_VEC: HistogramVec = + register_histogram_vec!( + "tikv_batch_system_fsm_schedule_wait_seconds", + "Duration of fsm waiting to be polled.", + &["type"], + exponential_buckets(0.001, 1.59, 20).unwrap(), // max 10s + ).unwrap(); + pub static ref FSM_SCHEDULE_WAIT_DURATION: FsmScheduleWaitDurationVec = + auto_flush_from!(FSM_SCHEDULE_WAIT_DURATION_VEC, FsmScheduleWaitDurationVec); + + pub static ref FSM_POLL_DURATION_VEC: HistogramVec = + register_histogram_vec!( + "tikv_batch_system_fsm_poll_seconds", + "Total time for an FSM to finish processing all messages, potentially over multiple polling rounds.", + &["type"], + exponential_buckets(0.001, 1.59, 20).unwrap(), // max 10s + ).unwrap(); + pub static ref FSM_POLL_DURATION: FsmPollDurationVec = + auto_flush_from!(FSM_POLL_DURATION_VEC, FsmPollDurationVec); + + pub static ref FSM_POLL_ROUND_VEC: HistogramVec = + register_histogram_vec!( + "tikv_batch_system_fsm_poll_rounds", + "Number of polling rounds for an FSM to finish processing all messages.", + &["type"], + exponential_buckets(1.0, 2.0, 20).unwrap(), + ).unwrap(); + pub static ref FSM_POLL_ROUND: FsmPollRoundVec = + auto_flush_from!(FSM_POLL_ROUND_VEC, FsmPollRoundVec); + + pub static ref FSM_COUNT_PER_POLL_VEC: HistogramVec = + register_histogram_vec!( + "tikv_batch_system_fsm_count_per_poll", + "Number of fsm polled in one poll.", + &["type"], + exponential_buckets(1.0, 2.0, 20).unwrap(), + ).unwrap(); + pub static ref FSM_COUNT_PER_POLL: FsmCountPerPollVec = + auto_flush_from!(FSM_COUNT_PER_POLL_VEC, FsmCountPerPollVec); + pub static ref BROADCAST_NORMAL_DURATION: Histogram = register_histogram!( "tikv_broadcast_normal_duration_seconds", diff --git a/components/batch-system/src/scheduler.rs b/components/batch-system/src/scheduler.rs index 723863249fb..12db92b8fa7 100644 --- a/components/batch-system/src/scheduler.rs +++ b/components/batch-system/src/scheduler.rs @@ -2,7 +2,7 @@ use crossbeam::channel::SendError; use resource_control::channel::Sender; -use tikv_util::warn; +use tikv_util::{time::Instant, warn}; use crate::{ fsm::{Fsm, FsmScheduler, Priority}, @@ -44,9 +44,9 @@ where Priority::Low => &self.low_sender, }; - match sender.send(FsmTypes::Normal(fsm), None) { + match sender.send(FsmTypes::Normal((fsm, Instant::now_coarse())), None) { Ok(_) => {} - Err(SendError(FsmTypes::Normal(fsm))) => warn!("failed to schedule fsm {:p}", fsm), + Err(SendError(FsmTypes::Normal((fsm, _)))) => warn!("failed to schedule fsm {:p}", fsm), _ => unreachable!(), } } @@ -88,9 +88,14 @@ where #[inline] fn schedule(&self, fsm: Box) { - match self.sender.send(FsmTypes::Control(fsm), None) { + match self + .sender + .send(FsmTypes::Control((fsm, Instant::now_coarse())), None) + { Ok(_) => {} - Err(SendError(FsmTypes::Control(fsm))) => warn!("failed to schedule fsm {:p}", fsm), + Err(SendError(FsmTypes::Control((fsm, _)))) => { + warn!("failed to schedule fsm {:p}", fsm) + } _ => unreachable!(), } } diff --git a/components/batch-system/src/test_runner.rs b/components/batch-system/src/test_runner.rs index ad9c3f54d04..7723fa88882 100644 --- a/components/batch-system/src/test_runner.rs +++ 
b/components/batch-system/src/test_runner.rs @@ -54,6 +54,8 @@ pub struct Runner { impl Fsm for Runner { type Message = Message; + const FSM_TYPE: FsmType = FsmType::store; + fn is_stopped(&self) -> bool { self.is_stopped } diff --git a/components/raftstore-v2/src/fsm/peer.rs b/components/raftstore-v2/src/fsm/peer.rs index 47a1aee1ef4..f41cd1cc76e 100644 --- a/components/raftstore-v2/src/fsm/peer.rs +++ b/components/raftstore-v2/src/fsm/peer.rs @@ -103,6 +103,8 @@ impl PeerFsm { impl Fsm for PeerFsm { type Message = PeerMsg; + const FSM_TYPE: batch_system::FsmType = batch_system::FsmType::store; + #[inline] fn is_stopped(&self) -> bool { self.is_stopped diff --git a/components/raftstore-v2/src/fsm/store.rs b/components/raftstore-v2/src/fsm/store.rs index 0fa5927e3d4..107c35f5559 100644 --- a/components/raftstore-v2/src/fsm/store.rs +++ b/components/raftstore-v2/src/fsm/store.rs @@ -233,6 +233,8 @@ impl StoreFsm { impl Fsm for StoreFsm { type Message = StoreMsg; + const FSM_TYPE: batch_system::FsmType = batch_system::FsmType::store; + #[inline] fn is_stopped(&self) -> bool { false diff --git a/components/raftstore/src/store/fsm/apply.rs b/components/raftstore/src/store/fsm/apply.rs index eadb3103fda..03741bebc11 100644 --- a/components/raftstore/src/store/fsm/apply.rs +++ b/components/raftstore/src/store/fsm/apply.rs @@ -23,8 +23,8 @@ use std::{ }; use batch_system::{ - BasicMailbox, BatchRouter, BatchSystem, Config as BatchSystemConfig, Fsm, HandleResult, - HandlerBuilder, PollHandler, Priority, + BasicMailbox, BatchRouter, BatchSystem, Config as BatchSystemConfig, Fsm, FsmType, + HandleResult, HandlerBuilder, PollHandler, Priority, }; use collections::{HashMap, HashMapEntry, HashSet}; use crossbeam::channel::{TryRecvError, TrySendError}; @@ -454,6 +454,7 @@ where /// The pending inspector should be cleaned at the end of a write. 
pending_latency_inspect: Vec, apply_wait: LocalHistogram, + apply_msg_len: LocalHistogram, apply_time: LocalHistogram, key_size: LocalHistogram, value_size: LocalHistogram, @@ -525,6 +526,7 @@ where pending_ssts: vec![], pending_latency_inspect: vec![], apply_wait: APPLY_TASK_WAIT_TIME_HISTOGRAM.local(), + apply_msg_len: APPLY_MSG_LEN.local(), apply_time: APPLY_TIME_HISTOGRAM.local(), key_size: STORE_APPLY_KEY_SIZE_HISTOGRAM.local(), value_size: STORE_APPLY_VALUE_SIZE_HISTOGRAM.local(), @@ -4448,6 +4450,7 @@ where #[allow(clippy::vec_box)] fn handle_tasks(&mut self, apply_ctx: &mut ApplyContext, msgs: &mut Vec>>) { + apply_ctx.apply_msg_len.observe(msgs.len() as f64); let mut drainer = msgs.drain(..); let mut batch_apply = None; loop { @@ -4559,6 +4562,8 @@ where { type Message = Box>; + const FSM_TYPE: FsmType = FsmType::apply; + #[inline] fn is_stopped(&self) -> bool { self.delegate.stopped @@ -4654,6 +4659,8 @@ impl ControlFsm { impl Fsm for ControlFsm { type Message = ControlMsg; + const FSM_TYPE: FsmType = FsmType::apply; + #[inline] fn is_stopped(&self) -> bool { self.stopped diff --git a/components/raftstore/src/store/fsm/peer.rs b/components/raftstore/src/store/fsm/peer.rs index 42f8873bf36..1c06d9f1496 100644 --- a/components/raftstore/src/store/fsm/peer.rs +++ b/components/raftstore/src/store/fsm/peer.rs @@ -16,7 +16,7 @@ use std::{ u64, }; -use batch_system::{BasicMailbox, Fsm}; +use batch_system::{BasicMailbox, Fsm, FsmType}; use collections::{HashMap, HashSet}; use engine_traits::{ Engines, KvEngine, RaftEngine, RaftLogBatch, SstMetaInfo, WriteBatchExt, CF_LOCK, CF_RAFT, @@ -576,6 +576,8 @@ where { type Message = PeerMsg; + const FSM_TYPE: FsmType = FsmType::store; + #[inline] fn is_stopped(&self) -> bool { self.stopped @@ -761,22 +763,27 @@ where // Propose batch request which may be still waiting for more raft-command if should_propose && !force_delay_fp() { self.propose_pending_batch_raft_command(); - } else if self.fsm.batch_req_builder.has_proposed_cb - && self.fsm.batch_req_builder.propose_checked.is_none() - && let Some(cmd) = self.fsm.batch_req_builder.request.take() - { - // We are delaying these requests to next loop. Try to fulfill their - // proposed callback early. - self.fsm.batch_req_builder.propose_checked = Some(false); - if let Ok(None) = self.pre_propose_raft_command(&cmd) { - if self.fsm.peer.will_likely_propose(&cmd) { - self.fsm.batch_req_builder.propose_checked = Some(true); - for cb in &mut self.fsm.batch_req_builder.callbacks { - cb.invoke_proposed(); + } else { + if self.fsm.batch_req_builder.has_proposed_cb + && self.fsm.batch_req_builder.propose_checked.is_none() + && let Some(cmd) = self.fsm.batch_req_builder.request.take() + { + // We are delaying these requests to next loop. Try to fulfill their + // proposed callback early. 
+ self.fsm.batch_req_builder.propose_checked = Some(false); + if let Ok(None) = self.pre_propose_raft_command(&cmd) { + if self.fsm.peer.will_likely_propose(&cmd) { + self.fsm.batch_req_builder.propose_checked = Some(true); + for cb in &mut self.fsm.batch_req_builder.callbacks { + cb.invoke_proposed(); + } } } + self.fsm.batch_req_builder.request = Some(cmd); + } + if self.fsm.batch_req_builder.request.is_some() { + self.ctx.raft_metrics.ready.propose_delay.inc(); } - self.fsm.batch_req_builder.request = Some(cmd); } } diff --git a/components/raftstore/src/store/fsm/store.rs b/components/raftstore/src/store/fsm/store.rs index 965b5d38fbb..bb12e8c0ed7 100644 --- a/components/raftstore/src/store/fsm/store.rs +++ b/components/raftstore/src/store/fsm/store.rs @@ -19,8 +19,8 @@ use std::{ }; use batch_system::{ - BasicMailbox, BatchRouter, BatchSystem, Config as BatchSystemConfig, Fsm, HandleResult, - HandlerBuilder, PollHandler, Priority, + BasicMailbox, BatchRouter, BatchSystem, Config as BatchSystemConfig, Fsm, FsmType, + HandleResult, HandlerBuilder, PollHandler, Priority, }; use causal_ts::CausalTsProviderImpl; use collections::{HashMap, HashMapEntry, HashSet}; @@ -793,6 +793,8 @@ where { type Message = StoreMsg; + const FSM_TYPE: FsmType = FsmType::store; + #[inline] fn is_stopped(&self) -> bool { self.store.stopped diff --git a/components/raftstore/src/store/metrics.rs b/components/raftstore/src/store/metrics.rs index 6f03f04844c..762ce4d3001 100644 --- a/components/raftstore/src/store/metrics.rs +++ b/components/raftstore/src/store/metrics.rs @@ -144,6 +144,7 @@ make_static_metric! { snapshot, pending_region, has_ready_region, + propose_delay, } pub label_enum RaftSentMessageCounterType { @@ -518,6 +519,13 @@ lazy_static! { exponential_buckets(0.00001, 2.0, 26).unwrap() ).unwrap(); + pub static ref APPLY_MSG_LEN: Histogram = + register_histogram!( + "tikv_raftstore_apply_msg_len", + "Length of apply msg.", + exponential_buckets(1.0, 2.0, 20).unwrap() // max 1024 * 1024 + ).unwrap(); + pub static ref STORE_RAFT_READY_COUNTER_VEC: IntCounterVec = register_int_counter_vec!( "tikv_raftstore_raft_ready_handled_total", @@ -738,7 +746,7 @@ lazy_static! 
{ register_histogram!( "tikv_raftstore_peer_msg_len", "Length of peer msg.", - exponential_buckets(1.0, 2.0, 20).unwrap() // max 1000s + exponential_buckets(1.0, 2.0, 20).unwrap() // max 1024 * 1024 ).unwrap(); pub static ref RAFT_READ_INDEX_PENDING_DURATION: Histogram = diff --git a/metrics/grafana/tikv_details.dashboard.py b/metrics/grafana/tikv_details.dashboard.py index 2794c863b9c..96456ae1844 100644 --- a/metrics/grafana/tikv_details.dashboard.py +++ b/metrics/grafana/tikv_details.dashboard.py @@ -2306,11 +2306,101 @@ def RaftProcess() -> RowPanel: yaxis=yaxis(format=UNITS.SECONDS), metric="tikv_replica_read_lock_check_duration_seconds_bucket", ), + graph_panel( + title="Fsm reschedule ops", + description="The number of fsm reschedule ops", + yaxes=yaxes(left_format=UNITS.OPS_PER_SEC), + targets=[ + target( + expr=expr_sum_rate( + "tikv_batch_system_fsm_reschedule_total", + by_labels=["type"], + ), + ), + ], + ), + ] + ) + layout.row( + [ + heatmap_panel( + title="Store fsm schedule wait duration", + description="Duration of store fsm waiting to be polled", + yaxis=yaxis(format=UNITS.SECONDS), + metric="tikv_batch_system_fsm_schedule_wait_seconds_bucket", + label_selectors=['type="store"'], + ), + heatmap_panel( + title="Apply fsm schedule wait duration", + description="Duration of apply fsm waiting to be polled.e", + yaxis=yaxis(format=UNITS.SECONDS), + metric="tikv_batch_system_fsm_schedule_wait_seconds_bucket", + label_selectors=['type="apply"'], + ), + ] + ) + layout.row( + [ + heatmap_panel( + title="Store fsm poll duration", + description="Total time for an store FSM to finish processing all messages, potentially over multiple polling rounds.", + yaxis=yaxis(format=UNITS.SECONDS), + metric="tikv_batch_system_fsm_poll_seconds_bucket", + label_selectors=['type="store"'], + ), + heatmap_panel( + title="Apply fsm poll duration", + description="Total time for an apply FSM to finish processing all messages, potentially over multiple polling rounds", + yaxis=yaxis(format=UNITS.SECONDS), + metric="tikv_batch_system_fsm_poll_seconds_bucket", + label_selectors=['type="apply"'], + ), + ] + ) + layout.row( + [ + heatmap_panel( + title="Store fsm poll round", + description="Number of polling rounds for an store FSM to finish processing all messages", + metric="tikv_batch_system_fsm_poll_rounds_bucket", + label_selectors=['type="store"'], + ), + heatmap_panel( + title="Apply fsm poll round", + description="Number of polling rounds for an apply FSM to finish processing all messages", + metric="tikv_batch_system_fsm_poll_rounds_bucket", + label_selectors=['type="apply"'], + ), + ] + ) + layout.row( + [ + heatmap_panel( + title="Store fsm count per poll", + description="Number of store fsm polled in one poll", + metric="tikv_batch_system_fsm_count_per_poll_bucket", + label_selectors=['type="store"'], + ), + heatmap_panel( + title="Apply fsm count per poll", + description="Number of apply fsm polled in one poll", + metric="tikv_batch_system_fsm_count_per_poll_bucket", + label_selectors=['type="apply"'], + ), + ] + ) + layout.row( + [ heatmap_panel( title="Peer msg length distribution", description="The length of peer msgs for each round handling", metric="tikv_raftstore_peer_msg_len_bucket", ), + heatmap_panel( + title="Apply msg length distribution", + description="The length of apply msgs for each round handling", + metric="tikv_raftstore_apply_msg_len_bucket", + ), ] ) layout.row( diff --git a/metrics/grafana/tikv_details.json b/metrics/grafana/tikv_details.json index 2e48886a520..df7e9cd62f1 
100644 --- a/metrics/grafana/tikv_details.json +++ b/metrics/grafana/tikv_details.json @@ -21460,6 +21460,139 @@ "yBucketNumber": null, "yBucketSize": null }, + { + "aliasColors": {}, + "bars": false, + "cacheTimeout": null, + "datasource": "${DS_TEST-CLUSTER}", + "description": "The number of fsm reschedule ops", + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, + "fill": 1, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, + "gridPos": { + "h": 7, + "w": 12, + "x": 12, + "y": 7 + }, + "height": null, + "hideTimeOverride": false, + "id": 153, + "interval": null, + "isNew": true, + "legend": { + "alignAsTable": true, + "avg": false, + "current": true, + "hideEmpty": true, + "hideZero": true, + "max": true, + "min": false, + "rightSide": true, + "show": true, + "sideWidth": null, + "sort": "max", + "sortDesc": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", + "options": { + "alertThreshold": true, + "dataLinks": [] + }, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "repeatDirection": null, + "seriesOverrides": [], + "span": null, + "stack": false, + "steppedLine": false, + "targets": [ + { + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_batch_system_fsm_reschedule_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (type) ", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "{{type}}", + "metric": "", + "query": "sum(rate(\n tikv_batch_system_fsm_reschedule_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (type) ", + "refId": "", + "step": 10, + "target": "" + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Fsm reschedule ops", + "tooltip": { + "msResolution": true, + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "transformations": [], + "transparent": false, + "type": "graph", + "xaxis": { + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "decimals": null, + "format": "ops", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "decimals": null, + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": 0 + } + }, { "cacheTimeout": null, "cards": { @@ -21477,7 +21610,112 @@ }, "dataFormat": "tsbuckets", "datasource": "${DS_TEST-CLUSTER}", - "description": "The length of peer msgs for each round handling", + "description": "Duration of store fsm waiting to be polled", + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, + "gridPos": { + "h": 7, + "w": 12, + "x": 0, + "y": 14 + }, + "heatmap": {}, + "height": null, + "hideTimeOverride": false, + "hideZeroBuckets": true, + "highlightCards": true, + "id": 154, + "interval": null, + "legend": { + "show": false + }, + "links": [], + "maxDataPoints": 512, + "maxPerRow": null, + "minSpan": null, + 
"options": {}, + "repeat": null, + "repeatDirection": null, + "reverseYBuckets": false, + "span": null, + "targets": [ + { + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(increase(\n tikv_batch_system_fsm_schedule_wait_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"store\"}\n [$__rate_interval]\n)) by (le) ", + "format": "heatmap", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "{{le}}", + "metric": "", + "query": "sum(increase(\n tikv_batch_system_fsm_schedule_wait_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"store\"}\n [$__rate_interval]\n)) by (le) ", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Store fsm schedule wait duration", + "tooltip": { + "msResolution": true, + "shared": true, + "show": true, + "showHistogram": true, + "sort": 0, + "value_type": "individual" + }, + "transformations": [], + "transparent": false, + "type": "heatmap", + "xAxis": { + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "xBucketNumber": null, + "xBucketSize": null, + "yAxis": { + "decimals": 1, + "format": "s", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + "yBucketBound": "upper", + "yBucketNumber": null, + "yBucketSize": null + }, + { + "cacheTimeout": null, + "cards": { + "cardPadding": null, + "cardRound": null + }, + "color": { + "cardColor": "#b4ff00", + "colorScale": "sqrt", + "colorScheme": "interpolateSpectral", + "exponent": 0.5, + "max": null, + "min": null, + "mode": "spectrum" + }, + "dataFormat": "tsbuckets", + "datasource": "${DS_TEST-CLUSTER}", + "description": "Duration of apply fsm waiting to be polled.e", "editable": true, "error": false, "fieldConfig": { @@ -21492,14 +21730,749 @@ "h": 7, "w": 12, "x": 12, - "y": 7 + "y": 14 }, "heatmap": {}, "height": null, "hideTimeOverride": false, "hideZeroBuckets": true, "highlightCards": true, - "id": 153, + "id": 155, + "interval": null, + "legend": { + "show": false + }, + "links": [], + "maxDataPoints": 512, + "maxPerRow": null, + "minSpan": null, + "options": {}, + "repeat": null, + "repeatDirection": null, + "reverseYBuckets": false, + "span": null, + "targets": [ + { + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(increase(\n tikv_batch_system_fsm_schedule_wait_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"apply\"}\n [$__rate_interval]\n)) by (le) ", + "format": "heatmap", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "{{le}}", + "metric": "", + "query": "sum(increase(\n tikv_batch_system_fsm_schedule_wait_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"apply\"}\n [$__rate_interval]\n)) by (le) ", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Apply fsm schedule wait duration", + "tooltip": { + "msResolution": true, + "shared": true, + "show": true, + "showHistogram": true, + "sort": 0, + "value_type": "individual" + }, + "transformations": [], + "transparent": false, + "type": "heatmap", + "xAxis": { + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "xBucketNumber": null, + "xBucketSize": null, + "yAxis": { + "decimals": 1, + "format": "s", + "label": null, + "logBase": 1, + "max": null, 
+ "min": null, + "show": true + }, + "yBucketBound": "upper", + "yBucketNumber": null, + "yBucketSize": null + }, + { + "cacheTimeout": null, + "cards": { + "cardPadding": null, + "cardRound": null + }, + "color": { + "cardColor": "#b4ff00", + "colorScale": "sqrt", + "colorScheme": "interpolateSpectral", + "exponent": 0.5, + "max": null, + "min": null, + "mode": "spectrum" + }, + "dataFormat": "tsbuckets", + "datasource": "${DS_TEST-CLUSTER}", + "description": "Total time for an store FSM to finish processing all messages, potentially over multiple polling rounds.", + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, + "gridPos": { + "h": 7, + "w": 12, + "x": 0, + "y": 21 + }, + "heatmap": {}, + "height": null, + "hideTimeOverride": false, + "hideZeroBuckets": true, + "highlightCards": true, + "id": 156, + "interval": null, + "legend": { + "show": false + }, + "links": [], + "maxDataPoints": 512, + "maxPerRow": null, + "minSpan": null, + "options": {}, + "repeat": null, + "repeatDirection": null, + "reverseYBuckets": false, + "span": null, + "targets": [ + { + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(increase(\n tikv_batch_system_fsm_poll_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"store\"}\n [$__rate_interval]\n)) by (le) ", + "format": "heatmap", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "{{le}}", + "metric": "", + "query": "sum(increase(\n tikv_batch_system_fsm_poll_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"store\"}\n [$__rate_interval]\n)) by (le) ", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Store fsm poll duration", + "tooltip": { + "msResolution": true, + "shared": true, + "show": true, + "showHistogram": true, + "sort": 0, + "value_type": "individual" + }, + "transformations": [], + "transparent": false, + "type": "heatmap", + "xAxis": { + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "xBucketNumber": null, + "xBucketSize": null, + "yAxis": { + "decimals": 1, + "format": "s", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + "yBucketBound": "upper", + "yBucketNumber": null, + "yBucketSize": null + }, + { + "cacheTimeout": null, + "cards": { + "cardPadding": null, + "cardRound": null + }, + "color": { + "cardColor": "#b4ff00", + "colorScale": "sqrt", + "colorScheme": "interpolateSpectral", + "exponent": 0.5, + "max": null, + "min": null, + "mode": "spectrum" + }, + "dataFormat": "tsbuckets", + "datasource": "${DS_TEST-CLUSTER}", + "description": "Total time for an apply FSM to finish processing all messages, potentially over multiple polling rounds", + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, + "gridPos": { + "h": 7, + "w": 12, + "x": 12, + "y": 21 + }, + "heatmap": {}, + "height": null, + "hideTimeOverride": false, + "hideZeroBuckets": true, + "highlightCards": true, + "id": 157, + "interval": null, + "legend": { + "show": false + }, + "links": [], + "maxDataPoints": 512, + "maxPerRow": null, + "minSpan": null, + "options": {}, + "repeat": null, + "repeatDirection": null, + "reverseYBuckets": false, + "span": null, + "targets": [ + { + "datasource": "${DS_TEST-CLUSTER}", + "expr": 
"sum(increase(\n tikv_batch_system_fsm_poll_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"apply\"}\n [$__rate_interval]\n)) by (le) ", + "format": "heatmap", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "{{le}}", + "metric": "", + "query": "sum(increase(\n tikv_batch_system_fsm_poll_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"apply\"}\n [$__rate_interval]\n)) by (le) ", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Apply fsm poll duration", + "tooltip": { + "msResolution": true, + "shared": true, + "show": true, + "showHistogram": true, + "sort": 0, + "value_type": "individual" + }, + "transformations": [], + "transparent": false, + "type": "heatmap", + "xAxis": { + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "xBucketNumber": null, + "xBucketSize": null, + "yAxis": { + "decimals": 1, + "format": "s", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + "yBucketBound": "upper", + "yBucketNumber": null, + "yBucketSize": null + }, + { + "cacheTimeout": null, + "cards": { + "cardPadding": null, + "cardRound": null + }, + "color": { + "cardColor": "#b4ff00", + "colorScale": "sqrt", + "colorScheme": "interpolateSpectral", + "exponent": 0.5, + "max": null, + "min": null, + "mode": "spectrum" + }, + "dataFormat": "tsbuckets", + "datasource": "${DS_TEST-CLUSTER}", + "description": "Number of polling rounds for an store FSM to finish processing all messages", + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, + "gridPos": { + "h": 7, + "w": 12, + "x": 0, + "y": 28 + }, + "heatmap": {}, + "height": null, + "hideTimeOverride": false, + "hideZeroBuckets": true, + "highlightCards": true, + "id": 158, + "interval": null, + "legend": { + "show": false + }, + "links": [], + "maxDataPoints": 512, + "maxPerRow": null, + "minSpan": null, + "options": {}, + "repeat": null, + "repeatDirection": null, + "reverseYBuckets": false, + "span": null, + "targets": [ + { + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(increase(\n tikv_batch_system_fsm_poll_rounds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"store\"}\n [$__rate_interval]\n)) by (le) ", + "format": "heatmap", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "{{le}}", + "metric": "", + "query": "sum(increase(\n tikv_batch_system_fsm_poll_rounds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"store\"}\n [$__rate_interval]\n)) by (le) ", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Store fsm poll round", + "tooltip": { + "msResolution": true, + "shared": true, + "show": true, + "showHistogram": true, + "sort": 0, + "value_type": "individual" + }, + "transformations": [], + "transparent": false, + "type": "heatmap", + "xAxis": { + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "xBucketNumber": null, + "xBucketSize": null, + "yAxis": { + "decimals": 1, + "format": "none", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + "yBucketBound": "upper", + "yBucketNumber": null, + "yBucketSize": null + }, + { + 
"cacheTimeout": null, + "cards": { + "cardPadding": null, + "cardRound": null + }, + "color": { + "cardColor": "#b4ff00", + "colorScale": "sqrt", + "colorScheme": "interpolateSpectral", + "exponent": 0.5, + "max": null, + "min": null, + "mode": "spectrum" + }, + "dataFormat": "tsbuckets", + "datasource": "${DS_TEST-CLUSTER}", + "description": "Number of polling rounds for an apply FSM to finish processing all messages", + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, + "gridPos": { + "h": 7, + "w": 12, + "x": 12, + "y": 28 + }, + "heatmap": {}, + "height": null, + "hideTimeOverride": false, + "hideZeroBuckets": true, + "highlightCards": true, + "id": 159, + "interval": null, + "legend": { + "show": false + }, + "links": [], + "maxDataPoints": 512, + "maxPerRow": null, + "minSpan": null, + "options": {}, + "repeat": null, + "repeatDirection": null, + "reverseYBuckets": false, + "span": null, + "targets": [ + { + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(increase(\n tikv_batch_system_fsm_poll_rounds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"apply\"}\n [$__rate_interval]\n)) by (le) ", + "format": "heatmap", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "{{le}}", + "metric": "", + "query": "sum(increase(\n tikv_batch_system_fsm_poll_rounds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"apply\"}\n [$__rate_interval]\n)) by (le) ", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Apply fsm poll round", + "tooltip": { + "msResolution": true, + "shared": true, + "show": true, + "showHistogram": true, + "sort": 0, + "value_type": "individual" + }, + "transformations": [], + "transparent": false, + "type": "heatmap", + "xAxis": { + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "xBucketNumber": null, + "xBucketSize": null, + "yAxis": { + "decimals": 1, + "format": "none", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + "yBucketBound": "upper", + "yBucketNumber": null, + "yBucketSize": null + }, + { + "cacheTimeout": null, + "cards": { + "cardPadding": null, + "cardRound": null + }, + "color": { + "cardColor": "#b4ff00", + "colorScale": "sqrt", + "colorScheme": "interpolateSpectral", + "exponent": 0.5, + "max": null, + "min": null, + "mode": "spectrum" + }, + "dataFormat": "tsbuckets", + "datasource": "${DS_TEST-CLUSTER}", + "description": "Number of store fsm polled in one poll", + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, + "gridPos": { + "h": 7, + "w": 12, + "x": 0, + "y": 35 + }, + "heatmap": {}, + "height": null, + "hideTimeOverride": false, + "hideZeroBuckets": true, + "highlightCards": true, + "id": 160, + "interval": null, + "legend": { + "show": false + }, + "links": [], + "maxDataPoints": 512, + "maxPerRow": null, + "minSpan": null, + "options": {}, + "repeat": null, + "repeatDirection": null, + "reverseYBuckets": false, + "span": null, + "targets": [ + { + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(increase(\n tikv_batch_system_fsm_count_per_poll_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"store\"}\n [$__rate_interval]\n)) by (le) ", + "format": "heatmap", 
+ "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "{{le}}", + "metric": "", + "query": "sum(increase(\n tikv_batch_system_fsm_count_per_poll_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"store\"}\n [$__rate_interval]\n)) by (le) ", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Store fsm count per poll", + "tooltip": { + "msResolution": true, + "shared": true, + "show": true, + "showHistogram": true, + "sort": 0, + "value_type": "individual" + }, + "transformations": [], + "transparent": false, + "type": "heatmap", + "xAxis": { + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "xBucketNumber": null, + "xBucketSize": null, + "yAxis": { + "decimals": 1, + "format": "none", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + "yBucketBound": "upper", + "yBucketNumber": null, + "yBucketSize": null + }, + { + "cacheTimeout": null, + "cards": { + "cardPadding": null, + "cardRound": null + }, + "color": { + "cardColor": "#b4ff00", + "colorScale": "sqrt", + "colorScheme": "interpolateSpectral", + "exponent": 0.5, + "max": null, + "min": null, + "mode": "spectrum" + }, + "dataFormat": "tsbuckets", + "datasource": "${DS_TEST-CLUSTER}", + "description": "Number of apply fsm polled in one poll", + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, + "gridPos": { + "h": 7, + "w": 12, + "x": 12, + "y": 35 + }, + "heatmap": {}, + "height": null, + "hideTimeOverride": false, + "hideZeroBuckets": true, + "highlightCards": true, + "id": 161, + "interval": null, + "legend": { + "show": false + }, + "links": [], + "maxDataPoints": 512, + "maxPerRow": null, + "minSpan": null, + "options": {}, + "repeat": null, + "repeatDirection": null, + "reverseYBuckets": false, + "span": null, + "targets": [ + { + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(increase(\n tikv_batch_system_fsm_count_per_poll_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"apply\"}\n [$__rate_interval]\n)) by (le) ", + "format": "heatmap", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "{{le}}", + "metric": "", + "query": "sum(increase(\n tikv_batch_system_fsm_count_per_poll_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"apply\"}\n [$__rate_interval]\n)) by (le) ", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Apply fsm count per poll", + "tooltip": { + "msResolution": true, + "shared": true, + "show": true, + "showHistogram": true, + "sort": 0, + "value_type": "individual" + }, + "transformations": [], + "transparent": false, + "type": "heatmap", + "xAxis": { + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "xBucketNumber": null, + "xBucketSize": null, + "yAxis": { + "decimals": 1, + "format": "none", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + "yBucketBound": "upper", + "yBucketNumber": null, + "yBucketSize": null + }, + { + "cacheTimeout": null, + "cards": { + "cardPadding": null, + "cardRound": null + }, + "color": { + "cardColor": "#b4ff00", + "colorScale": "sqrt", + "colorScheme": "interpolateSpectral", + "exponent": 0.5, + "max": null, + "min": null, + "mode": 
"spectrum" + }, + "dataFormat": "tsbuckets", + "datasource": "${DS_TEST-CLUSTER}", + "description": "The length of peer msgs for each round handling", + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, + "gridPos": { + "h": 7, + "w": 12, + "x": 0, + "y": 42 + }, + "heatmap": {}, + "height": null, + "hideTimeOverride": false, + "hideZeroBuckets": true, + "highlightCards": true, + "id": 162, "interval": null, "legend": { "show": false @@ -21565,6 +22538,111 @@ "yBucketNumber": null, "yBucketSize": null }, + { + "cacheTimeout": null, + "cards": { + "cardPadding": null, + "cardRound": null + }, + "color": { + "cardColor": "#b4ff00", + "colorScale": "sqrt", + "colorScheme": "interpolateSpectral", + "exponent": 0.5, + "max": null, + "min": null, + "mode": "spectrum" + }, + "dataFormat": "tsbuckets", + "datasource": "${DS_TEST-CLUSTER}", + "description": "The length of apply msgs for each round handling", + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, + "gridPos": { + "h": 7, + "w": 12, + "x": 12, + "y": 42 + }, + "heatmap": {}, + "height": null, + "hideTimeOverride": false, + "hideZeroBuckets": true, + "highlightCards": true, + "id": 163, + "interval": null, + "legend": { + "show": false + }, + "links": [], + "maxDataPoints": 512, + "maxPerRow": null, + "minSpan": null, + "options": {}, + "repeat": null, + "repeatDirection": null, + "reverseYBuckets": false, + "span": null, + "targets": [ + { + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(increase(\n tikv_raftstore_apply_msg_len_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) ", + "format": "heatmap", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "{{le}}", + "metric": "", + "query": "sum(increase(\n tikv_raftstore_apply_msg_len_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) ", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Apply msg length distribution", + "tooltip": { + "msResolution": true, + "shared": true, + "show": true, + "showHistogram": true, + "sort": 0, + "value_type": "individual" + }, + "transformations": [], + "transparent": false, + "type": "heatmap", + "xAxis": { + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "xBucketNumber": null, + "xBucketSize": null, + "yAxis": { + "decimals": 1, + "format": "none", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + "yBucketBound": "upper", + "yBucketNumber": null, + "yBucketSize": null + }, { "aliasColors": {}, "bars": false, @@ -21593,11 +22671,11 @@ "h": 7, "w": 12, "x": 0, - "y": 14 + "y": 49 }, "height": null, "hideTimeOverride": false, - "id": 154, + "id": 164, "interval": null, "isNew": true, "legend": { @@ -21726,11 +22804,11 @@ "h": 7, "w": 12, "x": 12, - "y": 14 + "y": 49 }, "height": null, "hideTimeOverride": false, - "id": 155, + "id": 165, "interval": null, "isNew": true, "legend": { @@ -21881,7 +22959,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 156, + "id": 166, "interval": null, "links": [], "maxDataPoints": 100, @@ -21920,7 +22998,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 157, + "id": 167, "interval": null, "isNew": true, "legend": { @@ 
-22053,7 +23131,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 158, + "id": 168, "interval": null, "isNew": true, "legend": { @@ -22186,7 +23264,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 159, + "id": 169, "interval": null, "isNew": true, "legend": { @@ -22319,7 +23397,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 160, + "id": 170, "interval": null, "isNew": true, "legend": { @@ -22452,7 +23530,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 161, + "id": 171, "interval": null, "isNew": true, "legend": { @@ -22585,7 +23663,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 162, + "id": 172, "interval": null, "isNew": true, "legend": { @@ -22736,7 +23814,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 163, + "id": 173, "interval": null, "links": [], "maxDataPoints": 100, @@ -22775,7 +23853,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 164, + "id": 174, "interval": null, "isNew": true, "legend": { @@ -22908,7 +23986,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 165, + "id": 175, "interval": null, "isNew": true, "legend": { @@ -23041,7 +24119,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 166, + "id": 176, "interval": null, "isNew": true, "legend": { @@ -23174,7 +24252,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 167, + "id": 177, "interval": null, "isNew": true, "legend": { @@ -23307,7 +24385,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 168, + "id": 178, "interval": null, "isNew": true, "legend": { @@ -23440,7 +24518,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 169, + "id": 179, "interval": null, "isNew": true, "legend": { @@ -23603,7 +24681,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 170, + "id": 180, "interval": null, "isNew": true, "legend": { @@ -23739,7 +24817,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 171, + "id": 181, "interval": null, "links": [], "maxDataPoints": 100, @@ -23778,7 +24856,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 172, + "id": 182, "interval": null, "isNew": true, "legend": { @@ -23926,7 +25004,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 173, + "id": 183, "interval": null, "isNew": true, "legend": { @@ -24074,7 +25152,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 174, + "id": 184, "interval": null, "isNew": true, "legend": { @@ -24207,7 +25285,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 175, + "id": 185, "interval": null, "isNew": true, "legend": { @@ -24340,7 +25418,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 176, + "id": 186, "interval": null, "isNew": true, "legend": { @@ -24473,7 +25551,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 177, + "id": 187, "interval": null, "isNew": true, "legend": { @@ -24606,7 +25684,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 178, + "id": 188, "interval": null, "isNew": true, "legend": { @@ -24739,7 +25817,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 179, + "id": 189, "interval": null, "isNew": true, "legend": { @@ -24872,7 +25950,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 180, + "id": 190, "interval": null, "isNew": true, "legend": { @@ -25049,7 +26127,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 181, + "id": 191, "interval": null, "links": [], "maxDataPoints": 100, @@ -25088,7 +26166,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 182, + "id": 192, "interval": null, "isNew": true, "legend": { @@ 
-25251,7 +26329,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 183, + "id": 193, "interval": null, "isNew": true, "legend": { @@ -25452,7 +26530,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 184, + "id": 194, "interval": null, "isNew": true, "legend": { @@ -25600,7 +26678,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 185, + "id": 195, "interval": null, "isNew": true, "legend": { @@ -25763,7 +26841,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 186, + "id": 196, "interval": null, "isNew": true, "legend": { @@ -25964,7 +27042,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 187, + "id": 197, "interval": null, "isNew": true, "legend": { @@ -26142,7 +27220,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 188, + "id": 198, "interval": null, "isNew": true, "legend": { @@ -26305,7 +27383,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 189, + "id": 199, "interval": null, "isNew": true, "legend": { @@ -26468,7 +27546,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 190, + "id": 200, "interval": null, "isNew": true, "legend": { @@ -26601,7 +27679,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 191, + "id": 201, "interval": null, "isNew": true, "legend": { @@ -26805,7 +27883,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 192, + "id": 202, "interval": null, "links": [], "maxDataPoints": 100, @@ -26844,7 +27922,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 193, + "id": 203, "interval": null, "isNew": true, "legend": { @@ -27037,7 +28115,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 194, + "id": 204, "interval": null, "isNew": true, "legend": { @@ -27215,7 +28293,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 195, + "id": 205, "interval": null, "isNew": true, "legend": { @@ -27423,7 +28501,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 196, + "id": 206, "interval": null, "isNew": true, "legend": { @@ -27601,7 +28679,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 197, + "id": 207, "interval": null, "isNew": true, "legend": { @@ -27764,7 +28842,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 198, + "id": 208, "interval": null, "isNew": true, "legend": { @@ -27942,7 +29020,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 199, + "id": 209, "interval": null, "isNew": true, "legend": { @@ -28075,7 +29153,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 200, + "id": 210, "interval": null, "isNew": true, "legend": { @@ -28253,7 +29331,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 201, + "id": 211, "interval": null, "isNew": true, "legend": { @@ -28386,7 +29464,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 202, + "id": 212, "interval": null, "isNew": true, "legend": { @@ -28564,7 +29642,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 203, + "id": 213, "interval": null, "isNew": true, "legend": { @@ -28697,7 +29775,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 204, + "id": 214, "interval": null, "isNew": true, "legend": { @@ -28875,7 +29953,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 205, + "id": 215, "interval": null, "isNew": true, "legend": { @@ -29053,7 +30131,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 206, + "id": 216, "interval": null, "isNew": true, "legend": { @@ -29231,7 +30309,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 207, + "id": 217, "interval": null, "isNew": true, "legend": { @@ -29364,7 +30442,7 @@ 
}, "height": null, "hideTimeOverride": false, - "id": 208, + "id": 218, "interval": null, "isNew": true, "legend": { @@ -29497,7 +30575,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 209, + "id": 219, "interval": null, "isNew": true, "legend": { @@ -29630,7 +30708,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 210, + "id": 220, "interval": null, "isNew": true, "legend": { @@ -29853,7 +30931,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 211, + "id": 221, "interval": null, "isNew": true, "legend": { @@ -30046,7 +31124,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 212, + "id": 222, "interval": null, "isNew": true, "legend": { @@ -30209,7 +31287,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 213, + "id": 223, "interval": null, "isNew": true, "legend": { @@ -30402,7 +31480,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 214, + "id": 224, "interval": null, "isNew": true, "legend": { @@ -30550,7 +31628,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 215, + "id": 225, "interval": null, "isNew": true, "legend": { @@ -30683,7 +31761,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 216, + "id": 226, "interval": null, "isNew": true, "legend": { @@ -30831,7 +31909,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 217, + "id": 227, "interval": null, "isNew": true, "legend": { @@ -31009,7 +32087,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 218, + "id": 228, "interval": null, "isNew": true, "legend": { @@ -31172,7 +32250,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 219, + "id": 229, "interval": null, "isNew": true, "legend": { @@ -31350,7 +32428,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 220, + "id": 230, "interval": null, "isNew": true, "legend": { @@ -31483,7 +32561,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 221, + "id": 231, "interval": null, "isNew": true, "legend": { @@ -31616,7 +32694,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 222, + "id": 232, "interval": null, "isNew": true, "legend": { @@ -31749,7 +32827,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 223, + "id": 233, "interval": null, "isNew": true, "legend": { @@ -31882,7 +32960,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 224, + "id": 234, "interval": null, "isNew": true, "legend": { @@ -32015,7 +33093,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 225, + "id": 235, "interval": null, "isNew": true, "legend": { @@ -32155,7 +33233,7 @@ "hideTimeOverride": false, "hideZeroBuckets": true, "highlightCards": true, - "id": 226, + "id": 236, "interval": null, "legend": { "show": false @@ -32253,7 +33331,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 227, + "id": 237, "interval": null, "isNew": true, "legend": { @@ -32454,7 +33532,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 228, + "id": 238, "interval": null, "isNew": true, "legend": { @@ -32587,7 +33665,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 229, + "id": 239, "interval": null, "isNew": true, "legend": { @@ -32765,7 +33843,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 230, + "id": 240, "interval": null, "isNew": true, "legend": { @@ -32898,7 +33976,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 231, + "id": 241, "interval": null, "isNew": true, "legend": { @@ -33034,7 +34112,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 232, + "id": 242, "interval": null, "links": [], "maxDataPoints": 100, @@ -33073,7 
+34151,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 233, + "id": 243, "interval": null, "isNew": true, "legend": { @@ -33221,7 +34299,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 234, + "id": 244, "interval": null, "isNew": true, "legend": { @@ -33369,7 +34447,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 235, + "id": 245, "interval": null, "isNew": true, "legend": { @@ -33502,7 +34580,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 236, + "id": 246, "interval": null, "isNew": true, "legend": { @@ -33635,7 +34713,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 237, + "id": 247, "interval": null, "isNew": true, "legend": { @@ -33813,7 +34891,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 238, + "id": 248, "interval": null, "isNew": true, "legend": { @@ -33991,7 +35069,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 239, + "id": 249, "interval": null, "isNew": true, "legend": { @@ -34169,7 +35247,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 240, + "id": 250, "interval": null, "isNew": true, "legend": { @@ -34302,7 +35380,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 241, + "id": 251, "interval": null, "isNew": true, "legend": { @@ -34480,7 +35558,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 242, + "id": 252, "interval": null, "isNew": true, "legend": { @@ -34613,7 +35691,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 243, + "id": 253, "interval": null, "isNew": true, "legend": { @@ -34776,7 +35854,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 244, + "id": 254, "interval": null, "isNew": true, "legend": { @@ -34954,7 +36032,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 245, + "id": 255, "interval": null, "isNew": true, "legend": { @@ -35132,7 +36210,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 246, + "id": 256, "interval": null, "isNew": true, "legend": { @@ -35310,7 +36388,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 247, + "id": 257, "interval": null, "isNew": true, "legend": { @@ -35443,7 +36521,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 248, + "id": 258, "interval": null, "isNew": true, "legend": { @@ -35621,7 +36699,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 249, + "id": 259, "interval": null, "isNew": true, "legend": { @@ -35754,7 +36832,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 250, + "id": 260, "interval": null, "isNew": true, "legend": { @@ -35932,7 +37010,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 251, + "id": 261, "interval": null, "isNew": true, "legend": { @@ -36065,7 +37143,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 252, + "id": 262, "interval": null, "isNew": true, "legend": { @@ -36198,7 +37276,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 253, + "id": 263, "interval": null, "isNew": true, "legend": { @@ -36376,7 +37454,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 254, + "id": 264, "interval": null, "isNew": true, "legend": { @@ -36554,7 +37632,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 255, + "id": 265, "interval": null, "isNew": true, "legend": { @@ -36687,7 +37765,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 256, + "id": 266, "interval": null, "isNew": true, "legend": { @@ -36865,7 +37943,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 257, + "id": 267, "interval": null, "isNew": true, "legend": { @@ -36998,7 +38076,7 @@ }, "height": 
null, "hideTimeOverride": false, - "id": 258, + "id": 268, "interval": null, "isNew": true, "legend": { @@ -37176,7 +38254,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 259, + "id": 269, "interval": null, "isNew": true, "legend": { @@ -37312,7 +38390,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 260, + "id": 270, "interval": null, "links": [], "maxDataPoints": 100, @@ -37351,7 +38429,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 261, + "id": 271, "interval": null, "isNew": true, "legend": { @@ -37484,7 +38562,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 262, + "id": 272, "interval": null, "isNew": true, "legend": { @@ -37632,7 +38710,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 263, + "id": 273, "interval": null, "isNew": true, "legend": { @@ -37833,7 +38911,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 264, + "id": 274, "interval": null, "isNew": true, "legend": { @@ -37966,7 +39044,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 265, + "id": 275, "interval": null, "isNew": true, "legend": { @@ -38099,7 +39177,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 266, + "id": 276, "interval": null, "isNew": true, "legend": { @@ -38232,7 +39310,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 267, + "id": 277, "interval": null, "isNew": true, "legend": { @@ -38365,7 +39443,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 268, + "id": 278, "interval": null, "isNew": true, "legend": { @@ -38498,7 +39576,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 269, + "id": 279, "interval": null, "isNew": true, "legend": { @@ -38638,7 +39716,7 @@ "hideTimeOverride": false, "hideZeroBuckets": true, "highlightCards": true, - "id": 270, + "id": 280, "interval": null, "legend": { "show": false @@ -38743,7 +39821,7 @@ "hideTimeOverride": false, "hideZeroBuckets": true, "highlightCards": true, - "id": 271, + "id": 281, "interval": null, "legend": { "show": false @@ -38841,7 +39919,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 272, + "id": 282, "interval": null, "isNew": true, "legend": { @@ -38981,7 +40059,7 @@ "hideTimeOverride": false, "hideZeroBuckets": true, "highlightCards": true, - "id": 273, + "id": 283, "interval": null, "legend": { "show": false @@ -39079,7 +40157,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 274, + "id": 284, "interval": null, "isNew": true, "legend": { @@ -39219,7 +40297,7 @@ "hideTimeOverride": false, "hideZeroBuckets": true, "highlightCards": true, - "id": 275, + "id": 285, "interval": null, "legend": { "show": false @@ -39317,7 +40395,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 276, + "id": 286, "interval": null, "isNew": true, "legend": { @@ -39525,7 +40603,7 @@ "hideTimeOverride": false, "hideZeroBuckets": true, "highlightCards": true, - "id": 277, + "id": 287, "interval": null, "legend": { "show": false @@ -39623,7 +40701,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 278, + "id": 288, "interval": null, "isNew": true, "legend": { @@ -39824,7 +40902,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 279, + "id": 289, "interval": null, "isNew": true, "legend": { @@ -40032,7 +41110,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 280, + "id": 290, "interval": null, "isNew": true, "legend": { @@ -40210,7 +41288,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 281, + "id": 291, "interval": null, "isNew": true, "legend": { @@ -40343,7 +41421,7 @@ }, "height": null, 
"hideTimeOverride": false, - "id": 282, + "id": 292, "interval": null, "isNew": true, "legend": { @@ -40476,7 +41554,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 283, + "id": 293, "interval": null, "isNew": true, "legend": { @@ -40609,7 +41687,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 284, + "id": 294, "interval": null, "isNew": true, "legend": { @@ -40745,7 +41823,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 285, + "id": 295, "interval": null, "links": [], "maxDataPoints": 100, @@ -40784,7 +41862,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 286, + "id": 296, "interval": null, "isNew": true, "legend": { @@ -40932,7 +42010,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 287, + "id": 297, "interval": null, "isNew": true, "legend": { @@ -41072,7 +42150,7 @@ "hideTimeOverride": false, "hideZeroBuckets": true, "highlightCards": true, - "id": 288, + "id": 298, "interval": null, "legend": { "show": false @@ -41170,7 +42248,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 289, + "id": 299, "interval": null, "isNew": true, "legend": { @@ -41303,7 +42381,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 290, + "id": 300, "interval": null, "isNew": true, "legend": { @@ -41436,7 +42514,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 291, + "id": 301, "interval": null, "isNew": true, "legend": { @@ -41614,7 +42692,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 292, + "id": 302, "interval": null, "isNew": true, "legend": { @@ -41777,7 +42855,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 293, + "id": 303, "interval": null, "isNew": true, "legend": { @@ -41925,7 +43003,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 294, + "id": 304, "interval": null, "isNew": true, "legend": { @@ -42058,7 +43136,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 295, + "id": 305, "interval": null, "isNew": true, "legend": { @@ -42194,7 +43272,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 296, + "id": 306, "interval": null, "links": [], "maxDataPoints": 100, @@ -42233,7 +43311,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 297, + "id": 307, "interval": null, "isNew": true, "legend": { @@ -42381,7 +43459,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 298, + "id": 308, "interval": null, "isNew": true, "legend": { @@ -42514,7 +43592,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 299, + "id": 309, "interval": null, "isNew": true, "legend": { @@ -42647,7 +43725,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 300, + "id": 310, "interval": null, "isNew": true, "legend": { @@ -42780,7 +43858,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 301, + "id": 311, "interval": null, "isNew": true, "legend": { @@ -42913,7 +43991,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 302, + "id": 312, "interval": null, "isNew": true, "legend": { @@ -43068,7 +44146,7 @@ "hideTimeOverride": false, "hideZeroBuckets": true, "highlightCards": true, - "id": 303, + "id": 313, "interval": null, "legend": { "show": false @@ -43169,7 +44247,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 304, + "id": 314, "interval": null, "links": [], "maxDataPoints": 100, @@ -43208,7 +44286,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 305, + "id": 315, "interval": null, "isNew": true, "legend": { @@ -43341,7 +44419,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 306, + "id": 316, "interval": null, "isNew": true, 
"legend": { @@ -43474,7 +44552,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 307, + "id": 317, "interval": null, "isNew": true, "legend": { @@ -43614,7 +44692,7 @@ "hideTimeOverride": false, "hideZeroBuckets": true, "highlightCards": true, - "id": 308, + "id": 318, "interval": null, "legend": { "show": false @@ -43712,7 +44790,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 309, + "id": 319, "interval": null, "isNew": true, "legend": { @@ -43913,7 +44991,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 310, + "id": 320, "interval": null, "isNew": true, "legend": { @@ -44114,7 +45192,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 311, + "id": 321, "interval": null, "isNew": true, "legend": { @@ -44318,7 +45396,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 312, + "id": 322, "interval": null, "links": [], "maxDataPoints": 100, @@ -44357,7 +45435,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 313, + "id": 323, "interval": null, "isNew": true, "legend": { @@ -44505,7 +45583,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 314, + "id": 324, "interval": null, "isNew": true, "legend": { @@ -44706,7 +45784,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 315, + "id": 325, "interval": null, "isNew": true, "legend": { @@ -44907,7 +45985,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 316, + "id": 326, "interval": null, "isNew": true, "legend": { @@ -45108,7 +46186,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 317, + "id": 327, "interval": null, "isNew": true, "legend": { @@ -45309,7 +46387,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 318, + "id": 328, "interval": null, "isNew": true, "legend": { @@ -45442,7 +46520,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 319, + "id": 329, "interval": null, "isNew": true, "legend": { @@ -45575,7 +46653,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 320, + "id": 330, "interval": null, "isNew": true, "legend": { @@ -45708,7 +46786,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 321, + "id": 331, "interval": null, "isNew": true, "legend": { @@ -45841,7 +46919,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 322, + "id": 332, "interval": null, "isNew": true, "legend": { @@ -46049,7 +47127,7 @@ "hideTimeOverride": false, "hideZeroBuckets": true, "highlightCards": true, - "id": 323, + "id": 333, "interval": null, "legend": { "show": false @@ -46150,7 +47228,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 324, + "id": 334, "interval": null, "links": [], "maxDataPoints": 100, @@ -46196,7 +47274,7 @@ "hideTimeOverride": false, "hideZeroBuckets": true, "highlightCards": true, - "id": 325, + "id": 335, "interval": null, "legend": { "show": false @@ -46294,7 +47372,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 326, + "id": 336, "interval": null, "isNew": true, "legend": { @@ -46495,7 +47573,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 327, + "id": 337, "interval": null, "isNew": true, "legend": { @@ -46628,7 +47706,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 328, + "id": 338, "interval": null, "isNew": true, "legend": { @@ -46761,7 +47839,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 329, + "id": 339, "interval": null, "isNew": true, "legend": { @@ -46894,7 +47972,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 330, + "id": 340, "interval": null, "isNew": true, "legend": { @@ -47095,7 +48173,7 @@ }, "height": null, 
"hideTimeOverride": false, - "id": 331, + "id": 341, "interval": null, "isNew": true, "legend": { @@ -47228,7 +48306,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 332, + "id": 342, "interval": null, "isNew": true, "legend": { @@ -47361,7 +48439,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 333, + "id": 343, "interval": null, "isNew": true, "legend": { @@ -47497,7 +48575,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 334, + "id": 344, "interval": null, "links": [], "maxDataPoints": 100, @@ -47536,7 +48614,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 335, + "id": 345, "interval": null, "isNew": true, "legend": { @@ -47737,7 +48815,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 336, + "id": 346, "interval": null, "isNew": true, "legend": { @@ -47938,7 +49016,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 337, + "id": 347, "interval": null, "isNew": true, "legend": { @@ -48139,7 +49217,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 338, + "id": 348, "interval": null, "isNew": true, "legend": { @@ -48340,7 +49418,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 339, + "id": 349, "interval": null, "isNew": true, "legend": { @@ -48473,7 +49551,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 340, + "id": 350, "interval": null, "isNew": true, "legend": { @@ -48606,7 +49684,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 341, + "id": 351, "interval": null, "isNew": true, "legend": { @@ -48739,7 +49817,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 342, + "id": 352, "interval": null, "isNew": true, "legend": { @@ -48872,7 +49950,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 343, + "id": 353, "interval": null, "isNew": true, "legend": { @@ -49005,7 +50083,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 344, + "id": 354, "interval": null, "isNew": true, "legend": { @@ -49145,7 +50223,7 @@ "hideTimeOverride": false, "hideZeroBuckets": true, "highlightCards": true, - "id": 345, + "id": 355, "interval": null, "legend": { "show": false @@ -49243,7 +50321,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 346, + "id": 356, "interval": null, "isNew": true, "legend": { @@ -49447,7 +50525,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 347, + "id": 357, "interval": null, "links": [], "maxDataPoints": 100, @@ -49486,7 +50564,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 348, + "id": 358, "interval": null, "isNew": true, "legend": { @@ -49619,7 +50697,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 349, + "id": 359, "interval": null, "isNew": true, "legend": { @@ -49752,7 +50830,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 350, + "id": 360, "interval": null, "isNew": true, "legend": { @@ -49892,7 +50970,7 @@ "hideTimeOverride": false, "hideZeroBuckets": true, "highlightCards": true, - "id": 351, + "id": 361, "interval": null, "legend": { "show": false @@ -49990,7 +51068,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 352, + "id": 362, "interval": null, "isNew": true, "legend": { @@ -50191,7 +51269,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 353, + "id": 363, "interval": null, "isNew": true, "legend": { @@ -50392,7 +51470,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 354, + "id": 364, "interval": null, "isNew": true, "legend": { @@ -50596,7 +51674,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 355, + "id": 365, "interval": null, "links": [], 
"maxDataPoints": 100, @@ -50635,7 +51713,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 356, + "id": 366, "interval": null, "isNew": true, "legend": { @@ -50813,7 +51891,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 357, + "id": 367, "interval": null, "isNew": true, "legend": { @@ -51014,7 +52092,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 358, + "id": 368, "interval": null, "isNew": true, "legend": { @@ -51147,7 +52225,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 359, + "id": 369, "interval": null, "isNew": true, "legend": { @@ -51280,7 +52358,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 360, + "id": 370, "interval": null, "isNew": true, "legend": { @@ -51413,7 +52491,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 361, + "id": 371, "interval": null, "isNew": true, "legend": { @@ -51546,7 +52624,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 362, + "id": 372, "interval": null, "isNew": true, "legend": { @@ -51679,7 +52757,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 363, + "id": 373, "interval": null, "isNew": true, "legend": { @@ -51808,7 +52886,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 364, + "id": 374, "interval": null, "links": [], "maxDataPoints": 100, @@ -51883,7 +52961,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 365, + "id": 375, "interval": null, "links": [], "maxDataPoints": 100, @@ -51962,7 +53040,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 366, + "id": 376, "interval": null, "isNew": true, "legend": { @@ -52215,7 +53293,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 367, + "id": 377, "interval": null, "isNew": true, "legend": { @@ -52348,7 +53426,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 368, + "id": 378, "interval": null, "isNew": true, "legend": { @@ -52484,7 +53562,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 369, + "id": 379, "interval": null, "links": [], "maxDataPoints": 100, @@ -52523,7 +53601,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 370, + "id": 380, "interval": null, "isNew": true, "legend": { @@ -52671,7 +53749,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 371, + "id": 381, "interval": null, "isNew": true, "legend": { @@ -52804,7 +53882,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 372, + "id": 382, "interval": null, "isNew": true, "legend": { @@ -53005,7 +54083,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 373, + "id": 383, "interval": null, "isNew": true, "legend": { @@ -53153,7 +54231,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 374, + "id": 384, "interval": null, "isNew": true, "legend": { @@ -53354,7 +54432,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 375, + "id": 385, "interval": null, "isNew": true, "legend": { @@ -53487,7 +54565,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 376, + "id": 386, "interval": null, "isNew": true, "legend": { @@ -53620,7 +54698,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 377, + "id": 387, "interval": null, "isNew": true, "legend": { @@ -53753,7 +54831,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 378, + "id": 388, "interval": null, "isNew": true, "legend": { @@ -53886,7 +54964,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 379, + "id": 389, "interval": null, "isNew": true, "legend": { @@ -54026,7 +55104,7 @@ "hideTimeOverride": false, "hideZeroBuckets": true, "highlightCards": true, - "id": 380, + "id": 
390, "interval": null, "legend": { "show": false @@ -54124,7 +55202,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 381, + "id": 391, "interval": null, "isNew": true, "legend": { @@ -54328,7 +55406,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 382, + "id": 392, "interval": null, "links": [], "maxDataPoints": 100, @@ -54367,7 +55445,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 383, + "id": 393, "interval": null, "isNew": true, "legend": { @@ -54500,7 +55578,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 384, + "id": 394, "interval": null, "isNew": true, "legend": { @@ -54633,7 +55711,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 385, + "id": 395, "interval": null, "isNew": true, "legend": { @@ -54766,7 +55844,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 386, + "id": 396, "interval": null, "isNew": true, "legend": { @@ -54902,7 +55980,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 387, + "id": 397, "interval": null, "links": [], "maxDataPoints": 100, @@ -54941,7 +56019,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 388, + "id": 398, "interval": null, "isNew": true, "legend": { @@ -55074,7 +56152,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 389, + "id": 399, "interval": null, "isNew": true, "legend": { @@ -55207,7 +56285,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 390, + "id": 400, "interval": null, "isNew": true, "legend": { @@ -55355,7 +56433,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 391, + "id": 401, "interval": null, "isNew": true, "legend": { @@ -55488,7 +56566,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 392, + "id": 402, "interval": null, "isNew": true, "legend": { @@ -55621,7 +56699,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 393, + "id": 403, "interval": null, "isNew": true, "legend": { @@ -55754,7 +56832,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 394, + "id": 404, "interval": null, "isNew": true, "legend": { @@ -55890,7 +56968,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 395, + "id": 405, "interval": null, "links": [], "maxDataPoints": 100, @@ -55929,7 +57007,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 396, + "id": 406, "interval": null, "isNew": true, "legend": { @@ -56062,7 +57140,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 397, + "id": 407, "interval": null, "isNew": true, "legend": { @@ -56195,7 +57273,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 398, + "id": 408, "interval": null, "isNew": true, "legend": { @@ -56328,7 +57406,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 399, + "id": 409, "interval": null, "isNew": true, "legend": { @@ -56461,7 +57539,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 400, + "id": 410, "interval": null, "isNew": true, "legend": { @@ -56594,7 +57672,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 401, + "id": 411, "interval": null, "isNew": true, "legend": { @@ -56730,7 +57808,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 402, + "id": 412, "interval": null, "links": [], "maxDataPoints": 100, @@ -56769,7 +57847,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 403, + "id": 413, "interval": null, "isNew": true, "legend": { @@ -56902,7 +57980,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 404, + "id": 414, "interval": null, "isNew": true, "legend": { @@ -57035,7 +58113,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 405, + 
"id": 415, "interval": null, "isNew": true, "legend": { @@ -57183,7 +58261,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 406, + "id": 416, "interval": null, "isNew": true, "legend": { @@ -57346,7 +58424,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 407, + "id": 417, "interval": null, "isNew": true, "legend": { @@ -57479,7 +58557,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 408, + "id": 418, "interval": null, "isNew": true, "legend": { @@ -57612,7 +58690,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 409, + "id": 419, "interval": null, "isNew": true, "legend": { @@ -57760,7 +58838,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 410, + "id": 420, "interval": null, "isNew": true, "legend": { @@ -57908,7 +58986,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 411, + "id": 421, "interval": null, "isNew": true, "legend": { @@ -58044,7 +59122,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 412, + "id": 422, "interval": null, "links": [], "maxDataPoints": 100, @@ -58083,7 +59161,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 413, + "id": 423, "interval": null, "isNew": true, "legend": { @@ -58216,7 +59294,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 414, + "id": 424, "interval": null, "isNew": true, "legend": { @@ -58349,7 +59427,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 415, + "id": 425, "interval": null, "isNew": true, "legend": { @@ -58482,7 +59560,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 416, + "id": 426, "interval": null, "isNew": true, "legend": { @@ -58615,7 +59693,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 417, + "id": 427, "interval": null, "isNew": true, "legend": { @@ -58748,7 +59826,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 418, + "id": 428, "interval": null, "isNew": true, "legend": { @@ -58881,7 +59959,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 419, + "id": 429, "interval": null, "isNew": true, "legend": { @@ -59014,7 +60092,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 420, + "id": 430, "interval": null, "isNew": true, "legend": { @@ -59147,7 +60225,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 421, + "id": 431, "interval": null, "isNew": true, "legend": { @@ -59287,7 +60365,7 @@ "hideTimeOverride": false, "hideZeroBuckets": true, "highlightCards": true, - "id": 422, + "id": 432, "interval": null, "legend": { "show": false @@ -59385,7 +60463,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 423, + "id": 433, "interval": null, "isNew": true, "legend": { @@ -59518,7 +60596,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 424, + "id": 434, "interval": null, "isNew": true, "legend": { @@ -59666,7 +60744,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 425, + "id": 435, "interval": null, "isNew": true, "legend": { @@ -59814,7 +60892,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 426, + "id": 436, "interval": null, "isNew": true, "legend": { @@ -59954,7 +61032,7 @@ "hideTimeOverride": false, "hideZeroBuckets": true, "highlightCards": true, - "id": 427, + "id": 437, "interval": null, "legend": { "show": false @@ -60052,7 +61130,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 428, + "id": 438, "interval": null, "isNew": true, "legend": { @@ -60185,7 +61263,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 429, + "id": 439, "interval": null, "isNew": true, "legend": { @@ -60321,7 +61399,7 @@ }, "height": null, 
"hideTimeOverride": false, - "id": 430, + "id": 440, "interval": null, "links": [], "maxDataPoints": 100, @@ -60360,7 +61438,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 431, + "id": 441, "interval": null, "isNew": true, "legend": { @@ -60493,7 +61571,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 432, + "id": 442, "interval": null, "isNew": true, "legend": { @@ -60656,7 +61734,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 433, + "id": 443, "interval": null, "isNew": true, "legend": { @@ -60804,7 +61882,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 434, + "id": 444, "interval": null, "isNew": true, "legend": { @@ -60937,7 +62015,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 435, + "id": 445, "interval": null, "isNew": true, "legend": { @@ -61077,7 +62155,7 @@ "hideTimeOverride": false, "hideZeroBuckets": true, "highlightCards": true, - "id": 436, + "id": 446, "interval": null, "legend": { "show": false @@ -61182,7 +62260,7 @@ "hideTimeOverride": false, "hideZeroBuckets": true, "highlightCards": true, - "id": 437, + "id": 447, "interval": null, "legend": { "show": false @@ -61287,7 +62365,7 @@ "hideTimeOverride": false, "hideZeroBuckets": true, "highlightCards": true, - "id": 438, + "id": 448, "interval": null, "legend": { "show": false @@ -61385,7 +62463,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 439, + "id": 449, "interval": null, "isNew": true, "legend": { @@ -61525,7 +62603,7 @@ "hideTimeOverride": false, "hideZeroBuckets": true, "highlightCards": true, - "id": 440, + "id": 450, "interval": null, "legend": { "show": false @@ -61630,7 +62708,7 @@ "hideTimeOverride": false, "hideZeroBuckets": true, "highlightCards": true, - "id": 441, + "id": 451, "interval": null, "legend": { "show": false @@ -61735,7 +62813,7 @@ "hideTimeOverride": false, "hideZeroBuckets": true, "highlightCards": true, - "id": 442, + "id": 452, "interval": null, "legend": { "show": false @@ -61833,7 +62911,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 443, + "id": 453, "interval": null, "isNew": true, "legend": { @@ -61966,7 +63044,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 444, + "id": 454, "interval": null, "isNew": true, "legend": { @@ -62099,7 +63177,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 445, + "id": 455, "interval": null, "isNew": true, "legend": { @@ -62239,7 +63317,7 @@ "hideTimeOverride": false, "hideZeroBuckets": true, "highlightCards": true, - "id": 446, + "id": 456, "interval": null, "legend": { "show": false @@ -62337,7 +63415,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 447, + "id": 457, "interval": null, "isNew": true, "legend": { @@ -62473,7 +63551,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 448, + "id": 458, "interval": null, "links": [], "maxDataPoints": 100, @@ -62512,7 +63590,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 449, + "id": 459, "interval": null, "isNew": true, "legend": { @@ -62675,7 +63753,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 450, + "id": 460, "interval": null, "isNew": true, "legend": { @@ -62808,7 +63886,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 451, + "id": 461, "interval": null, "isNew": true, "legend": { @@ -62948,7 +64026,7 @@ "hideTimeOverride": false, "hideZeroBuckets": true, "highlightCards": true, - "id": 452, + "id": 462, "interval": null, "legend": { "show": false @@ -63053,7 +64131,7 @@ "hideTimeOverride": false, "hideZeroBuckets": true, "highlightCards": true, - 
"id": 453, + "id": 463, "interval": null, "legend": { "show": false @@ -63151,7 +64229,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 454, + "id": 464, "interval": null, "isNew": true, "legend": { @@ -63306,7 +64384,7 @@ "hideTimeOverride": false, "hideZeroBuckets": true, "highlightCards": true, - "id": 455, + "id": 465, "interval": null, "legend": { "show": false @@ -63411,7 +64489,7 @@ "hideTimeOverride": false, "hideZeroBuckets": true, "highlightCards": true, - "id": 456, + "id": 466, "interval": null, "legend": { "show": false @@ -63516,7 +64594,7 @@ "hideTimeOverride": false, "hideZeroBuckets": true, "highlightCards": true, - "id": 457, + "id": 467, "interval": null, "legend": { "show": false @@ -63614,7 +64692,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 458, + "id": 468, "interval": null, "isNew": true, "legend": { @@ -63784,7 +64862,7 @@ "hideTimeOverride": false, "hideZeroBuckets": true, "highlightCards": true, - "id": 459, + "id": 469, "interval": null, "legend": { "show": false @@ -63882,7 +64960,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 460, + "id": 470, "interval": null, "isNew": true, "legend": { @@ -64083,7 +65161,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 461, + "id": 471, "interval": null, "isNew": true, "legend": { @@ -64284,7 +65362,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 462, + "id": 472, "interval": null, "isNew": true, "legend": { @@ -64417,7 +65495,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 463, + "id": 473, "interval": null, "isNew": true, "legend": { @@ -64580,7 +65658,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 464, + "id": 474, "interval": null, "isNew": true, "legend": { @@ -64713,7 +65791,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 465, + "id": 475, "interval": null, "isNew": true, "legend": { @@ -64846,7 +65924,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 466, + "id": 476, "interval": null, "isNew": true, "legend": { @@ -65047,7 +66125,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 467, + "id": 477, "interval": null, "isNew": true, "legend": { @@ -65180,7 +66258,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 468, + "id": 478, "interval": null, "isNew": true, "legend": { @@ -65320,7 +66398,7 @@ "hideTimeOverride": false, "hideZeroBuckets": true, "highlightCards": true, - "id": 469, + "id": 479, "interval": null, "legend": { "show": false @@ -65425,7 +66503,7 @@ "hideTimeOverride": false, "hideZeroBuckets": true, "highlightCards": true, - "id": 470, + "id": 480, "interval": null, "legend": { "show": false @@ -65530,7 +66608,7 @@ "hideTimeOverride": false, "hideZeroBuckets": true, "highlightCards": true, - "id": 471, + "id": 481, "interval": null, "legend": { "show": false @@ -65635,7 +66713,7 @@ "hideTimeOverride": false, "hideZeroBuckets": true, "highlightCards": true, - "id": 472, + "id": 482, "interval": null, "legend": { "show": false @@ -65740,7 +66818,7 @@ "hideTimeOverride": false, "hideZeroBuckets": true, "highlightCards": true, - "id": 473, + "id": 483, "interval": null, "legend": { "show": false @@ -65845,7 +66923,7 @@ "hideTimeOverride": false, "hideZeroBuckets": true, "highlightCards": true, - "id": 474, + "id": 484, "interval": null, "legend": { "show": false @@ -65950,7 +67028,7 @@ "hideTimeOverride": false, "hideZeroBuckets": true, "highlightCards": true, - "id": 475, + "id": 485, "interval": null, "legend": { "show": false @@ -66048,7 +67126,7 @@ }, "height": null, "hideTimeOverride": 
false, - "id": 476, + "id": 486, "interval": null, "isNew": true, "legend": { @@ -66196,7 +67274,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 477, + "id": 487, "interval": null, "isNew": true, "legend": { @@ -66329,7 +67407,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 478, + "id": 488, "interval": null, "isNew": true, "legend": { @@ -66462,7 +67540,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 479, + "id": 489, "interval": null, "isNew": true, "legend": { @@ -66610,7 +67688,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 480, + "id": 490, "interval": null, "isNew": true, "legend": { @@ -66746,7 +67824,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 481, + "id": 491, "interval": null, "links": [], "maxDataPoints": 100, @@ -66797,7 +67875,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 482, + "id": 492, "interval": null, "links": [], "maxDataPoints": 100, @@ -66893,7 +67971,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 483, + "id": 493, "interval": null, "links": [], "maxDataPoints": 100, @@ -66968,7 +68046,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 484, + "id": 494, "interval": null, "links": [], "maxDataPoints": 100, @@ -67043,7 +68121,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 485, + "id": 495, "interval": null, "links": [], "maxDataPoints": 100, @@ -67118,7 +68196,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 486, + "id": 496, "interval": null, "links": [], "maxDataPoints": 100, @@ -67193,7 +68271,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 487, + "id": 497, "interval": null, "links": [], "maxDataPoints": 100, @@ -67268,7 +68346,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 488, + "id": 498, "interval": null, "links": [], "maxDataPoints": 100, @@ -67343,7 +68421,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 489, + "id": 499, "interval": null, "links": [], "maxDataPoints": 100, @@ -67422,7 +68500,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 490, + "id": 500, "interval": null, "isNew": true, "legend": { @@ -67555,7 +68633,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 491, + "id": 501, "interval": null, "isNew": true, "legend": { @@ -67688,7 +68766,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 492, + "id": 502, "interval": null, "isNew": true, "legend": { @@ -67821,7 +68899,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 493, + "id": 503, "interval": null, "isNew": true, "legend": { @@ -67954,7 +69032,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 494, + "id": 504, "interval": null, "isNew": true, "legend": { @@ -68087,7 +69165,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 495, + "id": 505, "interval": null, "isNew": true, "legend": { @@ -68235,7 +69313,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 496, + "id": 506, "interval": null, "isNew": true, "legend": { @@ -68368,7 +69446,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 497, + "id": 507, "interval": null, "isNew": true, "legend": { @@ -68501,7 +69579,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 498, + "id": 508, "interval": null, "isNew": true, "legend": { @@ -68667,7 +69745,7 @@ "hideTimeOverride": false, "hideZeroBuckets": true, "highlightCards": true, - "id": 499, + "id": 509, "interval": null, "legend": { "show": false @@ -68772,7 +69850,7 @@ "hideTimeOverride": false, "hideZeroBuckets": true, "highlightCards": true, - "id": 500, + "id": 510, 
"interval": null, "legend": { "show": false @@ -68877,7 +69955,7 @@ "hideTimeOverride": false, "hideZeroBuckets": true, "highlightCards": true, - "id": 501, + "id": 511, "interval": null, "legend": { "show": false @@ -68982,7 +70060,7 @@ "hideTimeOverride": false, "hideZeroBuckets": true, "highlightCards": true, - "id": 502, + "id": 512, "interval": null, "legend": { "show": false @@ -69087,7 +70165,7 @@ "hideTimeOverride": false, "hideZeroBuckets": true, "highlightCards": true, - "id": 503, + "id": 513, "interval": null, "legend": { "show": false @@ -69192,7 +70270,7 @@ "hideTimeOverride": false, "hideZeroBuckets": true, "highlightCards": true, - "id": 504, + "id": 514, "interval": null, "legend": { "show": false @@ -69297,7 +70375,7 @@ "hideTimeOverride": false, "hideZeroBuckets": true, "highlightCards": true, - "id": 505, + "id": 515, "interval": null, "legend": { "show": false @@ -69402,7 +70480,7 @@ "hideTimeOverride": false, "hideZeroBuckets": true, "highlightCards": true, - "id": 506, + "id": 516, "interval": null, "legend": { "show": false @@ -69500,7 +70578,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 507, + "id": 517, "interval": null, "isNew": true, "legend": { @@ -69633,7 +70711,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 508, + "id": 518, "interval": null, "isNew": true, "legend": { @@ -69766,7 +70844,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 509, + "id": 519, "interval": null, "isNew": true, "legend": { @@ -69899,7 +70977,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 510, + "id": 520, "interval": null, "isNew": true, "legend": { @@ -70032,7 +71110,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 511, + "id": 521, "interval": null, "isNew": true, "legend": { @@ -70165,7 +71243,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 512, + "id": 522, "interval": null, "isNew": true, "legend": { @@ -70298,7 +71376,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 513, + "id": 523, "interval": null, "isNew": true, "legend": { @@ -70431,7 +71509,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 514, + "id": 524, "interval": null, "isNew": true, "legend": { @@ -70571,7 +71649,7 @@ "hideTimeOverride": false, "hideZeroBuckets": true, "highlightCards": true, - "id": 515, + "id": 525, "interval": null, "legend": { "show": false @@ -70676,7 +71754,7 @@ "hideTimeOverride": false, "hideZeroBuckets": true, "highlightCards": true, - "id": 516, + "id": 526, "interval": null, "legend": { "show": false @@ -70774,7 +71852,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 517, + "id": 527, "interval": null, "isNew": true, "legend": { @@ -70907,7 +71985,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 518, + "id": 528, "interval": null, "isNew": true, "legend": { @@ -71040,7 +72118,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 519, + "id": 529, "interval": null, "isNew": true, "legend": { @@ -71173,7 +72251,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 520, + "id": 530, "interval": null, "isNew": true, "legend": { @@ -71306,7 +72384,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 521, + "id": 531, "interval": null, "isNew": true, "legend": { @@ -71439,7 +72517,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 522, + "id": 532, "interval": null, "isNew": true, "legend": { @@ -71575,7 +72653,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 523, + "id": 533, "interval": null, "links": [], "maxDataPoints": 100, @@ -71614,7 +72692,7 @@ }, 
"height": null, "hideTimeOverride": false, - "id": 524, + "id": 534, "interval": null, "isNew": true, "legend": { @@ -71762,7 +72840,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 525, + "id": 535, "interval": null, "isNew": true, "legend": { @@ -71895,7 +72973,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 526, + "id": 536, "interval": null, "isNew": true, "legend": { @@ -72028,7 +73106,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 527, + "id": 537, "interval": null, "isNew": true, "legend": { @@ -72164,7 +73242,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 528, + "id": 538, "interval": null, "links": [], "maxDataPoints": 100, @@ -72203,7 +73281,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 529, + "id": 539, "interval": null, "isNew": true, "legend": { @@ -72336,7 +73414,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 530, + "id": 540, "interval": null, "isNew": true, "legend": { @@ -72469,7 +73547,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 531, + "id": 541, "interval": null, "isNew": true, "legend": { @@ -72602,7 +73680,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 532, + "id": 542, "interval": null, "isNew": true, "legend": { @@ -72735,7 +73813,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 533, + "id": 543, "interval": null, "isNew": true, "legend": { @@ -72868,7 +73946,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 534, + "id": 544, "interval": null, "isNew": true, "legend": { @@ -73004,7 +74082,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 535, + "id": 545, "interval": null, "links": [], "maxDataPoints": 100, @@ -73043,7 +74121,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 536, + "id": 546, "interval": null, "isNew": true, "legend": { @@ -73176,7 +74254,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 537, + "id": 547, "interval": null, "isNew": true, "legend": { @@ -73312,7 +74390,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 538, + "id": 548, "interval": null, "links": [], "maxDataPoints": 100, @@ -73351,7 +74429,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 539, + "id": 549, "interval": null, "isNew": true, "legend": { @@ -73552,7 +74630,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 540, + "id": 550, "interval": null, "isNew": true, "legend": { @@ -73688,7 +74766,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 541, + "id": 551, "interval": null, "links": [], "maxDataPoints": 100, @@ -73727,7 +74805,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 542, + "id": 552, "interval": null, "isNew": true, "legend": { @@ -73860,7 +74938,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 543, + "id": 553, "interval": null, "isNew": true, "legend": { @@ -73993,7 +75071,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 544, + "id": 554, "interval": null, "isNew": true, "legend": { @@ -74126,7 +75204,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 545, + "id": 555, "interval": null, "isNew": true, "legend": { @@ -74259,7 +75337,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 546, + "id": 556, "interval": null, "isNew": true, "legend": { @@ -74407,7 +75485,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 547, + "id": 557, "interval": null, "isNew": true, "legend": { @@ -74611,7 +75689,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 548, + "id": 558, "interval": null, "links": [], "maxDataPoints": 100, @@ -74650,7 
+75728,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 549, + "id": 559, "interval": null, "isNew": true, "legend": { @@ -74783,7 +75861,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 550, + "id": 560, "interval": null, "isNew": true, "legend": { @@ -74916,7 +75994,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 551, + "id": 561, "interval": null, "isNew": true, "legend": { @@ -75049,7 +76127,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 552, + "id": 562, "interval": null, "isNew": true, "legend": { @@ -75182,7 +76260,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 553, + "id": 563, "interval": null, "isNew": true, "legend": { @@ -75379,7 +76457,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 554, + "id": 564, "interval": null, "links": [], "maxDataPoints": 100, diff --git a/metrics/grafana/tikv_details.json.sha256 b/metrics/grafana/tikv_details.json.sha256 index ecc7487be82..b61272c5ef6 100644 --- a/metrics/grafana/tikv_details.json.sha256 +++ b/metrics/grafana/tikv_details.json.sha256 @@ -1 +1 @@ -489eb016896d5f733f2db9787b85de0c51a1cf8373affebe85045b779a560cd0 ./metrics/grafana/tikv_details.json +36809f7e452742d5c3b62d0b9c5cf7566553289cb1d1b976e9aea8be4eaa2fc9 ./metrics/grafana/tikv_details.json From d434617430bea04adfcb0391cd6d56f27a71c5f7 Mon Sep 17 00:00:00 2001 From: Andrey Koshchiy Date: Sun, 3 Nov 2024 12:25:42 +0300 Subject: [PATCH 06/86] aws: switch to aws-sdk (#13814) close tikv/tikv#12371 * switch kms to aws_sdk lib * switch s3 to aws_sdk lib Signed-off-by: Andrey Koshchiy Co-authored-by: ti-chi-bot[bot] <108142056+ti-chi-bot[bot]@users.noreply.github.com> --- Cargo.lock | 1184 ++++++++++++++++++++++------ Cargo.toml | 11 +- components/cloud/aws/Cargo.toml | 27 +- components/cloud/aws/src/kms.rs | 326 +++++--- components/cloud/aws/src/s3.rs | 1034 +++++++++++++----------- components/cloud/aws/src/util.rs | 284 ++++--- components/tikv_util/Cargo.toml | 1 - components/tikv_util/src/stream.rs | 25 +- deny.toml | 12 +- 9 files changed, 1898 insertions(+), 1006 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 0504f5dc74e..e494e5540cd 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -54,9 +54,9 @@ dependencies = [ [[package]] name = "ahash" -version = "0.8.7" +version = "0.8.11" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "77c3a9648d43b9cd48db467b3f87fdd6e146bcc88ab0180006cef2179fe11d01" +checksum = "e89da841a80418a9b391ebaea17f5c112ffaaa96f621d2c285b5174da76b9011" dependencies = [ "cfg-if 1.0.0", "once_cell", @@ -79,6 +79,15 @@ version = "0.2.14" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c4f263788a35611fba42eb41ff811c5d0360c58b97402570312a350736e2542e" +[[package]] +name = "android_system_properties" +version = "0.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "819e7219dbd41043ac279b19830f2efc897156490d7fd6ea916720117ee66311" +dependencies = [ + "libc 0.2.151", +] + [[package]] name = "ansi_term" version = "0.11.0" @@ -130,6 +139,17 @@ dependencies = [ "nodrop", ] +[[package]] +name = "assert-json-diff" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4259cbe96513d2f1073027a259fc2ca917feb3026a5a8d984e3628e490255cc0" +dependencies = [ + "extend", + "serde", + "serde_json", +] + [[package]] name = "async-channel" version = "1.6.1" @@ -248,29 +268,400 @@ name = "aws" version = "0.0.1" dependencies = [ "async-trait", + "aws-config", + "aws-credential-types", + 
"aws-sdk-kms", + "aws-sdk-s3", + "aws-smithy-runtime", + "aws-smithy-runtime-api", + "aws-smithy-types", "base64 0.13.0", "bytes", "cloud", "fail", "futures 0.3.15", "futures-util", + "grpcio", + "http 0.2.12", + "hyper", + "hyper-tls", "kvproto", "md5", - "rusoto_core", - "rusoto_credential", - "rusoto_kms", - "rusoto_mock", - "rusoto_s3", - "rusoto_sts", + "prometheus", "slog", "slog-global", "thiserror", "tikv_util", "tokio", + "tokio-util", "url", "uuid 0.8.2", ] +[[package]] +name = "aws-config" +version = "1.5.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4e95816a168520d72c0e7680c405a5a8c1fb6a035b4bc4b9d7b0de8e1a941697" +dependencies = [ + "aws-credential-types", + "aws-runtime", + "aws-sdk-sts", + "aws-smithy-async", + "aws-smithy-http", + "aws-smithy-json", + "aws-smithy-runtime", + "aws-smithy-runtime-api", + "aws-smithy-types", + "aws-types", + "bytes", + "fastrand 2.0.1", + "http 0.2.12", + "time 0.3.20", + "tokio", + "tracing", + "url", +] + +[[package]] +name = "aws-credential-types" +version = "1.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "60e8f6b615cb5fc60a98132268508ad104310f0cfb25a1c22eee76efdf9154da" +dependencies = [ + "aws-smithy-async", + "aws-smithy-runtime-api", + "aws-smithy-types", + "zeroize", +] + +[[package]] +name = "aws-runtime" +version = "1.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f42c2d4218de4dcd890a109461e2f799a1a2ba3bcd2cde9af88360f5df9266c6" +dependencies = [ + "aws-credential-types", + "aws-sigv4", + "aws-smithy-async", + "aws-smithy-eventstream", + "aws-smithy-http", + "aws-smithy-runtime-api", + "aws-smithy-types", + "aws-types", + "bytes", + "fastrand 2.0.1", + "http 0.2.12", + "http-body 0.4.5", + "once_cell", + "percent-encoding", + "pin-project-lite", + "tracing", + "uuid 1.7.0", +] + +[[package]] +name = "aws-sdk-kms" +version = "1.40.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "70ebbbc319551583b9233a74b359ede7349102e779fc12371d2478e80b50d218" +dependencies = [ + "aws-credential-types", + "aws-runtime", + "aws-smithy-async", + "aws-smithy-http", + "aws-smithy-json", + "aws-smithy-runtime", + "aws-smithy-runtime-api", + "aws-smithy-types", + "aws-types", + "bytes", + "http 0.2.12", + "once_cell", + "regex-lite", + "tracing", +] + +[[package]] +name = "aws-sdk-s3" +version = "1.40.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8367c403fdf27690684b926a46ed9524099a69dd5dfcef62028bf4096b5b809f" +dependencies = [ + "ahash 0.8.11", + "aws-credential-types", + "aws-runtime", + "aws-sigv4", + "aws-smithy-async", + "aws-smithy-checksums", + "aws-smithy-eventstream", + "aws-smithy-http", + "aws-smithy-json", + "aws-smithy-runtime", + "aws-smithy-runtime-api", + "aws-smithy-types", + "aws-smithy-xml", + "aws-types", + "bytes", + "fastrand 2.0.1", + "hex 0.4.3", + "hmac", + "http 0.2.12", + "http-body 0.4.5", + "lru", + "once_cell", + "percent-encoding", + "regex-lite", + "sha2 0.10.8", + "tracing", + "url", +] + +[[package]] +name = "aws-sdk-sts" +version = "1.37.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6e52dc3fd7dfa6c01a69cf3903e00aa467261639138a05b06cd92314d2c8fb07" +dependencies = [ + "aws-credential-types", + "aws-runtime", + "aws-smithy-async", + "aws-smithy-http", + "aws-smithy-json", + "aws-smithy-query", + "aws-smithy-runtime", + "aws-smithy-runtime-api", + "aws-smithy-types", + "aws-smithy-xml", + "aws-types", + "http 
0.2.12", + "once_cell", + "regex-lite", + "tracing", +] + +[[package]] +name = "aws-sigv4" +version = "1.2.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cc8db6904450bafe7473c6ca9123f88cc11089e41a025408f992db4e22d3be68" +dependencies = [ + "aws-credential-types", + "aws-smithy-eventstream", + "aws-smithy-http", + "aws-smithy-runtime-api", + "aws-smithy-types", + "bytes", + "form_urlencoded", + "hex 0.4.3", + "hmac", + "http 0.2.12", + "http 1.1.0", + "once_cell", + "percent-encoding", + "sha2 0.10.8", + "time 0.3.20", + "tracing", +] + +[[package]] +name = "aws-smithy-async" +version = "1.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "62220bc6e97f946ddd51b5f1361f78996e704677afc518a4ff66b7a72ea1378c" +dependencies = [ + "futures-util", + "pin-project-lite", + "tokio", +] + +[[package]] +name = "aws-smithy-checksums" +version = "0.60.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "598b1689d001c4d4dc3cb386adb07d37786783aee3ac4b324bcadac116bf3d23" +dependencies = [ + "aws-smithy-http", + "aws-smithy-types", + "bytes", + "crc32c", + "crc32fast", + "hex 0.4.3", + "http 0.2.12", + "http-body 0.4.5", + "md-5", + "pin-project-lite", + "sha1", + "sha2 0.10.8", + "tracing", +] + +[[package]] +name = "aws-smithy-eventstream" +version = "0.60.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cef7d0a272725f87e51ba2bf89f8c21e4df61b9e49ae1ac367a6d69916ef7c90" +dependencies = [ + "aws-smithy-types", + "bytes", + "crc32fast", +] + +[[package]] +name = "aws-smithy-http" +version = "0.60.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5c8bc3e8fdc6b8d07d976e301c02fe553f72a39b7a9fea820e023268467d7ab6" +dependencies = [ + "aws-smithy-eventstream", + "aws-smithy-runtime-api", + "aws-smithy-types", + "bytes", + "bytes-utils", + "futures-core", + "http 0.2.12", + "http-body 0.4.5", + "once_cell", + "percent-encoding", + "pin-project-lite", + "pin-utils", + "tracing", +] + +[[package]] +name = "aws-smithy-json" +version = "0.60.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4683df9469ef09468dad3473d129960119a0d3593617542b7d52086c8486f2d6" +dependencies = [ + "aws-smithy-types", +] + +[[package]] +name = "aws-smithy-protocol-test" +version = "0.63.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3b92b62199921f10685c6b588fdbeb81168ae4e7950ae3e5f50145a01bb5f1ad" +dependencies = [ + "assert-json-diff", + "aws-smithy-runtime-api", + "base64-simd", + "cbor-diag", + "ciborium", + "http 0.2.12", + "pretty_assertions", + "regex-lite", + "roxmltree", + "serde_json", + "thiserror", +] + +[[package]] +name = "aws-smithy-query" +version = "0.60.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f2fbd61ceb3fe8a1cb7352e42689cec5335833cd9f94103a61e98f9bb61c64bb" +dependencies = [ + "aws-smithy-types", + "urlencoding", +] + +[[package]] +name = "aws-smithy-runtime" +version = "1.7.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a065c0fe6fdbdf9f11817eb68582b2ab4aff9e9c39e986ae48f7ec576c6322db" +dependencies = [ + "aws-smithy-async", + "aws-smithy-http", + "aws-smithy-protocol-test", + "aws-smithy-runtime-api", + "aws-smithy-types", + "bytes", + "fastrand 2.0.1", + "h2", + "http 0.2.12", + "http-body 0.4.5", + "http-body 1.0.0", + "httparse", + "hyper", + "indexmap 2.0.1", + "once_cell", + "pin-project-lite", + "pin-utils", + 
"serde", + "serde_json", + "tokio", + "tracing", + "tracing-subscriber", +] + +[[package]] +name = "aws-smithy-runtime-api" +version = "1.7.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e086682a53d3aa241192aa110fa8dfce98f2f5ac2ead0de84d41582c7e8fdb96" +dependencies = [ + "aws-smithy-async", + "aws-smithy-types", + "bytes", + "http 0.2.12", + "http 1.1.0", + "pin-project-lite", + "tokio", + "tracing", + "zeroize", +] + +[[package]] +name = "aws-smithy-types" +version = "1.2.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "147100a7bea70fa20ef224a6bad700358305f5dc0f84649c53769761395b355b" +dependencies = [ + "base64-simd", + "bytes", + "bytes-utils", + "futures-core", + "http 0.2.12", + "http 1.1.0", + "http-body 0.4.5", + "http-body 1.0.0", + "http-body-util", + "itoa 1.0.1", + "num-integer", + "pin-project-lite", + "pin-utils", + "ryu", + "serde", + "time 0.3.20", + "tokio", + "tokio-util", +] + +[[package]] +name = "aws-smithy-xml" +version = "0.60.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d123fbc2a4adc3c301652ba8e149bf4bc1d1725affb9784eb20c953ace06bf55" +dependencies = [ + "xmlparser", +] + +[[package]] +name = "aws-types" +version = "1.3.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5221b91b3e441e6675310829fd8984801b772cb1546ef6c0e54dec9f1ac13fef" +dependencies = [ + "aws-credential-types", + "aws-smithy-async", + "aws-smithy-runtime-api", + "aws-smithy-types", + "rustc_version 0.4.0", + "tracing", +] + [[package]] name = "azure" version = "0.0.1" @@ -435,7 +826,7 @@ dependencies = [ "futures 0.3.15", "futures-util", "grpcio", - "hex 0.4.2", + "hex 0.4.3", "keys", "kvproto", "lazy_static", @@ -480,7 +871,7 @@ dependencies = [ "file_system", "futures 0.3.15", "grpcio", - "hex 0.4.2", + "hex 0.4.3", "kvproto", "lazy_static", "log_wrappers", @@ -531,6 +922,16 @@ version = "0.21.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a4a4ddaa51a5bc52a6948f74c06d20aaaddb71924eab79b8c97a8c556e942d6a" +[[package]] +name = "base64-simd" +version = "0.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "339abbe78e73178762e23bea9dfd08e697eb3f3301cd4be981c0f78ba5859195" +dependencies = [ + "outref", + "vsimd", +] + [[package]] name = "batch-system" version = "0.1.0" @@ -593,7 +994,7 @@ dependencies = [ "quote", "regex", "rustc-hash", - "shlex 1.3.0", + "shlex", "which", ] @@ -614,7 +1015,7 @@ dependencies = [ "quote", "regex", "rustc-hash", - "shlex 1.3.0", + "shlex", "syn 2.0.79", ] @@ -666,12 +1067,30 @@ dependencies = [ "generic-array", ] +[[package]] +name = "block-buffer" +version = "0.10.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3078c7629b62d3f0439517fa394996acacc5cbc91c5a20d8c658e77abd503a71" +dependencies = [ + "generic-array", +] + [[package]] name = "boolinator" version = "2.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "cfa8873f51c92e232f9bac4065cddef41b714152812bfc5f7672ba16d6ef8cd9" +[[package]] +name = "bs58" +version = "0.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bf88ba1141d185c399bee5288d850d63b8369520c1eafc32a0430b5b6c287bf4" +dependencies = [ + "tinyvec", +] + [[package]] name = "bstr" version = "0.2.8" @@ -704,11 +1123,18 @@ checksum = "14c189c53d098945499cdfa7ecc63567cf3886b3332b312a5b4585d8d3a6a610" [[package]] name = "bytes" -version = "1.0.1" +version = "1.7.2" 
+source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "428d9aa8fbc0670b7b8d6030a7fadd0f86151cae55e4dbbece15f3780a3dfaf3" + +[[package]] +name = "bytes-utils" +version = "0.1.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b700ce4376041dcd0a327fd0097c41095743c4c8af8887265942faf1100bd040" +checksum = "7dafe3a8757b027e2be6e4e5601ed563c55989fcf1546e933c66c8eb3a058d35" dependencies = [ - "serde", + "bytes", + "either", ] [[package]] @@ -791,6 +1217,25 @@ dependencies = [ "txn_types", ] +[[package]] +name = "cbor-diag" +version = "0.1.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dc245b6ecd09b23901a4fbad1ad975701fd5061ceaef6afa93a2d70605a64429" +dependencies = [ + "bs58", + "chrono", + "data-encoding", + "half 2.4.1", + "nom 7.1.3", + "num-bigint", + "num-rational 0.4.1", + "num-traits", + "separator", + "url", + "uuid 1.7.0", +] + [[package]] name = "cc" version = "1.0.83" @@ -851,7 +1296,7 @@ version = "0.6.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6fac387a98bb7c37292057cffc56d62ecb629900026402633ae9160df93a8766" dependencies = [ - "nom 7.1.0", + "nom 7.1.3", ] [[package]] @@ -868,10 +1313,11 @@ checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" [[package]] name = "chrono" -version = "0.4.20" +version = "0.4.22" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6127248204b9aba09a362f6c930ef6a78f2c1b2215f8a7b398c06e1083f17af0" +checksum = "bfd4d1b31faaa3a89d7934dbded3111da0d2ef28e3ebccdb4f0179f5929d1ef1" dependencies = [ + "iana-time-zone", "js-sys", "num-integer", "num-traits", @@ -891,6 +1337,33 @@ dependencies = [ "parse-zoneinfo", ] +[[package]] +name = "ciborium" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "42e69ffd6f0917f5c029256a24d0161db17cea3997d185db0d35926308770f0e" +dependencies = [ + "ciborium-io", + "ciborium-ll", + "serde", +] + +[[package]] +name = "ciborium-io" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "05afea1e0a06c9be33d539b876f1ce3692f4afea2cb41f740e7743225ed1c757" + +[[package]] +name = "ciborium-ll" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "57663b653d948a338bfb3eeba9bb2fd5fcfaecb9e199e87e1eda4d9e8b240fd9" +dependencies = [ + "ciborium-io", + "half 2.4.1", +] + [[package]] name = "clang-sys" version = "1.1.1" @@ -1077,9 +1550,9 @@ dependencies = [ [[package]] name = "core-foundation-sys" -version = "0.8.2" +version = "0.8.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ea221b5284a47e40033bf9b66f35f984ec0ea2931eb03505246cd27a963f981b" +checksum = "773648b94d0e5d620f64f280777445740e61fe701025087ec8b57f45c791888b" [[package]] name = "cpu-time" @@ -1091,6 +1564,15 @@ dependencies = [ "winapi 0.3.9", ] +[[package]] +name = "cpufeatures" +version = "0.2.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "53fe5e26ff1b7aef8bca9c6080520cfb8d9333c7568e1829cef191a9723e5504" +dependencies = [ + "libc 0.2.151", +] + [[package]] name = "cpuid-bool" version = "0.1.2" @@ -1099,20 +1581,20 @@ checksum = "8aebca1129a03dc6dc2b127edd729435bbc4a37e1d5f4d7513165089ceb02634" [[package]] name = "crc32c" -version = "0.6.4" +version = "0.6.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d8f48d60e5b4d2c53d5c2b1d8a58c849a70ae5e5509b08a48d047e3b65714a74" 
+checksum = "3a47af21622d091a8f0fb295b88bc886ac74efcc613efc19f5d0b21de5c89e47" dependencies = [ "rustc_version 0.4.0", ] [[package]] name = "crc32fast" -version = "1.2.0" +version = "1.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ba125de2af0df55319f41944744ad91c71113bf74a4646efff39afe1f6842db1" +checksum = "b3855a8a784b474f333699ef2bbca9db2c4a1f6d9088a90a2d25b1eb53111eaa" dependencies = [ - "cfg-if 0.1.10", + "cfg-if 1.0.0", ] [[package]] @@ -1255,6 +1737,12 @@ version = "0.8.20" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "22ec99545bb0ed0ea7bb9b8e1e9122ea386ff8a48c0922e43f36d45ab09e0e80" +[[package]] +name = "crunchy" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7a81dae078cea95a014a339291cec439d2f232ebe854a9d672b796c6afafa9b7" + [[package]] name = "crypto" version = "0.0.1" @@ -1264,6 +1752,16 @@ dependencies = [ "slog-global", ] +[[package]] +name = "crypto-common" +version = "0.1.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1bfb12502f3fc46cca1bb51ac28df9d618d813cdc3d2f25b9fe775a34af26bb3" +dependencies = [ + "generic-array", + "typenum", +] + [[package]] name = "csv" version = "1.1.6" @@ -1369,6 +1867,12 @@ dependencies = [ "syn 1.0.103", ] +[[package]] +name = "diff" +version = "0.1.13" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "56254986775e3233ffa9c4d7d3faaf6d36a2c09d30b20687e9f88bc8bafc16c8" + [[package]] name = "digest" version = "0.9.0" @@ -1378,6 +1882,17 @@ dependencies = [ "generic-array", ] +[[package]] +name = "digest" +version = "0.10.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9ed9a281f7bc9b7576e61468ba615a66a5c8cfdff42420a70aa82701a3b1e292" +dependencies = [ + "block-buffer 0.10.4", + "crypto-common", + "subtle", +] + [[package]] name = "dirs-next" version = "2.0.0" @@ -1443,7 +1958,7 @@ dependencies = [ "file_system", "futures 0.3.15", "futures-util", - "hex 0.4.2", + "hex 0.4.3", "kvproto", "lazy_static", "matches", @@ -1697,6 +2212,18 @@ dependencies = [ "coprocessor_plugin_api", ] +[[package]] +name = "extend" +version = "0.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f47da3a72ec598d9c8937a7ebca8962a5c7a1f28444e38c2b33c771ba3f55f05" +dependencies = [ + "proc-macro-error", + "proc-macro2", + "quote", + "syn 1.0.103", +] + [[package]] name = "external_storage" version = "0.0.1" @@ -1865,6 +2392,12 @@ version = "1.0.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2fad85553e09a6f881f739c29f0b00b0f01357c743266d478b68951ce23285f3" +[[package]] +name = "foldhash" +version = "0.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f81ec6369c545a7d40e4589b5597581fa1c441fe1cce96dd1de43159910a36a2" + [[package]] name = "foreign-types" version = "0.3.2" @@ -1882,11 +2415,10 @@ checksum = "00b0228411908ca8685dba7fc2cdd70ec9990a6e753e89b6ac91a84c40fbaf4b" [[package]] name = "form_urlencoded" -version = "1.0.1" +version = "1.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5fc25a87fa4fd2094bffb06925852034d90a17f0d1e05197d4956d3555752191" +checksum = "e13624c2627564efccf4934284bdd98cbaa14e79b0b5a141218e507b3a823456" dependencies = [ - "matches", "percent-encoding", ] @@ -2137,7 +2669,7 @@ dependencies = [ "cloud", "crc32c", "futures-util", - "http", + "http 0.2.12", "hyper", "hyper-tls", "kvproto", @@ -2289,7 +2821,7 @@ 
dependencies = [ "futures-core", "futures-sink", "futures-util", - "http", + "http 0.2.12", "indexmap 2.0.1", "slab", "tokio", @@ -2303,6 +2835,16 @@ version = "1.8.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "eabb4a44450da02c90444cf74558da904edde8fb4e9035a9a6a4e15445af0bd7" +[[package]] +name = "half" +version = "2.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6dd08c532ae367adf81c312a4580bc67f1d0fe8bc9c460520283f4c0ff277888" +dependencies = [ + "cfg-if 1.0.0", + "crunchy", +] + [[package]] name = "hashbrown" version = "0.9.1" @@ -2315,8 +2857,19 @@ version = "0.14.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2c6201b9ff9fd90a5a3bac2e56a830d0caa509576f0e503818ee82c181b3437a" dependencies = [ - "ahash 0.8.7", + "ahash 0.8.11", + "allocator-api2", +] + +[[package]] +name = "hashbrown" +version = "0.15.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1e087f84d4f86bf4b218b927129862374b72199ae7d8657835f1e89000eea4fb" +dependencies = [ "allocator-api2", + "equivalent", + "foldhash", ] [[package]] @@ -2371,9 +2924,18 @@ checksum = "805026a5d0141ffc30abb3be3173848ad46a1b1664fe632428479619a3644d77" [[package]] name = "hex" -version = "0.4.2" +version = "0.4.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7f24254aa9a54b5c858eaee2f5bccdb46aaf0e486a595ed5fd8f86ba55232a70" + +[[package]] +name = "hmac" +version = "0.12.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "644f9158b2f133fd50f5fb3242878846d9eb792e445c893805ff0e3824006e35" +checksum = "6c49c37c09c17a53d937dfbb742eb3a961d65a994e6bcdcf37e7399d0cc8ab5e" +dependencies = [ + "digest 0.10.7", +] [[package]] name = "home" @@ -2397,9 +2959,20 @@ dependencies = [ [[package]] name = "http" -version = "0.2.8" +version = "0.2.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "601cbb57e577e2f5ef5be8e7b83f0f63994f25aa94d673e54a92d5c516d101f1" +dependencies = [ + "bytes", + "fnv", + "itoa 1.0.1", +] + +[[package]] +name = "http" +version = "1.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "75f43d41e26995c17e71ee126451dd3941010b0514a81a9d11f3b341debc2399" +checksum = "21b9ddb458710bc376481b842f5da65cdf31522de232c1ca8146abce2a358258" dependencies = [ "bytes", "fnv", @@ -2413,7 +2986,30 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d5f38f16d184e36f2408a55281cd658ecbd3ca05cce6d6510a176eca393e26d1" dependencies = [ "bytes", - "http", + "http 0.2.12", + "pin-project-lite", +] + +[[package]] +name = "http-body" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1cac85db508abc24a2e48553ba12a996e87244a0395ce011e62b37158745d643" +dependencies = [ + "bytes", + "http 1.1.0", +] + +[[package]] +name = "http-body-util" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0475f8b2ac86659c21b64320d5d653f9efe42acd2a4e560073ec61a155a34f1d" +dependencies = [ + "bytes", + "futures-core", + "http 1.1.0", + "http-body 1.0.0", "pin-project-lite", ] @@ -2482,17 +3078,17 @@ dependencies = [ [[package]] name = "hyper" -version = "0.14.23" +version = "0.14.28" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "034711faac9d2166cb1baf1a2fb0b60b1f277f8492fd72176c17f3515e1abd3c" +checksum = "bf96e135eb83a2a8ddf766e426a841d8ddd7449d5f00d34ea02b41d2f19eef80" dependencies = 
[ "bytes", "futures-channel", "futures-core", "futures-util", "h2", - "http", - "http-body", + "http 0.2.12", + "http-body 0.4.5", "httparse", "httpdate", "itoa 1.0.1", @@ -2510,7 +3106,7 @@ version = "0.9.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a9d52322a69f0a93f177d76ca82073fcec8d5b4eb6e28525d5b3142fa718195c" dependencies = [ - "http", + "http 0.2.12", "hyper", "linked_hash_set", "once_cell", @@ -2535,6 +3131,29 @@ dependencies = [ "tokio-native-tls", ] +[[package]] +name = "iana-time-zone" +version = "0.1.60" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e7ffbb5a1b541ea2561f8c41c087286cc091e21e556a4f09a8f6cbf17b69b141" +dependencies = [ + "android_system_properties", + "core-foundation-sys", + "iana-time-zone-haiku", + "js-sys", + "wasm-bindgen", + "windows-core", +] + +[[package]] +name = "iana-time-zone-haiku" +version = "0.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f31827a206f56af32e590ba56d5d2d085f558508192593743f16b2306495269f" +dependencies = [ + "cc", +] + [[package]] name = "ident_case" version = "1.0.1" @@ -2543,11 +3162,10 @@ checksum = "b9e0384b61958566e926dc50660321d12159025e767c18e043daf26b70104c39" [[package]] name = "idna" -version = "0.2.0" +version = "0.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "02e2673c30ee86b5b96a9cb52ad15718aa1f966f5ab9ad54a8b95d5ca33120a9" +checksum = "634d9b1461af396cad843f47fdba5597a4f9e6ddd4bfb6ff5d85028c25cb12f6" dependencies = [ - "matches", "unicode-bidi", "unicode-normalization", ] @@ -2572,7 +3190,7 @@ dependencies = [ "engine_traits", "fail", "futures 0.3.15", - "hex 0.4.2", + "hex 0.4.3", "keys", "kvproto", "lazy_static", @@ -2622,6 +3240,7 @@ checksum = "ad227c3af19d4914570ad36d30409928b75967c298feb9ea1969db3a610bb14e" dependencies = [ "equivalent", "hashbrown 0.14.0", + "serde", ] [[package]] @@ -2975,7 +3594,7 @@ name = "log_wrappers" version = "0.0.1" dependencies = [ "atomic", - "hex 0.4.2", + "hex 0.4.3", "protobuf", "serde", "slog", @@ -2984,6 +3603,15 @@ dependencies = [ "toml", ] +[[package]] +name = "lru" +version = "0.12.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "234cf4f4a04dc1f57e24b96cc0cd600cf2af460d4161ac5ecdd0af8e1f3b2a38" +dependencies = [ + "hashbrown 0.15.0", +] + [[package]] name = "lz4-sys" version = "1.9.4" @@ -3005,12 +3633,31 @@ dependencies = [ "syn 1.0.103", ] +[[package]] +name = "matchers" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8263075bb86c5a1b1427b5ae862e8889656f126e9f77c484496e8b47cf5c5558" +dependencies = [ + "regex-automata 0.1.8", +] + [[package]] name = "matches" version = "0.1.8" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7ffc5c5338469d4d3ea17d269fa8ea3512ad247247c30bd2df69e68309ed0a08" +[[package]] +name = "md-5" +version = "0.10.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d89e7ee0cfbedfc4da3340218492196241d89eefb6dab27de5df917a6d2e78cf" +dependencies = [ + "cfg-if 1.0.0", + "digest 0.10.7", +] + [[package]] name = "md5" version = "0.7.0" @@ -3282,13 +3929,12 @@ dependencies = [ [[package]] name = "nom" -version = "7.1.0" +version = "7.1.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1b1d11e1ef389c76fe5b81bcaf2ea32cf88b62bc494e19f493d0b30e7a930109" +checksum = "d273983c5a657a70a3e8f2a01329822f3b8c8172b73826411a55751e404a0a4a" dependencies = [ "memchr", 
"minimal-lexical", - "version_check 0.9.4", ] [[package]] @@ -3318,6 +3964,16 @@ dependencies = [ "winapi 0.3.9", ] +[[package]] +name = "nu-ansi-term" +version = "0.46.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "77a8165726e8236064dbb45459242600304b42a5ea24ee2948e18e023bf7ba84" +dependencies = [ + "overload", + "winapi 0.3.9", +] + [[package]] name = "num" version = "0.3.0" @@ -3327,7 +3983,18 @@ dependencies = [ "num-complex", "num-integer", "num-iter", - "num-rational", + "num-rational 0.3.0", + "num-traits", +] + +[[package]] +name = "num-bigint" +version = "0.4.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "608e7659b5c3d7cba262d894801b9ec9d00de989e8a82bd4bef91d08da45cdc0" +dependencies = [ + "autocfg", + "num-integer", "num-traits", ] @@ -3404,6 +4071,18 @@ dependencies = [ "num-traits", ] +[[package]] +name = "num-rational" +version = "0.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0638a1c9d0a3c0914158145bc76cff373a75a627e6ecbfb71cbe6f453a5a19b0" +dependencies = [ + "autocfg", + "num-bigint", + "num-integer", + "num-traits", +] + [[package]] name = "num-traits" version = "0.2.17" @@ -3442,12 +4121,12 @@ dependencies = [ "base64 0.13.0", "chrono", "getrandom 0.2.11", - "http", + "http 0.2.12", "rand 0.8.5", "serde", "serde_json", "serde_path_to_error", - "sha2", + "sha2 0.9.1", "thiserror", "url", ] @@ -3570,6 +4249,18 @@ dependencies = [ "memchr", ] +[[package]] +name = "outref" +version = "0.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4030760ffd992bef45b0ae3f10ce1aba99e33464c90d14dd7c039884963ddc7a" + +[[package]] +name = "overload" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b15813163c1d831bf4a13c3610c05c0d03b39feb07f7e09fa234dac9b15aaf39" + [[package]] name = "page_size" version = "0.4.2" @@ -3696,9 +4387,9 @@ checksum = "19b17cddbe7ec3f8bc800887bab5e717348c95ea2ca0b1bf0837fb964dc67099" [[package]] name = "percent-encoding" -version = "2.1.0" +version = "2.3.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d4fd5641d01c8f18a23da7b6fe29298ff4b55afcccdf78973b24cf3175fee32e" +checksum = "e3148f5046208a5d56bcfc03053e3ca6334e51da8dfb19b6cdc8b306fae3283e" [[package]] name = "perfcnt" @@ -3887,6 +4578,16 @@ version = "0.2.10" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ac74c624d6b2d21f425f752262f42188365d7b8ff1aff74c82e45136510a4857" +[[package]] +name = "pretty_assertions" +version = "1.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "af7cee1a6c8a5b9208b3cb1061f10c0cb689087b3d8ce85fb9d2dd7a29b6ba66" +dependencies = [ + "diff", + "yansi", +] + [[package]] name = "prettyplease" version = "0.2.6" @@ -3938,7 +4639,7 @@ checksum = "0941606b9934e2d98a3677759a971756eb821f75764d0e0d26946d08e74d9104" dependencies = [ "bitflags 1.3.2", "byteorder", - "hex 0.4.2", + "hex 0.4.3", "lazy_static", "libc 0.2.151", ] @@ -4008,7 +4709,7 @@ dependencies = [ "rand 0.8.5", "rand_chacha 0.3.0", "rand_xorshift", - "regex-syntax", + "regex-syntax 0.8.2", "rusty-fork", "tempfile", "unarray", @@ -4127,7 +4828,7 @@ dependencies = [ "fail", "fs2", "hashbrown 0.14.0", - "hex 0.4.2", + "hex 0.4.3", "if_chain", "lazy_static", "libc 0.2.151", @@ -4341,7 +5042,7 @@ checksum = "34af8d1a0e25924bc5b7c43c079c942339d8f0a8b57c39049bef581b46327404" dependencies = [ "libc 0.2.151", "rand_chacha 0.3.0", - "rand_core 0.6.2", + 
"rand_core 0.6.4", ] [[package]] @@ -4361,7 +5062,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e12735cf05c9e10bf21534da50a147b924d555dc7a547c42e6bb2d5b6017ae0d" dependencies = [ "ppv-lite86", - "rand_core 0.6.2", + "rand_core 0.6.4", ] [[package]] @@ -4390,9 +5091,9 @@ dependencies = [ [[package]] name = "rand_core" -version = "0.6.2" +version = "0.6.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "34cf66eb183df1c5876e2dcf6b13d57340741e8dc255b48e40a26de954d06ae7" +checksum = "ec0be4795e2f6a28069bec0b5ff3e2ac9bafc99e6a9a7dc3547996c5c816922c" dependencies = [ "getrandom 0.2.11", ] @@ -4412,7 +5113,7 @@ version = "0.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "fac4373cd91b4f55722c553fb0f286edbb81ef3ff6eec7b99d1898a4110a0b28" dependencies = [ - "rand_core 0.6.2", + "rand_core 0.6.4", ] [[package]] @@ -4421,7 +5122,7 @@ version = "0.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d25bf25ec5ae4a3f1b92f929810509a2f53d7dca2f50b794ff57e3face536c8f" dependencies = [ - "rand_core 0.6.2", + "rand_core 0.6.4", ] [[package]] @@ -4503,7 +5204,7 @@ dependencies = [ "aho-corasick", "memchr", "regex-automata 0.4.3", - "regex-syntax", + "regex-syntax 0.8.2", ] [[package]] @@ -4513,6 +5214,8 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "92b73c2a1770c255c240eaa4ee600df1704a38dc3feaa6e949e7fcd4f8dc09f9" dependencies = [ "byteorder", + "regex-syntax 0.6.29", + "utf8-ranges", ] [[package]] @@ -4523,9 +5226,21 @@ checksum = "5f804c7828047e88b2d32e2d7fe5a105da8ee3264f01902f796c8e067dc2483f" dependencies = [ "aho-corasick", "memchr", - "regex-syntax", + "regex-syntax 0.8.2", ] +[[package]] +name = "regex-lite" +version = "0.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "30b661b2f27137bdbc16f00eda72866a92bb28af1753ffbd56744fb6e2e9cd8e" + +[[package]] +name = "regex-syntax" +version = "0.6.29" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f162c6dd7b008981e4d40210aca20b4bd0f9b60ca9271061b07f78537722f2e1" + [[package]] name = "regex-syntax" version = "0.8.2" @@ -4552,8 +5267,8 @@ dependencies = [ "encoding_rs 0.8.33", "futures-core", "futures-util", - "http", - "http-body", + "http 0.2.12", + "http-body 0.4.5", "hyper", "hyper-tls", "ipnet", @@ -4713,121 +5428,12 @@ dependencies = [ ] [[package]] -name = "rusoto_core" -version = "0.46.0" -source = "git+https://github.com/tikv/rusoto?branch=gh1482-s3-addr-styles#2b142c1792062a7a3a8317610d78dd141ab4223d" -dependencies = [ - "async-trait", - "base64 0.13.0", - "bytes", - "crc32fast", - "futures 0.3.15", - "http", - "hyper", - "hyper-tls", - "lazy_static", - "log", - "rusoto_credential", - "rusoto_signature", - "rustc_version 0.3.3", - "serde", - "serde_json", - "tokio", - "xml-rs", -] - -[[package]] -name = "rusoto_credential" -version = "0.46.0" -source = "git+https://github.com/tikv/rusoto?branch=gh1482-s3-addr-styles#2b142c1792062a7a3a8317610d78dd141ab4223d" -dependencies = [ - "async-trait", - "chrono", - "dirs-next", - "futures 0.3.15", - "hyper", - "serde", - "serde_json", - "shlex 0.1.1", - "tokio", - "zeroize", -] - -[[package]] -name = "rusoto_kms" -version = "0.46.0" -source = "git+https://github.com/tikv/rusoto?branch=gh1482-s3-addr-styles#2b142c1792062a7a3a8317610d78dd141ab4223d" -dependencies = [ - "async-trait", - "bytes", - "futures 0.3.15", - "rusoto_core", - "serde", - "serde_json", -] - -[[package]] -name = 
"rusoto_mock" -version = "0.46.0" -source = "git+https://github.com/tikv/rusoto?branch=gh1482-s3-addr-styles#2b142c1792062a7a3a8317610d78dd141ab4223d" -dependencies = [ - "async-trait", - "chrono", - "futures 0.3.15", - "http", - "rusoto_core", - "serde", - "serde_json", -] - -[[package]] -name = "rusoto_s3" -version = "0.46.0" -source = "git+https://github.com/tikv/rusoto?branch=gh1482-s3-addr-styles#2b142c1792062a7a3a8317610d78dd141ab4223d" -dependencies = [ - "async-trait", - "bytes", - "futures 0.3.15", - "rusoto_core", - "serde", - "serde_derive", - "xml-rs", -] - -[[package]] -name = "rusoto_signature" -version = "0.46.0" -source = "git+https://github.com/tikv/rusoto?branch=gh1482-s3-addr-styles#2b142c1792062a7a3a8317610d78dd141ab4223d" -dependencies = [ - "base64 0.13.0", - "bytes", - "chrono", - "futures 0.3.15", - "hex 0.4.2", - "http", - "hyper", - "log", - "openssl", - "percent-encoding", - "pin-project-lite", - "rusoto_credential", - "rustc_version 0.3.3", - "serde", - "tokio", -] - -[[package]] -name = "rusoto_sts" -version = "0.46.0" -source = "git+https://github.com/tikv/rusoto?branch=gh1482-s3-addr-styles#2b142c1792062a7a3a8317610d78dd141ab4223d" +name = "roxmltree" +version = "0.14.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "921904a62e410e37e215c40381b7117f830d9d89ba60ab5236170541dd25646b" dependencies = [ - "async-trait", - "bytes", - "chrono", - "futures 0.3.15", - "rusoto_core", - "serde_urlencoded", - "xml-rs", + "xmlparser", ] [[package]] @@ -4928,9 +5534,9 @@ dependencies = [ [[package]] name = "ryu" -version = "1.0.4" +version = "1.0.17" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ed3d612bc64430efeb3f7ee6ef26d590dce0c43249217bddc62112540c7941e1" +checksum = "e86697c916019a8588c99b5fac3cead74ec0b4b819707a682fd4d23fa0ce1ba1" [[package]] name = "safemem" @@ -5054,11 +5660,17 @@ dependencies = [ "pest", ] +[[package]] +name = "separator" +version = "0.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f97841a747eef040fcd2e7b3b9a220a7205926e60488e673d9e4926d27772ce5" + [[package]] name = "serde" -version = "1.0.147" +version = "1.0.193" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d193d69bae983fc11a79df82342761dfbf28a99fc8d203dca4c3c1b590948965" +checksum = "25dd9975e68d0cb5aa1120c288333fc98731bd1dd12f561e468ea4728c042b89" dependencies = [ "serde_derive", ] @@ -5069,19 +5681,19 @@ version = "0.11.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1e18acfa2f90e8b735b2836ab8d538de304cbb6729a7360729ea5a895d15a622" dependencies = [ - "half", + "half 1.8.2", "serde", ] [[package]] name = "serde_derive" -version = "1.0.147" +version = "1.0.193" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4f1d362ca8fc9c3e3a7484440752472d68a6caa98f1ab81d99b5dfe517cec852" +checksum = "43576ca501357b9b071ac53cdc7da8ef0cbd9493d8df094cd821777ea6e894d3" dependencies = [ "proc-macro2", "quote", - "syn 1.0.103", + "syn 2.0.79", ] [[package]] @@ -5095,12 +5707,12 @@ dependencies = [ [[package]] name = "serde_json" -version = "1.0.64" +version = "1.0.109" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "799e97dc9fdae36a5c8b8f2cae9ce2ee9fdce2058c57a93e6099d919fd982f79" +checksum = "cb0652c533506ad7a2e353cce269330d6afd8bdfb6d75e0ace5b35aacbd7b9e9" dependencies = [ - "indexmap 1.6.2", - "itoa 0.4.4", + "indexmap 2.0.1", + "itoa 1.0.1", "ryu", "serde", ] @@ -5234,19 +5846,41 @@ 
dependencies = [ "tikv_util", ] +[[package]] +name = "sha1" +version = "0.10.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e3bf829a2d51ab4a5ddf1352d8470c140cadc8301b2ae1789db023f01cedd6ba" +dependencies = [ + "cfg-if 1.0.0", + "cpufeatures", + "digest 0.10.7", +] + [[package]] name = "sha2" version = "0.9.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2933378ddfeda7ea26f48c555bdad8bb446bf8a3d17832dc83e380d444cfb8c1" dependencies = [ - "block-buffer", + "block-buffer 0.9.0", "cfg-if 0.1.10", "cpuid-bool", - "digest", + "digest 0.9.0", "opaque-debug", ] +[[package]] +name = "sha2" +version = "0.10.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "793db75ad2bcafc3ffa7c68b215fee268f537982cd901d132f89c6343f3a3dc8" +dependencies = [ + "cfg-if 1.0.0", + "cpufeatures", + "digest 0.10.7", +] + [[package]] name = "sharded-slab" version = "0.1.7" @@ -5256,12 +5890,6 @@ dependencies = [ "lazy_static", ] -[[package]] -name = "shlex" -version = "0.1.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7fdf1b9db47230893d76faad238fd6097fd6d6a9245cd7a4d90dbd639536bbd2" - [[package]] name = "shlex" version = "1.3.0" @@ -5366,9 +5994,9 @@ dependencies = [ [[package]] name = "smallvec" -version = "1.11.1" +version = "1.13.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "942b4a808e05215192e39f4ab80813e599068285906cc91aa64f923db842bd5a" +checksum = "3c5e1a9a646d36c3599cd173a41282daf47c44583ad367b8e6837255952e5c67" [[package]] name = "smartstring" @@ -5595,6 +6223,12 @@ dependencies = [ "syn 2.0.79", ] +[[package]] +name = "subtle" +version = "2.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "81cdd64d312baedb58e21336b31bc043b77e01cc99033ce76ef539f78e965ebc" + [[package]] name = "symbolic-common" version = "12.8.0" @@ -5675,7 +6309,7 @@ dependencies = [ "bytes", "chrono", "futures-util", - "http", + "http 0.2.12", "percent-encoding", "pin-utils", "serde", @@ -5691,7 +6325,7 @@ version = "0.9.6" source = "git+https://github.com/tikv/tame-oauth?branch=fips-0.9#487e287c0d316b832dc44735cd9b7f7c432a10aa" dependencies = [ "data-encoding", - "http", + "http 0.2.12", "lock_api", "openssl", "parking_lot 0.11.1", @@ -6102,22 +6736,22 @@ checksum = "b1141d4d61095b28419e22cb0bbf02755f5e54e0526f97f1e3d1d160e60885fb" [[package]] name = "thiserror" -version = "1.0.30" +version = "1.0.40" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "854babe52e4df1653706b98fcfc05843010039b406875930a70e4d9644e5c417" +checksum = "978c9a314bd8dc99be594bc3c175faaa9794be04a5a5e153caba6915336cebac" dependencies = [ "thiserror-impl", ] [[package]] name = "thiserror-impl" -version = "1.0.30" +version = "1.0.40" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "aa32fd3f627f367fe16f893e2597ae3c05020f8bba2666a4e6ea73d377e5714b" +checksum = "f9456a42c5b0d803c8cd86e73dd7cc9edd429499f37a3550d286d5e86720569f" dependencies = [ "proc-macro2", "quote", - "syn 1.0.103", + "syn 2.0.79", ] [[package]] @@ -6196,12 +6830,12 @@ dependencies = [ "criterion", "encoding_rs 0.8.29", "error_code", - "hex 0.4.2", + "hex 0.4.3", "kvproto", "lazy_static", "log_wrappers", "match-template", - "nom 7.1.0", + "nom 7.1.3", "num", "num-derive 0.3.0", "num-traits", @@ -6261,7 +6895,7 @@ dependencies = [ "crypto", "file_system", "flate2", - "hex 0.4.2", + "hex 0.4.3", "log_wrappers", "match-template", "num", @@ -6325,8 +6959,8 @@ 
dependencies = [ "grpcio", "grpcio-health", "health_controller", - "hex 0.4.2", - "http", + "hex 0.4.3", + "http 0.2.12", "hybrid_engine", "hyper", "hyper-openssl", @@ -6428,7 +7062,7 @@ dependencies = [ "futures 0.3.15", "gag", "grpcio", - "hex 0.4.2", + "hex 0.4.3", "keys", "kvproto", "log", @@ -6575,7 +7209,7 @@ dependencies = [ "gag", "grpcio", "heck 0.3.1", - "http", + "http 0.2.12", "kvproto", "lazy_static", "libc 0.2.151", @@ -6596,7 +7230,6 @@ dependencies = [ "protobuf", "rand 0.8.5", "regex", - "rusoto_core", "serde", "serde_json", "slog", @@ -6668,6 +7301,21 @@ dependencies = [ "serde_json", ] +[[package]] +name = "tinyvec" +version = "1.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "87cc5ceb3875bb20c2890005a4e226a4651264a5c75edb2421b52861a0a0cb50" +dependencies = [ + "tinyvec_macros", +] + +[[package]] +name = "tinyvec_macros" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1f3ccbac311fea05f86f61904b462b55fb3df8837a366dfc601a0161d0532f20" + [[package]] name = "tipb" version = "0.0.1" @@ -6850,6 +7498,28 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c06d3da6113f116aaee68e4d601191614c9053067f9ab7f6edbcb161237daa54" dependencies = [ "once_cell", + "valuable", +] + +[[package]] +name = "tracing-log" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ee855f1f400bd0e5c02d150ae5de3840039a3f54b025156404e34c23c03f47c3" +dependencies = [ + "log", + "once_cell", + "tracing-core", +] + +[[package]] +name = "tracing-serde" +version = "0.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bc6b213177105856957181934e4920de57730fc69bf42c37ee5bb664d406d9e1" +dependencies = [ + "serde", + "tracing-core", ] [[package]] @@ -6858,10 +7528,19 @@ version = "0.3.18" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ad0f048c97dbd9faa9b7df56362b8ebcaa52adb06b498c050d2f4e32f90a7a8b" dependencies = [ + "matchers", + "nu-ansi-term", + "once_cell", + "regex", + "serde", + "serde_json", "sharded-slab", "smallvec", "thread_local", + "tracing", "tracing-core", + "tracing-log", + "tracing-serde", ] [[package]] @@ -6950,12 +7629,9 @@ checksum = "eeba86d422ce181a719445e51872fa30f1f7413b62becb52e95ec91aa262d85c" [[package]] name = "unicode-bidi" -version = "0.3.4" +version = "0.3.15" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "49f2bd0c6468a8230e1db229cff8029217cf623c767ea5d60bfbd42729ea54d5" -dependencies = [ - "matches", -] +checksum = "08f95100a766bf4f8f28f90d77e0a5461bbdb219042e7679bebe79004fed8d75" [[package]] name = "unicode-ident" @@ -6965,11 +7641,11 @@ checksum = "6ceab39d59e4c9499d4e5a8ee0e2735b891bb7308ac83dfb4e80cad195c9f6f3" [[package]] name = "unicode-normalization" -version = "0.1.12" +version = "0.1.23" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5479532badd04e128284890390c1e876ef7a993d0570b3597ae43dfa1d59afa4" +checksum = "a56d1686db2308d901306f92a263857ef59ea39678a5458e7cb17f01415101f5" dependencies = [ - "smallvec", + "tinyvec", ] [[package]] @@ -6992,17 +7668,28 @@ checksum = "f962df74c8c05a667b5ee8bcf162993134c104e96440b663c8daa176dc772d8c" [[package]] name = "url" -version = "2.2.2" +version = "2.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a507c383b2d33b5fc35d1861e77e6b383d158b2da5e14fe51b83dfedf6fd578c" +checksum = 
"31e6302e3bb753d46e83516cae55ae196fc0c309407cf11ab35cc51a4c2a4633" dependencies = [ "form_urlencoded", "idna", - "matches", "percent-encoding", "serde", ] +[[package]] +name = "urlencoding" +version = "2.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "daf8dba3b7eb870caf1ddeed7bc9d2a049f3cfdfae7cb521b087cc33ae4c49da" + +[[package]] +name = "utf8-ranges" +version = "1.0.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7fcfc827f90e53a02eaef5e535ee14266c1d569214c6aa70133a624d8a3164ba" + [[package]] name = "uuid" version = "0.8.2" @@ -7028,6 +7715,12 @@ version = "1.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d0fb139b14473e1350e34439c888e44c805f37b4293d17f87ea920a66a20a99a" +[[package]] +name = "valuable" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "830b7e5d4d90034032940e4ace0d9a9a057e7a45cd94e6c007832e39edb82f6d" + [[package]] name = "vcpkg" version = "0.2.11" @@ -7052,6 +7745,12 @@ version = "0.9.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "49874b5167b65d7193b8aba1567f5c7d93d001cafc34600cee003eda787e483f" +[[package]] +name = "vsimd" +version = "0.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5c3082ca00d5a5ef149bb8b555a72ae84c9c59f7250f013ac822ac2e49b19c64" + [[package]] name = "wait-timeout" version = "0.2.0" @@ -7232,6 +7931,15 @@ version = "0.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" +[[package]] +name = "windows-core" +version = "0.52.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "33ab640c8d7e35bf8ba19b884ba838ceb4fba93a4e8c65a9059d08afcfc683d9" +dependencies = [ + "windows-targets 0.52.0", +] + [[package]] name = "windows-sys" version = "0.32.0" @@ -7505,10 +8213,16 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "213b7324336b53d2414b2db8537e56544d981803139155afa84f76eeebb7a546" [[package]] -name = "xml-rs" -version = "0.8.0" +name = "xmlparser" +version = "0.13.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "66fee0b777b0f5ac1c69bb06d361268faafa61cd4682ae064a171c16c433e9e4" + +[[package]] +name = "yansi" +version = "0.5.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "541b12c998c5b56aa2b4e6f18f03664eef9a4fd0a246a55594efae6cc2d964b5" +checksum = "09041cd90cf85f7f8b2df60c646f853b7f535ce68f85244eb6731cf89fa498ec" [[package]] name = "yatp" @@ -7549,9 +8263,9 @@ dependencies = [ [[package]] name = "zeroize" -version = "1.1.0" +version = "1.7.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3cbac2ed2ba24cc90f5e06485ac8c7c1e5449fe8911aef4d8877218af021a5b8" +checksum = "525b4ec142c6b68a2d10f01f7bbf6755599ca3f81ea53b8431b7dd348f5fdb2d" [[package]] name = "zipf" diff --git a/Cargo.toml b/Cargo.toml index 7ce84c9b489..69acc30a8e0 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -204,19 +204,10 @@ raft-proto = { git = "https://github.com/tikv/raft-rs", branch = "master" } protobuf = { git = "https://github.com/pingcap/rust-protobuf", branch = "v2.8" } protobuf-codegen = { git = "https://github.com/pingcap/rust-protobuf", branch = "v2.8" } -# TODO: remove this replacement after rusoto_s3 truly supports virtual-host style (https://github.com/rusoto/rusoto/pull/1823). 
-# UPDATE: use openssl for signature to support fips 140 -rusoto_core = { git = "https://github.com/tikv/rusoto", branch = "gh1482-s3-addr-styles" } -rusoto_credential = { git = "https://github.com/tikv/rusoto", branch = "gh1482-s3-addr-styles" } -rusoto_kms = { git = "https://github.com/tikv/rusoto", branch = "gh1482-s3-addr-styles" } -rusoto_mock = { git = "https://github.com/tikv/rusoto", branch = "gh1482-s3-addr-styles" } -rusoto_s3 = { git = "https://github.com/tikv/rusoto", branch = "gh1482-s3-addr-styles" } -rusoto_sts = { git = "https://github.com/tikv/rusoto", branch = "gh1482-s3-addr-styles" } +snappy-sys = { git = "https://github.com/tikv/rust-snappy.git", branch = "static-link" } # NOTICE: use openssl for signature to support fips 140 tame-oauth = { git = "https://github.com/tikv/tame-oauth", branch = "fips-0.9" } -snappy-sys = { git = "https://github.com/tikv/rust-snappy.git", branch = "static-link" } - # remove this when https://github.com/danburkert/fs2-rs/pull/42 is merged. fs2 = { git = "https://github.com/tikv/fs2-rs", branch = "tikv" } diff --git a/components/cloud/aws/Cargo.toml b/components/cloud/aws/Cargo.toml index 15c1fb11744..ce7e15d13c4 100644 --- a/components/cloud/aws/Cargo.toml +++ b/components/cloud/aws/Cargo.toml @@ -10,6 +10,18 @@ failpoints = ["fail/failpoints"] [dependencies] async-trait = "0.1" + +aws-config = { version = "1", features = [], default-features = false } +aws-credential-types = { version = "1", features = ["hardcoded-credentials"] } +# Note: sts@1.40.0, s3@1.47.0 and kms@1.41.0 is the latest version that supports rustc 1.77... +# We may update this after we update our rustc. +aws-sdk-kms = { version = "=1.40.0", features = [], default-features = false } +aws-sdk-s3 = { version = "=1.40.0", features = ["rt-tokio"], default-features = false } + +aws-smithy-runtime = { version = "1", features = [ "client", "connector-hyper-0-14-x" ], default-features = false } +aws-smithy-runtime-api = { version = "1", features = [], default-features = false } +aws-smithy-types = { version = "1", features = ["byte-stream-poll-next"] } + base64 = "0.13.0" bytes = "1.0" cloud = { workspace = true } @@ -19,22 +31,25 @@ futures-util = { version = "0.3", default-features = false, features = ["io"] } # This is only a dependency to vendor openssl for rusoto. It's not clear exactly # how openssl is built for tikv, but it seems to be controlled by grpcio. This # makes `cargo test -p aws` link correctly. 
+grpcio = { workspace = true } +http = "0.2.0" +hyper = "0.14" +hyper-tls = { version = "0.5" } kvproto = { workspace = true } md5 = "0.7.0" -rusoto_core = "0.46.0" -rusoto_credential = "0.46.0" -rusoto_kms = { version = "0.46.0", features = ["serialize_structs"] } -rusoto_s3 = { version = "0.46.0", features = ["serialize_structs"] } -rusoto_sts = "0.46.0" +prometheus = { version = "0.13", default-features = false, features = ["nightly"] } slog = { workspace = true } slog-global = { workspace = true } thiserror = "1.0" tikv_util = { workspace = true } # better to not use slog-global, but pass in the logger tokio = { version = "1.5", features = ["time"] } +tokio-util = { version = "0.7" } url = "2.0" uuid = { version = "0.8", features = ["v4"] } [dev-dependencies] +aws-smithy-runtime = { version = "1.4.0", features = ["test-util", "client"] } +base64 = "0.13" futures = "0.3" -rusoto_mock = "0.46.0" +tokio = { version = "1.5", features = ["macros"] } diff --git a/components/cloud/aws/src/kms.rs b/components/cloud/aws/src/kms.rs index 560a27ed0b7..932b571fb7b 100644 --- a/components/cloud/aws/src/kms.rs +++ b/components/cloud/aws/src/kms.rs @@ -3,24 +3,29 @@ use std::ops::Deref; use async_trait::async_trait; +use aws_config::BehaviorVersion; +use aws_credential_types::provider::{error::CredentialsError, ProvideCredentials}; +use aws_sdk_kms::{ + operation::{decrypt::DecryptError, generate_data_key::GenerateDataKeyError}, + primitives::Blob, + types::DataKeySpec, + Client, +}; +use aws_sdk_s3::config::HttpClient; use cloud::{ error::{Error, KmsError, OtherError, Result}, kms::{Config, CryptographyType, DataKeyPair, EncryptedKey, KeyId, KmsProvider, PlainKey}, }; -use rusoto_core::{request::DispatchSignedRequest, RusotoError}; -use rusoto_credential::{AwsCredentials, ProvideAwsCredentials, StaticProvider}; -use rusoto_kms::{ - DecryptError, DecryptRequest, GenerateDataKeyError, GenerateDataKeyRequest, Kms, KmsClient, -}; -use tikv_util::stream::RetryError; +use futures::executor::block_on; -use crate::util; +use crate::util::{self, is_retryable, SdkError}; + +const AWS_KMS_DATA_KEY_SPEC: DataKeySpec = DataKeySpec::Aes256; -const AWS_KMS_DATA_KEY_SPEC: &str = "AES_256"; pub const ENCRYPTION_VENDOR_NAME_AWS_KMS: &str = "AWS"; pub struct AwsKms { - client: KmsClient, + client: Client, current_key_id: KeyId, region: String, endpoint: String, @@ -40,20 +45,30 @@ impl std::fmt::Debug for AwsKms { } impl AwsKms { - fn new_with_creds_dispatcher( + fn new_with_creds_client( config: Config, - dispatcher: Dispatcher, + client: Http, credentials_provider: Creds, ) -> Result where - Creds: ProvideAwsCredentials + Send + Sync + 'static, - Dispatcher: DispatchSignedRequest + Send + Sync + 'static, + Http: HttpClient + 'static, + Creds: ProvideCredentials + 'static, { - let region = util::get_region( - config.location.region.as_ref(), - config.location.endpoint.as_ref(), + let mut loader = aws_config::defaults(BehaviorVersion::latest()) + .credentials_provider(credentials_provider) + .http_client(client); + + loader = util::configure_region( + loader, + &config.location.region, + !config.location.endpoint.is_empty(), )?; - let client = KmsClient::new_with(dispatcher, credentials_provider, region); + + loader = util::configure_endpoint(loader, &config.location.endpoint); + + let sdk_config = block_on(loader.load()); + let client = Client::new(&sdk_config); + Ok(AwsKms { client, current_key_id: config.key_id, @@ -63,7 +78,8 @@ impl AwsKms { } pub fn new(config: Config) -> Result { - let dispatcher = 
util::new_http_client()?; + let client = util::new_http_client(); + let creds = util::new_credentials_provider(client.clone()); match config.aws.as_ref() { Some(aws_config) => { if let (Some(access_key), Some(secret_access_key)) = ( @@ -71,24 +87,24 @@ impl AwsKms { aws_config.secret_access_key.clone(), ) { // Use provided AWS credentials - let credentials = AwsCredentials::new( + let credentials = aws_credential_types::Credentials::new( access_key, secret_access_key, None, // session token None, // expiration + "user-provided", ); - let static_provider = StaticProvider::from(credentials); - Self::new_with_creds_dispatcher(config, dispatcher, static_provider) + let static_provider = + aws_credential_types::provider::SharedCredentialsProvider::new(credentials); + Self::new_with_creds_client(config, client, static_provider) } else { // Fall back to default credentials provider - let provider = util::CredentialsProvider::new()?; - Self::new_with_creds_dispatcher(config, dispatcher, provider) + Self::new_with_creds_client(config, client, creds) } } None => { // No AWS config provided, use default credentials provider - let provider = util::CredentialsProvider::new()?; - Self::new_with_creds_dispatcher(config, dispatcher, provider) + Self::new_with_creds_client(config, client, creds) } } } @@ -103,38 +119,27 @@ impl KmsProvider for AwsKms { // On decrypt failure, the rule is to return WrongMasterKey error in case it is // possible that a wrong master key has been used, or other error otherwise. async fn decrypt_data_key(&self, data_key: &EncryptedKey) -> Result> { - let decrypt_request = DecryptRequest { - ciphertext_blob: bytes::Bytes::copy_from_slice(data_key), - // Use default algorithm SYMMETRIC_DEFAULT. - encryption_algorithm: None, - // Use key_id encoded in ciphertext. - key_id: Some(self.current_key_id.deref().clone()), - // Encryption context and grant tokens are not used. - encryption_context: None, - grant_tokens: None, - }; self.client - .decrypt(decrypt_request.clone()) + .decrypt() + .ciphertext_blob(Blob::new(data_key.clone().into_inner())) + .key_id(self.current_key_id.deref().clone()) + .send() .await .map_err(classify_decrypt_error) - .map(|response| response.plaintext.unwrap().as_ref().to_vec()) + .map(|response| response.plaintext().unwrap().as_ref().to_vec()) } async fn generate_data_key(&self) -> Result { - let generate_request = GenerateDataKeyRequest { - encryption_context: None, - grant_tokens: None, - key_id: self.current_key_id.deref().clone(), - key_spec: Some(AWS_KMS_DATA_KEY_SPEC.to_owned()), - number_of_bytes: None, - }; self.client - .generate_data_key(generate_request) + .generate_data_key() + .key_id(self.current_key_id.deref().clone()) + .key_spec(AWS_KMS_DATA_KEY_SPEC) + .send() .await .map_err(classify_generate_data_key_error) .and_then(|response| { - let ciphertext_key = response.ciphertext_blob.unwrap().as_ref().to_vec(); - let plaintext_key = response.plaintext.unwrap().as_ref().to_vec(); + let ciphertext_key = response.ciphertext_blob().unwrap().as_ref().to_vec(); + let plaintext_key = response.plaintext().unwrap().as_ref().to_vec(); Ok(DataKeyPair { encrypted: EncryptedKey::new(ciphertext_key)?, plaintext: PlainKey::new(plaintext_key, CryptographyType::AesGcm256)?, @@ -143,67 +148,52 @@ impl KmsProvider for AwsKms { } } -// Rusoto errors Display implementation just gives the cause message and -// discards the type. This is really bad when the cause message is empty! 
-// Use Debug instead: this will show both -pub struct FixRusotoErrorDisplay( - RusotoError, -); -impl std::fmt::Debug for FixRusotoErrorDisplay { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - write!(f, "{:?}", self.0) - } -} -impl std::fmt::Display for FixRusotoErrorDisplay { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - write!(f, "{:?}", self.0) - } -} -impl std::error::Error for FixRusotoErrorDisplay {} - -fn classify_generate_data_key_error(err: RusotoError) -> Error { - if let RusotoError::Service(e) = &err { - match &e { - GenerateDataKeyError::NotFound(_) => Error::ApiNotFound(err.into()), - GenerateDataKeyError::InvalidKeyUsage(_) => { +fn classify_generate_data_key_error(err: SdkError) -> Error { + if let SdkError::ServiceError(service_err) = &err { + match &service_err.err() { + GenerateDataKeyError::NotFoundException(_) => Error::ApiNotFound(err.into()), + GenerateDataKeyError::InvalidKeyUsageException(_) => { Error::KmsError(KmsError::Other(OtherError::from_box(err.into()))) } - GenerateDataKeyError::DependencyTimeout(_) => Error::ApiTimeout(err.into()), - GenerateDataKeyError::KMSInternal(_) => Error::ApiInternal(err.into()), - _ => Error::KmsError(KmsError::Other(OtherError::from_box( - FixRusotoErrorDisplay(err).into(), - ))), + GenerateDataKeyError::DependencyTimeoutException(_) => Error::ApiTimeout(err.into()), + GenerateDataKeyError::KmsInternalException(_) => Error::ApiInternal(err.into()), + _ => Error::KmsError(KmsError::Other(OtherError::from_box(err.into()))), } } else { classify_error(err) } } -fn classify_decrypt_error(err: RusotoError) -> Error { - if let RusotoError::Service(e) = &err { - match &e { - DecryptError::IncorrectKey(_) | DecryptError::NotFound(_) => { +fn classify_decrypt_error(err: SdkError) -> Error { + if let SdkError::ServiceError(service_err) = &err { + match &service_err.err() { + DecryptError::IncorrectKeyException(_) | DecryptError::NotFoundException(_) => { Error::KmsError(KmsError::WrongMasterKey(err.into())) } - DecryptError::DependencyTimeout(_) => Error::ApiTimeout(err.into()), - DecryptError::KMSInternal(_) => Error::ApiInternal(err.into()), - _ => Error::KmsError(KmsError::Other(OtherError::from_box( - FixRusotoErrorDisplay(err).into(), - ))), + DecryptError::DependencyTimeoutException(_) => Error::ApiTimeout(err.into()), + DecryptError::KmsInternalException(_) => Error::ApiInternal(err.into()), + _ => Error::KmsError(KmsError::Other(OtherError::from_box(err.into()))), } } else { classify_error(err) } } -fn classify_error(err: RusotoError) -> Error { +fn classify_error(err: SdkError) -> Error { match &err { - RusotoError::HttpDispatch(_) => Error::ApiTimeout(err.into()), - RusotoError::Credentials(_) => Error::ApiAuthentication(err.into()), - e if e.is_retryable() => Error::ApiInternal(err.into()), - _ => Error::KmsError(KmsError::Other(OtherError::from_box( - FixRusotoErrorDisplay(err).into(), - ))), + SdkError::DispatchFailure(dispatch_failure) => { + let maybe_credentials_err = dispatch_failure + .as_connector_error() + .and_then(|connector_err| std::error::Error::source(connector_err)) + .filter(|src_err| src_err.is::()); + if maybe_credentials_err.is_some() { + Error::ApiAuthentication(err.into()) + } else { + Error::ApiTimeout(err.into()) + } + } + e if is_retryable(e) => Error::ApiInternal(err.into()), + _ => Error::KmsError(KmsError::Other(OtherError::from_box(err.into()))), } } @@ -223,11 +213,11 @@ impl std::fmt::Debug for KmsClientDebug { #[cfg(test)] mod tests { - // use 
rusoto_mock::MockRequestDispatcher; + use aws_sdk_kms::config::Credentials; + use aws_smithy_runtime::client::http::test_util::{ReplayEvent, StaticReplayClient}; + use aws_smithy_types::body::SdkBody; use cloud::kms::Location; - use rusoto_credential::StaticProvider; - use rusoto_kms::{DecryptResponse, GenerateDataKeyResponse}; - use rusoto_mock::MockRequestDispatcher; + use http::Uri; use super::*; @@ -239,7 +229,7 @@ mod tests { key_id: KeyId::new("test_key_id".to_string()).unwrap(), vendor: String::new(), location: Location { - region: "ap-southeast-2".to_string(), + region: "cn-north-1".to_string(), endpoint: String::new(), }, azure: None, @@ -247,36 +237,68 @@ mod tests { aws: None, }; - let dispatcher = - MockRequestDispatcher::with_status(200).with_json_body(GenerateDataKeyResponse { - ciphertext_blob: Some(magic_contents.as_ref().into()), - key_id: Some("test_key_id".to_string()), - plaintext: Some(key_contents.clone().into()), - }); - let credentials_provider = - StaticProvider::new_minimal("abc".to_string(), "xyz".to_string()); - let aws_kms = AwsKms::new_with_creds_dispatcher( - config.clone(), - dispatcher, - credentials_provider.clone(), - ) - .unwrap(); + let resp = format!( + "{{\"KeyId\": \"test_key_id\", \"Plaintext\": \"{}\", \"CiphertextBlob\": \"{}\" }}", + base64::encode(key_contents.clone()), + base64::encode(magic_contents) + ); + + let client = StaticReplayClient::new(vec![ReplayEvent::new( + http::Request::builder() + .method("POST") + .uri(Uri::from_static("https://kms.cn-north-1.amazonaws.com.cn/")) + .body(SdkBody::from( + "{\"KeyId\":\"test_key_id\",\"KeySpec\":\"AES_256\"}", + )) + .unwrap(), + http::Response::builder() + .status(200) + .body(SdkBody::from(resp)) + .unwrap(), + )]); + + let creds = Credentials::from_keys("abc", "xyz", None); + + let aws_kms = + AwsKms::new_with_creds_client(config.clone(), client.clone(), creds.clone()).unwrap(); + let data_key = aws_kms.generate_data_key().await.unwrap(); + assert_eq!( data_key.encrypted, EncryptedKey::new(magic_contents.to_vec()).unwrap() ); assert_eq!(*data_key.plaintext, key_contents); - let dispatcher = MockRequestDispatcher::with_status(200).with_json_body(DecryptResponse { - plaintext: Some(key_contents.clone().into()), - key_id: Some("test_key_id".to_string()), - encryption_algorithm: None, - }); - let aws_kms = - AwsKms::new_with_creds_dispatcher(config, dispatcher, credentials_provider).unwrap(); + client.assert_requests_match(&[]); + + let req = format!( + "{{\"KeyId\":\"test_key_id\",\"CiphertextBlob\":\"{}\"}}", + base64::encode(data_key.encrypted.clone().into_inner()) + ); + + let resp = format!( + "{{\"KeyId\": \"test_key_id\", \"Plaintext\": \"{}\", \"EncryptionAlgorithm\": \"SYMMETRIC_DEFAULT\" }}", + base64::encode(key_contents.clone()), + ); + + let client = StaticReplayClient::new(vec![ReplayEvent::new( + http::Request::builder() + .uri(Uri::from_static("https://kms.cn-north-1.amazonaws.com.cn/")) + .body(SdkBody::from(req)) + .unwrap(), + http::Response::builder() + .status(200) + .body(SdkBody::from(resp)) + .unwrap(), + )]); + + let aws_kms = AwsKms::new_with_creds_client(config, client.clone(), creds).unwrap(); + let plaintext = aws_kms.decrypt_data_key(&data_key.encrypted).await.unwrap(); assert_eq!(plaintext, key_contents); + + client.assert_requests_match(&[]); } #[tokio::test] @@ -285,7 +307,7 @@ mod tests { key_id: KeyId::new("test_key_id".to_string()).unwrap(), vendor: String::new(), location: Location { - region: "ap-southeast-2".to_string(), + region: "cn-north-1".to_string(), 
endpoint: String::new(), }, azure: None, @@ -293,28 +315,70 @@ mod tests { aws: None, }; + let enc_key = EncryptedKey::new(b"invalid".to_vec()).unwrap(); + + let req = format!( + "{{\"KeyId\":\"test_key_id\",\"CiphertextBlob\":\"{}\"}}", + base64::encode(enc_key.clone().into_inner()) + ); + // IncorrectKeyException // // HTTP Status Code: 400 // Json, see: - // https://github.com/rusoto/rusoto/blob/mock-v0.43.0/rusoto/services/kms/src/generated.rs#L1970 - // https://github.com/rusoto/rusoto/blob/mock-v0.43.0/rusoto/core/src/proto/json/error.rs#L7 // https://docs.aws.amazon.com/kms/latest/APIReference/API_Decrypt.html#API_Decrypt_Errors - let dispatcher = MockRequestDispatcher::with_status(400).with_body( - r#"{ - "__type": "IncorrectKeyException", - "Message": "mock" - }"#, - ); - let credentials_provider = - StaticProvider::new_minimal("abc".to_string(), "xyz".to_string()); - let aws_kms = - AwsKms::new_with_creds_dispatcher(config, dispatcher, credentials_provider).unwrap(); - let enc_key = EncryptedKey::new(b"invalid".to_vec()).unwrap(); + let client = StaticReplayClient::new(vec![ReplayEvent::new( + http::Request::builder() + .uri(Uri::from_static("https://kms.cn-north-1.amazonaws.com.cn/")) + .body(SdkBody::from(req)) + .unwrap(), + http::Response::builder() + .status(400) + .body(SdkBody::from( + r#"{ + "__type": "IncorrectKeyException", + "Message": "mock" + }"#, + )) + .unwrap(), + )]); + + let creds = Credentials::from_keys("abc", "xyz", None); + + let aws_kms = AwsKms::new_with_creds_client(config, client.clone(), creds).unwrap(); let fut = aws_kms.decrypt_data_key(&enc_key); + match fut.await { Err(Error::KmsError(KmsError::WrongMasterKey(_))) => (), other => panic!("{:?}", other), } + + client.assert_requests_match(&[]); + } + + #[tokio::test] + #[cfg(FALSE)] + // FIXME: enable this (or move this to an integration test) + async fn test_aws_kms_localstack() { + let config = Config { + key_id: KeyId::new("cbf4ef24-982d-4fd3-a75b-b95aaec84860".to_string()).unwrap(), + vendor: String::new(), + location: Location { + region: "us-east-1".to_string(), + endpoint: "http://localhost:4566".to_string(), + }, + azure: None, + gcp: None, + }; + + let creds = + Credentials::from_keys("testUser".to_string(), "testAccessKey".to_string(), None); + let aws_kms = + AwsKms::new_with_creds_client(config, util::new_http_client(), creds).unwrap(); + + let data_key = aws_kms.generate_data_key().await.unwrap(); + let plaintext = aws_kms.decrypt_data_key(&data_key.encrypted).await.unwrap(); + + assert_eq!(plaintext, data_key.plaintext.clone()); } } diff --git a/components/cloud/aws/src/s3.rs b/components/cloud/aws/src/s3.rs index 1211e67ad6a..7180e9d28b4 100644 --- a/components/cloud/aws/src/s3.rs +++ b/components/cloud/aws/src/s3.rs @@ -7,6 +7,15 @@ use std::{ }; use async_trait::async_trait; +use aws_config::{sts::AssumeRoleProvider, BehaviorVersion, Region, SdkConfig}; +use aws_credential_types::{provider::ProvideCredentials, Credentials}; +use aws_sdk_s3::{ + config::HttpClient, + operation::get_object::GetObjectError, + types::{CompletedMultipartUpload, CompletedPart}, + Client, +}; +use bytes::Bytes; use cloud::{ blob::{ none_to_empty, BlobConfig, BlobObject, BlobStorage, BucketConf, DeletableStorage, @@ -15,22 +24,24 @@ use cloud::{ metrics::CLOUD_REQUEST_HISTOGRAM_VEC, }; use fail::fail_point; -use futures::stream::{self, Stream}; +use futures::{executor::block_on, stream::Stream}; use futures_util::{ future::{FutureExt, LocalBoxFuture}, io::{AsyncRead, AsyncReadExt}, stream::TryStreamExt, + 
StreamExt, }; pub use kvproto::brpb::S3 as InputConfig; -use rusoto_core::{request::DispatchSignedRequest, ByteStream, RusotoError}; -use rusoto_credential::{ProvideAwsCredentials, StaticProvider}; -use rusoto_s3::{util::AddressingStyle, *}; -use rusoto_sts::{StsAssumeRoleSessionCredentialsProvider, StsClient}; use thiserror::Error; -use tikv_util::{debug, stream::error_stream, time::Instant}; +use tikv_util::{ + debug, + stream::{error_stream, RetryError}, + time::Instant, +}; use tokio::time::{sleep, timeout}; +use tokio_util::io::ReaderStream; -use crate::util::{self, retry_and_count}; +use crate::util::{self, retry_and_count, SdkError}; const CONNECTION_TIMEOUT: Duration = Duration::from_secs(900); pub const STORAGE_VENDOR_NAME_AWS: &str = "aws"; @@ -141,10 +152,15 @@ impl BlobConfig for Config { } } +pub struct S3CompletedPart { + pub e_tag: Option, + pub part_number: i32, +} + #[derive(Clone)] pub struct S3Storage { config: Config, - client: S3Client, + client: Client, } impl S3Storage { @@ -161,83 +177,126 @@ impl S3Storage { } /// Create a new S3 storage for the given config. - pub fn new(config: Config) -> io::Result { - Self::with_request_dispatcher(config, util::new_http_client()?) + pub fn new(config: Config) -> io::Result { + let client = util::new_http_client(); + Self::new_with_client(config, client) } - fn new_creds_dispatcher( - config: Config, - dispatcher: Dispatcher, - credentials_provider: Creds, - ) -> io::Result + fn new_with_client(config: Config, client: Http) -> io::Result where - Creds: ProvideAwsCredentials + Send + Sync + 'static, - Dispatcher: DispatchSignedRequest + Send + Sync + 'static, + Http: HttpClient + Clone + 'static, { - let bucket_region = none_to_empty(config.bucket.region.clone()); - let bucket_endpoint = config.bucket.endpoint.clone(); - let region = util::get_region(&bucket_region, &none_to_empty(bucket_endpoint))?; - let mut client = S3Client::new_with(dispatcher, credentials_provider, region); - if config.force_path_style { - client.config_mut().addressing_style = AddressingStyle::Path; + // static credentials are used with minio + if let Some(access_key_pair) = &config.access_key_pair { + let creds = Credentials::from_keys( + (*access_key_pair.access_key).to_owned(), + (*access_key_pair.secret_access_key).to_owned(), + access_key_pair + .session_token + .as_deref() + .map(|s| s.to_owned()), + ); + Self::maybe_assume_role(config, client, creds) + } else { + let creds = util::new_credentials_provider(client.clone()); + Self::maybe_assume_role(config, client, creds) } - Ok(S3Storage { config, client }) } - fn maybe_assume_role( + fn maybe_assume_role( config: Config, - cred_provider: P, - dispatcher: D, - ) -> io::Result + client: Http, + credentials_provider: Creds, + ) -> io::Result where - P: ProvideAwsCredentials + Send + Sync + 'static, - D: DispatchSignedRequest + Send + Sync + 'static, + Http: HttpClient + 'static, + Creds: ProvideCredentials + 'static, { if config.role_arn.is_some() { - // try use role arn anyway with current creds when it's not nil. - let bucket_region = none_to_empty(config.bucket.region.clone()); - let bucket_endpoint = config.bucket.endpoint.clone(); - let region = util::get_region(&bucket_region, &none_to_empty(bucket_endpoint))?; - // cannot use the same dispatcher because of move, so use another http client. 
- let sts = StsClient::new_with(util::new_http_client()?, cred_provider, region); let duration_since_epoch = SystemTime::now() .duration_since(SystemTime::UNIX_EPOCH) .unwrap(); let timestamp_secs = duration_since_epoch.as_secs(); - let cred_provider = StsAssumeRoleSessionCredentialsProvider::new( - sts, - String::clone(config.role_arn.as_deref().unwrap()), - format!("{}", timestamp_secs), - config.external_id.as_deref().cloned(), - // default duration is 15min - None, - None, - None, - ); - Self::new_creds_dispatcher(config, dispatcher, cred_provider) + + let mut builder = AssumeRoleProvider::builder(config.role_arn.as_deref().unwrap()) + .session_name(format!("{}", timestamp_secs)); + + if let Some(external_id) = &config.external_id { + builder = builder.external_id(external_id.as_str()); + } + + if let Some(region) = &config.bucket.region { + builder = builder.region(Region::new(region.to_string())); + } + + let credentials_provider: io::Result = block_on(async { + let sdk_config = + Self::load_sdk_config(&config, util::new_http_client(), credentials_provider) + .await?; + builder = builder.configure(&sdk_config); + Ok(builder.build().await) + }); + Self::new_with_creds_client(config, client, credentials_provider?) } else { // or just use original cred_provider to access s3. - Self::new_creds_dispatcher(config, dispatcher, cred_provider) + Self::new_with_creds_client(config, client, credentials_provider) } } - pub fn with_request_dispatcher(config: Config, dispatcher: D) -> io::Result + async fn load_sdk_config( + config: &Config, + client: Http, + creds: Creds, + ) -> io::Result where - D: DispatchSignedRequest + Send + Sync + 'static, + Http: HttpClient + 'static, + Creds: ProvideCredentials + 'static, { - // static credentials are used with minio - if let Some(access_key_pair) = &config.access_key_pair { - let cred_provider = StaticProvider::new( - (*access_key_pair.access_key).to_owned(), - (*access_key_pair.secret_access_key).to_owned(), - access_key_pair.session_token.as_deref().cloned(), - None, - ); - Self::maybe_assume_role(config, cred_provider, dispatcher) - } else { - let cred_provider = util::CredentialsProvider::new()?; - Self::maybe_assume_role(config, cred_provider, dispatcher) - } + let bucket_region = none_to_empty(config.bucket.region.clone()); + let bucket_endpoint = none_to_empty(config.bucket.endpoint.clone()); + + let mut loader = + aws_config::defaults(BehaviorVersion::latest()).credentials_provider(creds); + + loader = util::configure_region(loader, &bucket_region, !bucket_endpoint.is_empty())?; + loader = util::configure_endpoint(loader, &bucket_endpoint); + loader = loader.http_client(client); + Ok(loader.load().await) + } + + fn new_with_creds_client( + config: Config, + client: Http, + credentials_provider: Creds, + ) -> io::Result + where + Http: HttpClient + 'static, + Creds: ProvideCredentials + 'static, + { + block_on(Self::new_with_creds_client_async( + config, + client, + credentials_provider, + )) + } + + async fn new_with_creds_client_async( + config: Config, + client: Http, + credentials_provider: Creds, + ) -> io::Result + where + Http: HttpClient + 'static, + Creds: ProvideCredentials + 'static, + { + let sdk_config = Self::load_sdk_config(&config, client, credentials_provider).await?; + + let mut builder = aws_sdk_s3::config::Builder::from(&sdk_config); + builder.set_force_path_style(Some(config.force_path_style)); + + let client = Client::from_conf(builder.build()); + + Ok(S3Storage { config, client }) } fn maybe_prefix_key(&self, key: &str) -> 
String { @@ -262,39 +321,53 @@ impl S3Storage { let key = self.maybe_prefix_key(name); let bucket = self.config.bucket.bucket.clone(); debug!("read file from s3 storage"; "key" => %key); - let req = GetObjectRequest { - key, - bucket: (*bucket).clone(), - range, - ..Default::default() - }; - Box::new( - self.client - .get_object(req) - .map(move |future| match future { - Ok(out) => out.body.unwrap(), - Err(RusotoError::Service(GetObjectError::NoSuchKey(key))) => { - ByteStream::new(error_stream(io::Error::new( + + let async_read = self + .client + .get_object() + .key(key.clone()) + .bucket((*bucket).clone()) + .set_range(range) + .send() + .map(move |fut| { + let stream: Box> + Unpin + Send> = match fut { + Ok(out) => Box::new(ReaderStream::new(out.body.into_async_read())), + Err(SdkError::ServiceError(service_err)) => match service_err.err() { + GetObjectError::NoSuchKey(_) => create_error_stream( io::ErrorKind::NotFound, format!("no key {} at bucket {}", key, *bucket), - ))) - } - Err(e) => ByteStream::new(error_stream(io::Error::new( + ), + _ => create_error_stream( + io::ErrorKind::Other, + format!("failed to get object {:?}", service_err), + ), + }, + Err(e) => create_error_stream( io::ErrorKind::Other, format!("failed to get object {}", e), - ))), - }) - .flatten_stream() - .into_async_read(), - ) + ), + }; + stream + }) + .flatten_stream() + .into_async_read(); + + Box::new(Box::pin(async_read)) } } +fn create_error_stream( + kind: io::ErrorKind, + msg: String, +) -> Box> + Unpin + Send + Sync> { + Box::new(error_stream(io::Error::new(kind, msg))) +} + /// A helper for uploading a large files to S3 storage. /// /// Note: this uploader does not support uploading files larger than 19.5 GiB. struct S3Uploader<'client> { - client: &'client S3Client, + client: &'client Client, bucket: String, key: String, @@ -306,23 +379,36 @@ struct S3Uploader<'client> { object_lock_enabled: bool, upload_id: String, - parts: Vec, + parts: Vec, } /// The errors a uploader can meet. /// This was made for make the result of [S3Uploader::run] get [Send]. #[derive(Debug, Error)] -enum UploadError { +pub enum UploadError { #[error("io error {0}")] Io(#[from] io::Error), - #[error("rusoto error {0}")] + #[error("aws-sdk error: {msg}")] // Maybe make it a trait if needed? - Rusoto(String), + Sdk { msg: String, retryable: bool }, +} + +impl RetryError for UploadError { + fn is_retryable(&self) -> bool { + match self { + UploadError::Io(_) => false, + UploadError::Sdk { msg: _, retryable } => *retryable, + } + } } -impl From> for UploadError { - fn from(r: RusotoError) -> Self { - Self::Rusoto(format!("{}", r)) +impl From> for UploadError { + fn from(err: SdkError) -> Self { + let msg = format!("{:?}", err); + Self::Sdk { + msg, + retryable: util::is_retryable(&err), + } } } @@ -363,7 +449,7 @@ const MINIMUM_PART_SIZE: usize = 5 * 1024 * 1024; impl<'client> S3Uploader<'client> { /// Creates a new uploader with a given target location and upload /// configuration. - fn new(client: &'client S3Client, config: &Config, key: String) -> Self { + fn new(client: &'client Client, config: &Config, key: String) -> Self { Self { client, key, @@ -389,7 +475,7 @@ impl<'client> S3Uploader<'client> { // For short files, execute one put_object to upload the entire thing. 
let mut data = Vec::with_capacity(est_len as usize); reader.read_to_end(&mut data).await?; - retry_and_count(|| self.upload(&data), "upload_small_file").await?; + Box::pin(retry_and_count(|| self.upload(&data), "upload_small_file")).await?; Ok(()) } else { // Otherwise, use multipart upload to improve robustness. @@ -424,76 +510,92 @@ impl<'client> S3Uploader<'client> { } /// Starts a multipart upload process. - async fn begin(&self) -> Result> { - match timeout( - Self::get_timeout(), + async fn begin(&self) -> Result { + let request = async { self.client - .create_multipart_upload(CreateMultipartUploadRequest { - bucket: self.bucket.clone(), - key: self.key.clone(), - acl: self.acl.as_ref().map(|s| s.to_string()), - server_side_encryption: self - .server_side_encryption + .create_multipart_upload() + .bucket(self.bucket.clone()) + .key(&self.key) + .set_acl(self.acl.as_ref().map(|s| s.as_str().into())) + .set_server_side_encryption( + self.server_side_encryption .as_ref() - .map(|s| s.to_string()), - ssekms_key_id: self.sse_kms_key_id.as_ref().map(|s| s.to_string()), - storage_class: self.storage_class.as_ref().map(|s| s.to_string()), - ..Default::default() - }), - ) - .await - { - Ok(output) => output?.upload_id.ok_or_else(|| { - RusotoError::ParseError( - "missing upload-id from create_multipart_upload()".to_owned(), + .map(|s| s.as_str().into()), ) - }), - Err(_) => Err(RusotoError::ParseError( - "timeout after 15mins for begin in s3 storage".to_owned(), - )), - } + .set_ssekms_key_id(self.sse_kms_key_id.as_ref().map(|s| s.to_string())) + .set_storage_class(self.storage_class.as_ref().map(|s| s.as_str().into())) + .send() + .await? + .upload_id() + .ok_or_else(|| UploadError::Sdk { + msg: "missing upload-id from create_multipart_upload()".to_owned(), + retryable: false, + }) + .map(|s| s.into()) + }; + timeout(Self::get_timeout(), request) + .await + .map_err(|_| UploadError::Sdk { + msg: "timeout after 15mins for begin in s3 storage".to_owned(), + retryable: false, + })? } /// Completes a multipart upload process, asking S3 to join all parts into a /// single file. - async fn complete(&self) -> Result<(), RusotoError> { - let res = timeout( - Self::get_timeout(), + async fn complete(&self) -> Result<(), UploadError> { + let request = async { + let aws_parts: Vec<_> = self + .parts + .iter() + .map(|p| { + CompletedPart::builder() + .part_number(p.part_number) + .set_e_tag(p.e_tag.clone()) + .build() + }) + .collect(); + self.client - .complete_multipart_upload(CompleteMultipartUploadRequest { - bucket: self.bucket.clone(), - key: self.key.clone(), - upload_id: self.upload_id.clone(), - multipart_upload: Some(CompletedMultipartUpload { - parts: Some(self.parts.clone()), - }), - ..Default::default() - }), - ) - .await - .map_err(|_| { - RusotoError::ParseError("timeout after 15mins for complete in s3 storage".to_owned()) - })?; - res.map(|_| ()) + .complete_multipart_upload() + .bucket(self.bucket.clone()) + .key(&self.key) + .upload_id(&self.upload_id) + .multipart_upload( + CompletedMultipartUpload::builder() + .set_parts(Some(aws_parts)) + .build(), + ) + .send() + .await?; + Ok(()) + }; + timeout(Self::get_timeout(), request) + .await + .map_err(|_| UploadError::Sdk { + msg: "timeout after 15mins for upload in s3 storage".to_owned(), + retryable: false, + })? } /// Aborts the multipart upload process, deletes all uploaded parts. 
- async fn abort(&self) -> Result<(), RusotoError> { - let res = timeout( - Self::get_timeout(), + async fn abort(&self) -> Result<(), UploadError> { + let request = async { self.client - .abort_multipart_upload(AbortMultipartUploadRequest { - bucket: self.bucket.clone(), - key: self.key.clone(), - upload_id: self.upload_id.clone(), - ..Default::default() - }), - ) - .await - .map_err(|_| { - RusotoError::ParseError("timeout after 15mins for abort in s3 storage".to_owned()) - })?; - res.map(|_| ()) + .abort_multipart_upload() + .bucket(&self.bucket) + .key(&self.key) + .upload_id(&self.upload_id) + .send() + .await?; + Ok(()) + }; + timeout(Self::get_timeout(), request) + .await + .map_err(|_| UploadError::Sdk { + msg: "timeout after 15mins for upload in s3 storage".to_owned(), + retryable: false, + })? } /// Uploads a part of the file. @@ -503,93 +605,106 @@ impl<'client> S3Uploader<'client> { &self, part_number: i64, data: &[u8], - ) -> Result> { - let res = timeout(Self::get_timeout(), async { - let start = Instant::now(); - let r = self + ) -> Result { + let request = async { + let result = self .client - .upload_part(UploadPartRequest { - bucket: self.bucket.clone(), - key: self.key.clone(), - upload_id: self.upload_id.clone(), - part_number, - content_length: Some(data.len() as i64), - content_md5: get_content_md5(self.object_lock_enabled, data), - body: Some(data.to_vec().into()), - ..Default::default() - }) - .await; + .upload_part() + .bucket(&self.bucket) + .key(&self.key) + .upload_id(&self.upload_id) + .part_number(part_number as i32) + .content_length(data.len() as i64) + .set_content_md5(get_content_md5(self.object_lock_enabled, data)) + .body(data.to_vec().into()) + .send() + .await?; + Ok(S3CompletedPart { + e_tag: result.e_tag().map(|t| t.into()), + part_number: part_number as i32, + }) + }; + timeout(Self::get_timeout(), async { + let start = Instant::now(); + let result = request.await; CLOUD_REQUEST_HISTOGRAM_VEC .with_label_values(&["s3", "upload_part"]) .observe(start.saturating_elapsed().as_secs_f64()); - r + result }) - .await; - match res { - Ok(part) => Ok(CompletedPart { - e_tag: part?.e_tag, - part_number: Some(part_number), - }), - Err(_) => Err(RusotoError::ParseError( - "timeout after 15mins for upload part in s3 storage".to_owned(), - )), - } + .await + .map_err(|_| UploadError::Sdk { + msg: "timeout after 15mins for upload part in s3 storage".to_owned(), + retryable: false, + })? } /// Uploads a file atomically. /// /// This should be used only when the data is known to be short, and thus /// relatively cheap to retry the entire upload. 
- async fn upload(&self, data: &[u8]) -> Result<(), RusotoError> { - let res = timeout(Self::get_timeout(), async { - #[cfg(feature = "failpoints")] - let delay_duration = (|| { - fail_point!("s3_sleep_injected", |t| { - let t = t.unwrap().parse::().unwrap(); - Duration::from_millis(t) + async fn upload(&self, data: &[u8]) -> Result<(), UploadError> { + let request = async { + self.client + .put_object() + .bucket(&self.bucket) + .key(&self.key) + .set_acl(self.acl.as_ref().map(|s| s.as_str().into())) + .set_ssekms_key_id(self.sse_kms_key_id.as_ref().map(|s| s.to_string())) + .set_storage_class(self.storage_class.as_ref().map(|s| s.as_str().into())) + .content_length(data.len() as i64) + .body(data.to_vec().into()) + .set_server_side_encryption( + self.server_side_encryption + .as_ref() + .map(|s| s.as_str().into()), + ) + .set_content_md5(get_content_md5(self.object_lock_enabled, data)) + .send() + .await + .map(|_| ()) + .map_err(|err| err.into()) + }; + timeout( + Self::get_timeout(), + Box::pin(async { + #[cfg(feature = "failpoints")] + let delay_duration = (|| { + fail_point!("s3_sleep_injected", |t| { + let t = t.unwrap().parse::().unwrap(); + Duration::from_millis(t) + }); + Duration::from_millis(0) + })(); + #[cfg(not(feature = "failpoints"))] + let delay_duration = Duration::from_millis(0); + + if delay_duration > Duration::from_millis(0) { + sleep(delay_duration).await; + } + + fail_point!("s3_put_obj_err", |_| { + Err(UploadError::Sdk { + msg: "failed to put object".to_owned(), + retryable: false, + }) }); - Duration::from_millis(0) - })(); - #[cfg(not(feature = "failpoints"))] - let delay_duration = Duration::from_millis(0); - if delay_duration > Duration::from_millis(0) { - sleep(delay_duration).await; - } + let start = Instant::now(); - fail_point!("s3_put_obj_err", |_| { - Err(RusotoError::ParseError("failed to put object".to_owned())) - }); + let result = request.await; - let start = Instant::now(); - let r = self - .client - .put_object(PutObjectRequest { - bucket: self.bucket.clone(), - key: self.key.clone(), - acl: self.acl.as_ref().map(|s| s.to_string()), - server_side_encryption: self - .server_side_encryption - .as_ref() - .map(|s| s.to_string()), - ssekms_key_id: self.sse_kms_key_id.as_ref().map(|s| s.to_string()), - storage_class: self.storage_class.as_ref().map(|s| s.to_string()), - content_length: Some(data.len() as i64), - content_md5: get_content_md5(self.object_lock_enabled, data), - body: Some(data.to_vec().into()), - ..Default::default() - }) - .await; - CLOUD_REQUEST_HISTOGRAM_VEC - .with_label_values(&["s3", "put_object"]) - .observe(start.saturating_elapsed().as_secs_f64()); - r - }) + CLOUD_REQUEST_HISTOGRAM_VEC + .with_label_values(&["s3", "put_object"]) + .observe(start.saturating_elapsed().as_secs_f64()); + result + }), + ) .await - .map_err(|_| { - RusotoError::ParseError("timeout after 15mins for upload in s3 storage".to_owned()) - })?; - res.map(|_| ()) + .map_err(|_| UploadError::Sdk { + msg: "timeout after 15mins for upload in s3 storage".to_owned(), + retryable: false, + })? 
} fn get_timeout() -> Duration { @@ -619,7 +734,7 @@ impl BlobStorage for S3Storage { debug!("save file to s3 storage"; "key" => %key); let uploader = S3Uploader::new(&self.client, &self.config, key); - let result = uploader.run(&mut reader, content_length).await; + let result = Box::pin(uploader.run(&mut reader, content_length)).await; result.map_err(|e| { let error_code = if let UploadError::Io(ref io_error) = e { io_error.kind() @@ -643,67 +758,18 @@ impl BlobStorage for S3Storage { } } -struct S3PrefixIter<'cli> { - cli: &'cli S3Storage, - finished: bool, - cont_token: Option, - prefix: String, -} - -impl<'cli> S3PrefixIter<'cli> { - async fn next_page(&mut self) -> io::Result>> { - if self.finished { - return Ok(None); - } - let mut input = ListObjectsV2Request::default(); - input.bucket = String::clone(&self.cli.config.bucket.bucket); - input.prefix = Some(self.cli.maybe_prefix_key(&self.prefix)); - input.continuation_token = self.cont_token.clone(); - let now = Instant::now(); - let res = retry_and_count( - || self.cli.client.list_objects_v2(input.clone()), - "get_one_page", - ) - .await - .map_err(|err| io::Error::new(io::ErrorKind::Other, err))?; - CLOUD_REQUEST_HISTOGRAM_VEC - .with_label_values(&["s3", "list_objects_v2"]) - .observe(now.saturating_elapsed().as_secs_f64()); - - self.finished = !res.is_truncated.ok_or_else(|| { - io::Error::new(io::ErrorKind::InvalidData, "no IsTruncated in response") - })? || res.next_continuation_token.is_none(); - self.cont_token = res.next_continuation_token; - let data = res - .contents - .unwrap_or_default() - .into_iter() - .map(|data| BlobObject { - key: self - .cli - .strip_prefix_if_needed(data.key.unwrap_or_default()), - }) - .collect::>(); - Ok(Some(data)) - } -} - impl DeletableStorage for S3Storage { fn delete(&self, name: &str) -> LocalBoxFuture<'_, io::Result<()>> { let key = self.maybe_prefix_key(name); async move { let now = Instant::now(); - let res = retry_and_count( - || { - self.client.delete_object(DeleteObjectRequest { - bucket: self.config.bucket.bucket.to_string(), - key: key.clone(), - ..Default::default() - }) - }, - "delete_object", - ) - .await; + let res = self + .client + .delete_object() + .bucket(self.config.bucket.bucket.to_string()) + .key(key.clone()) + .send() + .await; CLOUD_REQUEST_HISTOGRAM_VEC .with_label_values(&["s3", "delete_object"]) .observe(now.saturating_elapsed().as_secs_f64()); @@ -724,19 +790,42 @@ impl IterableStorage for S3Storage { &self, prefix: &str, ) -> Pin> + '_>> { - let walker = S3PrefixIter { - cli: self, - finished: false, - cont_token: None, - prefix: prefix.to_owned(), - }; - let s = stream::try_unfold(walker, |mut w| async move { - let res = w.next_page().await?; - io::Result::Ok(res.map(|v| (v, w))) - }) - .map_ok(|data| stream::iter(data.into_iter().map(Ok))) - .try_flatten(); - Box::pin(s) + let builder = self + .client + .list_objects_v2() + .bucket(self.config.bucket.bucket.to_string()) + .prefix(self.maybe_prefix_key(prefix)); + let mut page_stream = builder.into_paginator().send(); + let stream = futures::stream::poll_fn(move |cx| page_stream.poll_next(cx)); + + stream + .map_ok(|page| { + page.contents + .map(|cs| { + futures::stream::iter(cs.into_iter().map(|v| { + Ok(BlobObject { + key: v.key.map(|k| self.strip_prefix_if_needed(k)).ok_or_else( + || { + io::Error::new( + io::ErrorKind::InvalidData, + "object key is empty", + ) + }, + )?, + }) + })) + .left_stream() + }) + .unwrap_or_else(|| futures::stream::empty().right_stream()) + }) + .map_err(|err| { + io::Error::new( 
+ io::ErrorKind::Other, + format!("sdk encounters an unexpected error: {:?}", err), + ) + }) + .try_flatten() + .boxed_local() } } @@ -744,134 +833,12 @@ impl IterableStorage for S3Storage { mod tests { use std::assert_matches::assert_matches; - use rusoto_core::signature::SignedRequest; - use rusoto_mock::{MockRequestDispatcher, MultipleMockRequestDispatcher}; - use tikv_util::stream::block_on_external_io; + use aws_sdk_s3::{config::Credentials, primitives::SdkBody}; + use aws_smithy_runtime::client::http::test_util::{ReplayEvent, StaticReplayClient}; + use http::Uri; use super::*; - fn make_list_bucket_result( - name: &str, - pfx: &str, - next_cont_token: Option<&str>, - is_truncated: bool, - max_keys: u64, - items: impl IntoIterator, - ) -> MockRequestDispatcher { - let items = items.into_iter().collect::>(); - let mut s = format!( - r#" - - - {} - {} - {} - {} - {} - {}"#, - name, - pfx, - next_cont_token.unwrap_or(""), - items.len(), - max_keys, - is_truncated - ); - for item in items { - s.push_str(&format!( - r#" - - {} - STANDARD - "#, - item - )); - } - s.push_str("\n"); - MockRequestDispatcher::with_status(200).with_body(&s) - } - - #[tokio::test] - async fn test_list_objects() { - const BUCKET: &str = "breeze"; - const PREFIX: &str = "/my/great/prefix"; - - let bucket_name = StringNonEmpty::required(BUCKET.to_string()).unwrap(); - let bucket = BucketConf::default(bucket_name); - let mut config = Config::default(bucket); - let multi_part_size = 2; - // set multi_part_size to use upload_part function - config.multi_part_size = multi_part_size; - - let check_cont_tok = |cont: Option| { - move |r: &SignedRequest| { - assert_eq!( - r.params.get("continuation-token").and_then(|v| v.as_ref()), - cont.as_ref() - ); - } - }; - - let files = |pfx, max| { - let mut i = 0; - std::iter::repeat_with(move || { - i += 1; - format!("{}-{}", pfx, i) - }) - .take(max) - }; - - // split magic_contents into 3 parts, so we mock 5 requests here(1 begin + 3 - // part + 1 complete) - let dispatcher = MultipleMockRequestDispatcher::new(vec![ - make_list_bucket_result(BUCKET, PREFIX, Some("foo"), true, 16, files("foo", 16)) - .with_request_checker(check_cont_tok(None)), - make_list_bucket_result(BUCKET, PREFIX, Some("bar"), true, 16, files("bar", 16)) - .with_request_checker(check_cont_tok(Some("foo".to_owned()))), - make_list_bucket_result(BUCKET, PREFIX, None, false, 16, files("quux", 8)) - .with_request_checker(check_cont_tok(Some("bar".to_owned()))), - MockRequestDispatcher::with_status(400).with_request_checker(|req| { - panic!("Walk haven't stopped. 
The last request is {:?}", req) - }), - ]); - - let credentials_provider = StaticProvider::new_minimal(String::new(), String::new()); - let s = S3Storage::new_creds_dispatcher(config, dispatcher, credentials_provider).unwrap(); - assert_eq!( - s.iter_prefix(PREFIX) - .map_ok(|v| v.key) - .try_collect::>() - .await - .unwrap(), - files("foo", 16) - .chain(files("bar", 16)) - .chain(files("quux", 8)) - .collect::>() - ); - } - - #[test] - #[ignore] - fn test_somewhat() { - let mut bucket = BucketConf::default(StringNonEmpty::opt("astro".to_owned()).unwrap()); - bucket.endpoint = StringNonEmpty::opt("http://10.2.7.193:9000".to_owned()); - let s3 = Config::default(bucket); - let s3 = Config { - access_key_pair: Some(AccessKeyPair { - access_key: StringNonEmpty::opt("minioadmin".to_owned()).unwrap(), - secret_access_key: StringNonEmpty::opt("minioadmin".to_owned()).unwrap(), - session_token: None, - }), - force_path_style: true, - ..s3 - }; - - let storage = S3Storage::new(s3).unwrap(); - let s = storage.iter_prefix("tpcc-1000-incr-with-crc64/v1/backupmeta"); - let items = block_on_external_io(TryStreamExt::try_collect::>(s)); - println!("{:?}", items); - println!("{}", items.unwrap().len()); - } - #[test] fn test_s3_get_content_md5() { // base64 encode md5sum "helloworld" @@ -919,32 +886,90 @@ mod tests { let magic_contents = "567890"; let bucket_name = StringNonEmpty::required("mybucket".to_string()).unwrap(); - let bucket = BucketConf::default(bucket_name); + let mut bucket = BucketConf::default(bucket_name); + bucket.region = Some(StringNonEmpty::required("cn-north-1".to_string()).unwrap()); + let mut config = Config::default(bucket); let multi_part_size = 2; // set multi_part_size to use upload_part function config.multi_part_size = multi_part_size; + config.force_path_style = true; // split magic_contents into 3 parts, so we mock 5 requests here(1 begin + 3 // part + 1 complete) - let dispatcher = MultipleMockRequestDispatcher::new(vec![ - MockRequestDispatcher::with_status(200).with_body( - r#" - - 1 - "#, + let client = StaticReplayClient::new(vec![ + ReplayEvent::new( + http::Request::builder() + .uri(Uri::from_static( + "https://s3.cn-north-1.amazonaws.com.cn/mybucket/mykey?uploads" + )) + .body(SdkBody::from("")) + .unwrap(), + http::Response::builder() + .status(200) + .body(SdkBody::from( + r#" + + mybucket + mykey + 1 + "# + )).unwrap() + ), + ReplayEvent::new( + http::Request::builder() + .uri(Uri::from_static( + "https://s3.cn-north-1.amazonaws.com.cn/mybucket/mykey?x-id=UploadPart&partNumber=1&uploadId=1" + )) + .body(SdkBody::from("56")) + .unwrap(), + http::Response::builder().status(200).body(SdkBody::from("")).unwrap() + ), + ReplayEvent::new( + http::Request::builder() + .uri(Uri::from_static( + "https://s3.cn-north-1.amazonaws.com.cn/mybucket/mykey?x-id=UploadPart&partNumber=2&uploadId=1" + )) + .body(SdkBody::from("78")) + .unwrap(), + http::Response::builder().status(200).body(SdkBody::from("")).unwrap() + ), + ReplayEvent::new( + http::Request::builder() + .uri(Uri::from_static( + "https://s3.cn-north-1.amazonaws.com.cn/mybucket/mykey?x-id=UploadPart&partNumber=3&uploadId=1" + )) + .body(SdkBody::from("90")) + .unwrap(), + http::Response::builder().status(200).body(SdkBody::from("")).unwrap() + ), + ReplayEvent::new( + http::Request::builder() + .uri(Uri::from_static( + "https://s3.cn-north-1.amazonaws.com.cn/mybucket/mykey?uploadId=1" + )) + .body(SdkBody::from( + r#"123"# + )) + .unwrap(), + http::Response::builder() + .status(200) + .body(SdkBody::from( + r#" + + 
https://s3.cn-north-1.amazonaws.com.cn/mybucket/mykey + mybucket + mykey + + + "# + )).unwrap() ), - MockRequestDispatcher::with_status(200), - MockRequestDispatcher::with_status(200), - MockRequestDispatcher::with_status(200), - MockRequestDispatcher::with_status(200), ]); - let credentials_provider = - StaticProvider::new_minimal("abc".to_string(), "xyz".to_string()); - - let s = S3Storage::new_creds_dispatcher(config, dispatcher, credentials_provider).unwrap(); + let creds = Credentials::from_keys("abc".to_string(), "xyz".to_string(), None); + let s = S3Storage::new_with_creds_client(config.clone(), client.clone(), creds).unwrap(); s.put( "mykey", PutResource(Box::new(magic_contents.as_bytes())), @@ -952,6 +977,9 @@ mod tests { ) .await .unwrap(); + + client.assert_requests_match(&[]); + assert_eq!( CLOUD_REQUEST_HISTOGRAM_VEC .get_metric_with_label_values(&["s3", "upload_part"]) @@ -972,18 +1000,52 @@ mod tests { bucket.prefix = StringNonEmpty::opt("myprefix".to_string()); let mut config = Config::default(bucket); config.force_path_style = true; - let dispatcher = MockRequestDispatcher::with_status(200).with_request_checker( - move |req: &SignedRequest| { - assert_eq!(req.region.name(), "ap-southeast-2"); - assert_eq!(req.hostname(), "s3.ap-southeast-2.amazonaws.com"); - assert_eq!(req.path(), "/mybucket/myprefix/mykey"); - // PutObject is translated to HTTP PUT. - assert_eq!(req.payload.is_some(), req.method() == "PUT"); - }, - ); - let credentials_provider = - StaticProvider::new_minimal("abc".to_string(), "xyz".to_string()); - let s = S3Storage::new_creds_dispatcher(config, dispatcher, credentials_provider).unwrap(); + + let client = StaticReplayClient::new(vec![ + ReplayEvent::new( + http::Request::builder() + .method("PUT") + .uri(Uri::from_static( + "https://s3.ap-southeast-2.amazonaws.com/mybucket/myprefix/mykey?x-id=PutObject", + )) + .body(SdkBody::from("5678")) + .unwrap(), + http::Response::builder() + .status(200) + .body(SdkBody::from("")) + .unwrap(), + ), + ReplayEvent::new( + http::Request::builder() + .method("GET") + .uri(Uri::from_static( + "https://s3.ap-southeast-2.amazonaws.com/mybucket/myprefix/mykey?x-id=GetObject", + )) + .body(SdkBody::from("")) + .unwrap(), + http::Response::builder() + .status(200) + .body(SdkBody::from("5678")) + .unwrap(), + ), + ReplayEvent::new( + http::Request::builder() + .method("PUT") + .uri(Uri::from_static( + "https://s3.ap-southeast-2.amazonaws.com/mybucket/myprefix/mykey?x-id=PutObject", + )) + .body(SdkBody::from("5678")) + .unwrap(), + http::Response::builder() + .status(200) + .body(SdkBody::from("")) + .unwrap(), + ), + ]); + + let creds = Credentials::from_keys("abc".to_string(), "xyz".to_string(), None); + + let s = S3Storage::new_with_creds_client(config.clone(), client.clone(), creds).unwrap(); s.put( "mykey", PutResource(Box::new(magic_contents.as_bytes())), @@ -995,8 +1057,8 @@ mod tests { let mut reader = s.get("mykey"); let mut buf = Vec::new(); let ret = reader.read_to_end(&mut buf).await; - assert!(ret.unwrap() == 0); - assert!(buf.is_empty()); + assert!(ret.unwrap() == 4); + assert!(!buf.is_empty()); // inject put error let s3_put_obj_err_fp = "s3_put_obj_err"; @@ -1008,6 +1070,7 @@ mod tests { ) .await .unwrap_err(); + fail::remove(s3_put_obj_err_fp); // test timeout @@ -1039,10 +1102,12 @@ mod tests { .unwrap(); fail::remove(s3_sleep_injected_fp); fail::remove(s3_timeout_injected_fp); + + client.assert_requests_match(&[]); } - #[test] - fn test_s3_storage_with_virtual_host() { + #[tokio::test] + async fn 
test_s3_storage_with_virtual_host() { let magic_contents = "abcd"; let bucket_name = StringNonEmpty::required("bucket2".to_string()).unwrap(); let mut bucket = BucketConf::default(bucket_name); @@ -1050,58 +1115,75 @@ mod tests { bucket.prefix = StringNonEmpty::opt("prefix2".to_string()); let mut config = Config::default(bucket); config.force_path_style = false; - let dispatcher = MockRequestDispatcher::with_status(200).with_request_checker( - move |req: &SignedRequest| { - assert_eq!(req.region.name(), "ap-southeast-1"); - assert_eq!(req.hostname(), "bucket2.s3.ap-southeast-1.amazonaws.com"); - assert_eq!(req.path(), "/prefix2/key2"); - // PutObject is translated to HTTP PUT. - assert_eq!(req.payload.is_some(), req.method() == "PUT"); - }, - ); - let credentials_provider = - StaticProvider::new_minimal("abc".to_string(), "xyz".to_string()); - let s = S3Storage::new_creds_dispatcher(config, dispatcher, credentials_provider).unwrap(); - block_on_external_io(s.put( + + let client = StaticReplayClient::new(vec![ReplayEvent::new( + http::Request::builder() + .method("PUT") + .uri(Uri::from_static( + "https://bucket2.s3.ap-southeast-1.amazonaws.com/prefix2/key2?x-id=PutObject", + )) + .body(SdkBody::from("abcd")) + .unwrap(), + http::Response::builder() + .status(200) + .body(SdkBody::from("")) + .unwrap(), + )]); + + let creds = Credentials::from_keys("abc".to_string(), "xyz".to_string(), None); + + let s = S3Storage::new_with_creds_client(config.clone(), client.clone(), creds).unwrap(); + s.put( "key2", PutResource(Box::new(magic_contents.as_bytes())), magic_contents.len() as u64, - )) + ) + .await .unwrap(); + + client.assert_requests_match(&[]); } - #[test] + #[tokio::test] #[cfg(FALSE)] // FIXME: enable this (or move this to an integration test) if we've got a - // reliable way to test s3 (rusoto_mock requires custom logic to verify the + // reliable way to test s3 (aws test_util requires custom logic to verify the // body stream which itself can have bug) - fn test_real_s3_storage() { + async fn test_real_s3_storage() { use tikv_util::time::Limiter; let bucket = BucketConf { - endpoint: "http://127.0.0.1:9000".to_owned(), - bucket: "bucket".to_owned(), - prefix: "prefix".to_owned(), - ..BucketConf::default() + endpoint: Some(StringNonEmpty::required("http://127.0.0.1:9000".to_owned()).unwrap()), + bucket: StringNonEmpty::required("bucket".to_owned()).unwrap(), + prefix: Some(StringNonEmpty::required("prefix".to_owned()).unwrap()), + region: None, + storage_class: None, }; let s3 = Config { - access_key: "93QZ01QRBYQQXC37XHZV".to_owned(), - secret_access_key: "N2VcI4Emg0Nm7fDzGBMJvguHHUxLGpjfwt2y4+vJ".to_owned(), + access_key_pair: Some(AccessKeyPair { + access_key: StringNonEmpty::required("93QZ01QRBYQQXC37XHZV".to_owned()).unwrap(), + secret_access_key: StringNonEmpty::required( + "N2VcI4Emg0Nm7fDzGBMJvguHHUxLGpjfwt2y4+vJ".to_owned(), + ) + .unwrap(), + session_token: None, + }), force_path_style: true, - ..Config::default() + ..Config::default(bucket) }; let limiter = Limiter::new(f64::INFINITY); - let storage = S3Storage::new(&s3).unwrap(); + let storage = S3Storage::new(s3).unwrap(); const LEN: usize = 1024 * 1024 * 4; static CONTENT: [u8; LEN] = [50_u8; LEN]; storage - .write( + .put( "huge_file", - Box::new(limiter.limit(&CONTENT[..])), + PutResource(Box::new(limiter.limit(&CONTENT[..]))), LEN as u64, ) + .await .unwrap(); let mut reader = storage.get("huge_file"); diff --git a/components/cloud/aws/src/util.rs b/components/cloud/aws/src/util.rs index 6ee27bb0c42..3abbfa28671 100644 
--- a/components/cloud/aws/src/util.rs +++ b/components/cloud/aws/src/util.rs @@ -1,27 +1,35 @@ // Copyright 2020 TiKV Project Authors. Licensed under Apache-2.0. +use std::{error::Error as StdError, io}; -use std::io::{self, Error, ErrorKind}; - -use async_trait::async_trait; -use cloud::metrics; -use futures::{future::TryFutureExt, Future}; -use rusoto_core::{ - region::Region, - request::{HttpClient, HttpConfig}, -}; -use rusoto_credential::{ - AutoRefreshingProvider, AwsCredentials, ChainProvider, CredentialsError, ProvideAwsCredentials, +use ::aws_smithy_runtime_api::client::orchestrator::HttpResponse; +use aws_config::{ + default_provider::credentials::DefaultCredentialsChain, + environment::EnvironmentVariableRegionProvider, + meta::region::{self, ProvideRegion, RegionProviderChain}, + profile::ProfileFileRegionProvider, + provider_config::ProviderConfig, + ConfigLoader, Region, }; -use rusoto_sts::WebIdentityProvider; +use aws_credential_types::provider::{error::CredentialsError, ProvideCredentials}; +use aws_sdk_kms::config::SharedHttpClient; +use aws_sdk_s3::config::HttpClient; +use aws_smithy_runtime::client::http::hyper_014::HyperClientBuilder; +use cloud::metrics; +use futures::{Future, TryFutureExt}; +use hyper::Client; +use hyper_tls::HttpsConnector; use tikv_util::{ - stream::{retry_ext, RetryError, RetryExt}, + stream::{block_on_external_io, retry_ext, RetryError, RetryExt}, warn, }; -#[allow(dead_code)] // This will be used soon, please remove the allow. const READ_BUF_SIZE: usize = 1024 * 1024 * 2; -const AWS_WEB_IDENTITY_TOKEN_FILE: &str = "AWS_WEB_IDENTITY_TOKEN_FILE"; +const DEFAULT_REGION: &str = "us-east-1"; + +pub(crate) type SdkError = + ::aws_smithy_runtime_api::client::result::SdkError; + struct CredentialsErrorWrapper(CredentialsError); impl From for CredentialsError { @@ -32,7 +40,7 @@ impl From for CredentialsError { impl std::fmt::Display for CredentialsErrorWrapper { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - write!(f, "{}", self.0.message)?; + write!(f, "{:?}", self.0)?; Ok(()) } } @@ -43,37 +51,78 @@ impl RetryError for CredentialsErrorWrapper { } } -pub fn new_http_client() -> io::Result { - let mut http_config = HttpConfig::new(); - // This can greatly improve performance dealing with payloads greater - // than 100MB. See https://github.com/rusoto/rusoto/pull/1227 - // for more information. - http_config.read_buf_size(READ_BUF_SIZE); - // It is important to explicitly create the client and not use a global - // See https://github.com/tikv/tikv/issues/7236. 
- HttpClient::new_with_config(http_config).map_err(|e| { - Error::new( - ErrorKind::Other, - format!("create aws http client error: {}", e), - ) - }) +pub fn new_http_client() -> SharedHttpClient { + let mut hyper_builder = Client::builder(); + hyper_builder.http1_read_buf_exact_size(READ_BUF_SIZE); + + HyperClientBuilder::new() + .hyper_builder(hyper_builder) + .build(HttpsConnector::new()) +} + +pub fn new_credentials_provider(http: impl HttpClient + 'static) -> DefaultCredentialsProvider { + let fut = DefaultCredentialsProvider::new(http); + if let Ok(hnd) = tokio::runtime::Handle::try_current() { + tokio::task::block_in_place(move || hnd.block_on(fut)) + } else { + block_on_external_io(fut) + } +} + +pub fn is_retryable(error: &SdkError) -> bool { + match error { + SdkError::TimeoutError(_) => true, + SdkError::DispatchFailure(_) => true, + SdkError::ResponseError(resp_err) => { + let code = resp_err.raw().status(); + code.is_server_error() || code.as_u16() == http::StatusCode::REQUEST_TIMEOUT.as_u16() + } + _ => false, + } } -pub fn get_region(region: &str, endpoint: &str) -> io::Result { +pub fn configure_endpoint(loader: ConfigLoader, endpoint: &str) -> ConfigLoader { if !endpoint.is_empty() { - Ok(Region::Custom { - name: region.to_owned(), - endpoint: endpoint.to_owned(), - }) - } else if !region.is_empty() { - region.parse::().map_err(|e| { - Error::new( - ErrorKind::InvalidInput, - format!("invalid aws region format {}: {}", region, e), - ) - }) + loader.endpoint_url(endpoint) } else { - Ok(Region::default()) + loader + } +} + +pub fn configure_region( + loader: ConfigLoader, + region: &str, + custom: bool, +) -> io::Result { + if !region.is_empty() { + validate_region(region, custom)?; + Ok(loader.region(Region::new(region.to_owned()))) + } else { + Ok(loader.region(DefaultRegionProvider::new())) + } +} + +fn validate_region(region: &str, custom: bool) -> io::Result<()> { + if custom { + return Ok(()); + } + let v: &str = ®ion.to_lowercase(); + + match v { + "ap-east-1" | "apeast1" | "ap-northeast-1" | "apnortheast1" | "ap-northeast-2" + | "apnortheast2" | "ap-northeast-3" | "apnortheast3" | "ap-south-1" | "apsouth1" + | "ap-southeast-1" | "apsoutheast1" | "ap-southeast-2" | "apsoutheast2" + | "ca-central-1" | "cacentral1" | "eu-central-1" | "eucentral1" | "eu-west-1" + | "euwest1" | "eu-west-2" | "euwest2" | "eu-west-3" | "euwest3" | "eu-north-1" + | "eunorth1" | "eu-south-1" | "eusouth1" | "me-south-1" | "mesouth1" | "us-east-1" + | "useast1" | "sa-east-1" | "saeast1" | "us-east-2" | "useast2" | "us-west-1" + | "uswest1" | "us-west-2" | "uswest2" | "us-gov-east-1" | "usgoveast1" + | "us-gov-west-1" | "usgovwest1" | "cn-north-1" | "cnnorth1" | "cn-northwest-1" + | "cnnorthwest1" | "af-south-1" | "afsouth1" => Ok(()), + _ => Err(io::Error::new( + io::ErrorKind::InvalidInput, + format!("invalid aws region format {}", region), + )), } } @@ -93,98 +142,89 @@ where ).await } -pub struct CredentialsProvider(AutoRefreshingProvider); +#[derive(Debug)] +struct DefaultRegionProvider(RegionProviderChain); -impl CredentialsProvider { - pub fn new() -> io::Result { - Ok(CredentialsProvider( - AutoRefreshingProvider::new(DefaultCredentialsProvider::default()).map_err(|e| { - Error::new( - ErrorKind::Other, - format!("create aws credentials provider error: {}", e), - ) - })?, - )) +impl DefaultRegionProvider { + fn new() -> Self { + let env_provider = EnvironmentVariableRegionProvider::new(); + let profile_provider = ProfileFileRegionProvider::builder().build(); + + // same as default region 
resolving in rusoto + let chain = RegionProviderChain::first_try(env_provider) + .or_else(profile_provider) + .or_else(Region::new(DEFAULT_REGION)); + + Self(chain) } } -#[async_trait] -impl ProvideAwsCredentials for CredentialsProvider { - async fn credentials(&self) -> Result { - self.0.credentials().await +impl ProvideRegion for DefaultRegionProvider { + fn region(&self) -> region::future::ProvideRegion<'_> { + ProvideRegion::region(&self.0) } } -// Same as rusoto_credentials::DefaultCredentialsProvider with extra -// rusoto_sts::WebIdentityProvider support. +#[derive(Debug)] pub struct DefaultCredentialsProvider { - // Underlying implementation of rusoto_credentials::DefaultCredentialsProvider. - default_provider: ChainProvider, - // Provider IAM support in Kubernetes. - web_identity_provider: WebIdentityProvider, + default_provider: DefaultCredentialsChain, } -impl Default for DefaultCredentialsProvider { - fn default() -> DefaultCredentialsProvider { - DefaultCredentialsProvider { - default_provider: ChainProvider::new(), - web_identity_provider: WebIdentityProvider::from_k8s_env(), - } +impl DefaultCredentialsProvider { + async fn new(cli: impl HttpClient + 'static) -> Self { + let cfg = ProviderConfig::default().with_http_client(cli); + let default_provider = DefaultCredentialsChain::builder() + .configure(cfg) + .build() + .await; + Self { default_provider } } } -#[async_trait] -impl ProvideAwsCredentials for DefaultCredentialsProvider { - async fn credentials(&self) -> Result { - // use web identity provider first for the kubernetes environment. - let cred = if std::env::var(AWS_WEB_IDENTITY_TOKEN_FILE).is_ok() { - // we need invoke assume_role in web identity provider - // this API may failed sometimes. - // according to AWS experience, it's better to retry it with 10 times - // exponential backoff for every error, because we cannot +impl ProvideCredentials for DefaultCredentialsProvider { + fn provide_credentials<'a>( + &'a self, + ) -> aws_credential_types::provider::future::ProvideCredentials<'a> + where + Self: 'a, + { + aws_credential_types::provider::future::ProvideCredentials::new(async move { + // Add exponential backoff for every error, because we cannot // distinguish the error type. - retry_and_count( + let cred = retry_and_count( || { #[cfg(test)] fail::fail_point!("cred_err", |_| { + let cause: Box = + String::from("injected error").into(); Box::pin(futures::future::err(CredentialsErrorWrapper( - CredentialsError::new("injected error"), + CredentialsError::provider_error(cause), ))) as std::pin::Pin + Send>> }); - let res = self - .web_identity_provider - .credentials() - .map_err(|e| CredentialsErrorWrapper(e)); - #[cfg(test)] - return Box::pin(res); - #[cfg(not(test))] - res - }, - "get_cred_over_the_cloud", - ) - .await - .map_err(|e| e.0) - } else { - // Add exponential backoff for every error, because we cannot - // distinguish the error type. 
- retry_and_count( - || { - self.default_provider - .credentials() - .map_err(|e| CredentialsErrorWrapper(e)) + + Box::pin( + self.default_provider + .provide_credentials() + .map_err(|e| CredentialsErrorWrapper(e)), + ) }, "get_cred_on_premise", ) .await - .map_err(|e| e.0) - }; - - cred.map_err(|e| { - CredentialsError::new(format_args!( - "Couldn't find AWS credentials in sources ({}).", - e.message - )) + .map_err(|e| e.0); + + cred.map_err(|e| { + let msg = e + .source() + .map(|src_err| src_err.to_string()) + .unwrap_or_else(|| e.to_string()); + let cause: Box = + format_args!("Couldn't find AWS credentials in sources ({}).", msg) + .to_string() + .into(); + CredentialsError::provider_error(cause) + }) }) } } @@ -197,20 +237,30 @@ mod tests { #[cfg(feature = "failpoints")] #[tokio::test] async fn test_default_provider() { - let default_provider = DefaultCredentialsProvider::default(); + const AWS_WEB_IDENTITY_TOKEN_FILE: &str = "AWS_WEB_IDENTITY_TOKEN_FILE"; + + let default_provider = DefaultCredentialsProvider::new(new_http_client()).await; std::env::set_var(AWS_WEB_IDENTITY_TOKEN_FILE, "tmp"); // mock k8s env with web_identitiy_provider fail::cfg("cred_err", "return").unwrap(); fail::cfg("retry_count", "return(1)").unwrap(); - let res = default_provider.credentials().await; + let res = default_provider.provide_credentials().await; assert_eq!(res.is_err(), true); - assert_eq!( - res.err().unwrap().message, - "Couldn't find AWS credentials in sources (injected error)." - ); + + let err = res.unwrap_err(); + + match err { + CredentialsError::ProviderError(_) => { + assert_eq!( + err.source().unwrap().to_string(), + "Couldn't find AWS credentials in sources (injected error)." + ) + } + err => panic!("unexpected error type: {}", err), + } + fail::remove("cred_err"); fail::remove("retry_count"); - std::env::remove_var(AWS_WEB_IDENTITY_TOKEN_FILE); } } diff --git a/components/tikv_util/Cargo.toml b/components/tikv_util/Cargo.toml index 8e6e7fb58ed..e1ab04cc732 100644 --- a/components/tikv_util/Cargo.toml +++ b/components/tikv_util/Cargo.toml @@ -48,7 +48,6 @@ prometheus = { version = "0.13", features = ["nightly"] } prometheus-static-metric = "0.5" protobuf = "2" rand = "0.8" -rusoto_core = "0.46.0" serde = { version = "1.0", features = ["derive"] } serde_json = "1.0" slog = { workspace = true } diff --git a/components/tikv_util/src/stream.rs b/components/tikv_util/src/stream.rs index 2dd50e332a1..4f16a75ef57 100644 --- a/components/tikv_util/src/stream.rs +++ b/components/tikv_util/src/stream.rs @@ -13,8 +13,6 @@ use std::{ use bytes::Bytes; use futures::stream::{self, Stream}; use futures_util::io::AsyncRead; -use http::status::StatusCode; -use rusoto_core::{request::HttpDispatchError, RusotoError}; use tokio::runtime::Builder; const MAX_RETRY_DELAY: Duration = Duration::from_secs(32); @@ -268,32 +266,11 @@ where } } -pub fn http_retriable(status: StatusCode) -> bool { - status.is_server_error() || status == StatusCode::REQUEST_TIMEOUT -} - -impl RetryError for RusotoError { - fn is_retryable(&self) -> bool { - match self { - Self::HttpDispatch(e) => e.is_retryable(), - Self::Unknown(resp) if http_retriable(resp.status) => true, - _ => false, - } - } -} - -impl RetryError for HttpDispatchError { - fn is_retryable(&self) -> bool { - true - } -} - #[cfg(test)] mod tests { use std::{cell::RefCell, pin::Pin}; use futures::{Future, FutureExt}; - use rusoto_core::HttpDispatchError; use super::RetryError; use crate::stream::retry; @@ -312,7 +289,7 @@ mod tests { #[test] fn 
test_retry_is_send_even_return_type_not_sync() { struct BangSync(Option>); - let fut = retry(|| futures::future::ok::<_, HttpDispatchError>(BangSync(None))); + let fut = retry(|| futures::future::ok::<_, TriviallyRetry>(BangSync(None))); assert_send(fut) } diff --git a/deny.toml b/deny.toml index c49499ce8f5..794ab97a21c 100644 --- a/deny.toml +++ b/deny.toml @@ -8,11 +8,11 @@ deny = [ # We allow md5 for AWS S3 object lock feature which requires # computting object's md5. { name = "md5", wrappers = ["aws"] }, - { name = "md-5" }, - { name = "sha1" }, + { name = "md-5", wrappers = ["aws-smithy-checksums"]}, + { name = "sha1", wrappers = ["aws-smithy-checksums"]}, { name = "sha-1" }, - # We allow sha2 for oauth2 crate, because it does use sha2 in TiKV use case. - { name = "sha2", wrappers = ["oauth2"] }, + # We allow sha2 for oauth2 and aws rust sdk crate, because it does use sha2 in TiKV use case. + { name = "sha2", wrappers = ["oauth2", "aws-sigv4", "aws-smithy-checksums", "aws-sdk-s3"] }, { name = "sha3" }, # Symmetric encryption { name = "aes" }, @@ -27,14 +27,14 @@ deny = [ { name = "ecdsa" }, { name = "ed25519" }, # Message authentication codes - { name = "hmac" }, + { name = "hmac", wrappers = ["aws-sigv4", "aws-sdk-s3"]}, # We prefer the system native TLS or OpenSSL. { name = "rustls" }, { name = "ring" }, # Ban trait crates from RustCrypto. { name = "aead" }, { name = "cipher" }, - { name = "digest", wrappers = ["sha2"] }, + { name = "digest", wrappers = ["sha2", "md-5", "sha1", "hmac"] }, { name = "password-hash" }, { name = "signature" }, ] From d01bd7483aab212a6de36b2c4a2fe60930a1aa00 Mon Sep 17 00:00:00 2001 From: Spade A <71589810+SpadeA-Tang@users.noreply.github.com> Date: Sun, 3 Nov 2024 17:53:00 +0800 Subject: [PATCH 07/86] In-memory engine: use stop-load-threshold for loading new regions (#17747) ref tikv/tikv#16141 use stop-load-threshold for loading new regions Signed-off-by: SpadeA-Tang Co-authored-by: ti-chi-bot[bot] <108142056+ti-chi-bot[bot]@users.noreply.github.com> --- components/in_memory_engine/src/background.rs | 2 +- components/in_memory_engine/src/memory_controller.rs | 5 +++++ 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/components/in_memory_engine/src/background.rs b/components/in_memory_engine/src/background.rs index 23b593b60d0..12d6ffa5e31 100644 --- a/components/in_memory_engine/src/background.rs +++ b/components/in_memory_engine/src/background.rs @@ -760,7 +760,7 @@ impl BackgroundRunnerCore { if !self.memory_controller.reached_stop_load_threshold() { let expected_new_count = self .memory_controller - .evict_threshold() + .stop_load_threshold() .saturating_sub(self.memory_controller.mem_usage()) / region_stats_manager.expected_region_size(); let expected_new_count = usize::max(expected_new_count, 1); diff --git a/components/in_memory_engine/src/memory_controller.rs b/components/in_memory_engine/src/memory_controller.rs index b3a95c6f109..da6ffe64071 100644 --- a/components/in_memory_engine/src/memory_controller.rs +++ b/components/in_memory_engine/src/memory_controller.rs @@ -90,6 +90,11 @@ impl MemoryController { self.mem_usage() >= self.config.value().stop_load_threshold() } + #[inline] + pub(crate) fn stop_load_threshold(&self) -> usize { + self.config.value().stop_load_threshold() + } + #[inline] pub(crate) fn evict_threshold(&self) -> usize { self.config.value().evict_threshold() From 21226f2fa73b6d8e9db8a3d8e4fe0b0b5f19a88f Mon Sep 17 00:00:00 2001 From: Yang Zhang Date: Sun, 3 Nov 2024 19:13:01 -0800 Subject: [PATCH 08/86] RocksDB: 
Deprecate write_global_seq (#17757) close tikv/tikv#17711 Deprecate write_global_seq, since it is by default false. Signed-off-by: Yang Zhang Co-authored-by: ti-chi-bot[bot] <108142056+ti-chi-bot[bot]@users.noreply.github.com> --- components/engine_panic/src/import.rs | 8 -------- components/engine_rocks/src/import.rs | 9 --------- components/engine_traits/src/import.rs | 4 ---- tests/failpoints/cases/test_import_service.rs | 20 ++----------------- 4 files changed, 2 insertions(+), 39 deletions(-) diff --git a/components/engine_panic/src/import.rs b/components/engine_panic/src/import.rs index 515523d203c..a30eeb3e005 100644 --- a/components/engine_panic/src/import.rs +++ b/components/engine_panic/src/import.rs @@ -24,12 +24,4 @@ impl IngestExternalFileOptions for PanicIngestExternalFileOptions { fn move_files(&mut self, f: bool) { panic!() } - - fn get_write_global_seqno(&self) -> bool { - panic!() - } - - fn set_write_global_seqno(&mut self, f: bool) { - panic!() - } } diff --git a/components/engine_rocks/src/import.rs b/components/engine_rocks/src/import.rs index a651d9e7394..930772d8c09 100644 --- a/components/engine_rocks/src/import.rs +++ b/components/engine_rocks/src/import.rs @@ -15,7 +15,6 @@ impl ImportExt for RocksEngine { let cf = util::get_cf_handle(self.as_inner(), cf_name)?; let mut opts = RocksIngestExternalFileOptions::new(); opts.move_files(true); - opts.set_write_global_seqno(false); // Note: no need reset the global seqno to 0 for compatibility as #16992 // enable the TiKV to handle the case on applying abnormal snapshot. let now = Instant::now_coarse(); @@ -54,14 +53,6 @@ impl IngestExternalFileOptions for RocksIngestExternalFileOptions { fn move_files(&mut self, f: bool) { self.0.move_files(f); } - - fn get_write_global_seqno(&self) -> bool { - self.0.get_write_global_seqno() - } - - fn set_write_global_seqno(&mut self, f: bool) { - self.0.set_write_global_seqno(f); - } } #[cfg(test)] diff --git a/components/engine_traits/src/import.rs b/components/engine_traits/src/import.rs index f042eb5fed0..06a913932b0 100644 --- a/components/engine_traits/src/import.rs +++ b/components/engine_traits/src/import.rs @@ -12,8 +12,4 @@ pub trait IngestExternalFileOptions { fn new() -> Self; fn move_files(&mut self, f: bool); - - fn get_write_global_seqno(&self) -> bool; - - fn set_write_global_seqno(&mut self, f: bool); } diff --git a/tests/failpoints/cases/test_import_service.rs b/tests/failpoints/cases/test_import_service.rs index 57504d2c722..9eaa876b09f 100644 --- a/tests/failpoints/cases/test_import_service.rs +++ b/tests/failpoints/cases/test_import_service.rs @@ -5,7 +5,6 @@ use std::{ time::Duration, }; -use file_system::calc_crc32; use futures::executor::block_on; use grpcio::{ChannelBuilder, Environment}; use kvproto::{disk_usage::DiskUsage, import_sstpb::*, tikvpb_grpc::TikvClient}; @@ -107,7 +106,7 @@ fn test_download_to_full_disk() { #[test] fn test_ingest_reentrant() { - let (cluster, ctx, _tikv, import) = new_cluster_and_tikv_import_client(); + let (_cluster, ctx, _tikv, import) = new_cluster_and_tikv_import_client(); let temp_dir = Builder::new() .prefix("test_ingest_reentrant") @@ -124,25 +123,9 @@ fn test_ingest_reentrant() { // Don't delete ingested sst file or we cannot find sst file in next ingest. fail::cfg("dont_delete_ingested_sst", "1*return").unwrap(); - let node_id = *cluster.sim.rl().get_node_ids().iter().next().unwrap(); - // Use sst save path to track the sst file checksum. 
- let save_path = cluster - .sim - .rl() - .importers - .get(&node_id) - .unwrap() - .get_path(&meta); - - let checksum1 = calc_crc32(save_path.clone()).unwrap(); // Do ingest and it will ingest success. must_ingest_sst(&import, ctx.clone(), meta.clone()); - let checksum2 = calc_crc32(save_path).unwrap(); - // TODO: Remove this once write_global_seqno is deprecated. - // Checksums are the same since the global seqno in the SST file no longer gets - // updated with the default setting, which is write_global_seqno=false. - assert_eq!(checksum1, checksum2); // Do ingest again and it can be reentrant must_ingest_sst(&import, ctx.clone(), meta); } @@ -486,6 +469,7 @@ fn test_flushed_applied_index_after_ingset() { // file a write to trigger ready flush, even if the write is not flushed. must_raw_put(&client, ctx, b"key1".to_vec(), b"value1".to_vec()); + std::thread::sleep(std::time::Duration::from_millis(50)); let count = sst_file_count(&cluster.paths); assert_eq!(0, count); From 887ef12afbc14376c64aabc1f088c34b4a655dbe Mon Sep 17 00:00:00 2001 From: Neil Shen Date: Mon, 4 Nov 2024 16:01:19 +0800 Subject: [PATCH 09/86] In-memory Engine: remove dead code and fix typos (#17763) ref tikv/tikv#16141 Signed-off-by: Neil Shen --- .../engine_traits/src/region_cache_engine.rs | 7 ++----- components/in_memory_engine/src/background.rs | 12 ++---------- components/in_memory_engine/src/engine.rs | 19 +++---------------- .../in_memory_engine/src/write_batch.rs | 6 +++--- 4 files changed, 10 insertions(+), 34 deletions(-) diff --git a/components/engine_traits/src/region_cache_engine.rs b/components/engine_traits/src/region_cache_engine.rs index 757629d3fdd..feeaad5ca9b 100644 --- a/components/engine_traits/src/region_cache_engine.rs +++ b/components/engine_traits/src/region_cache_engine.rs @@ -8,7 +8,7 @@ use std::{ use keys::{enc_end_key, enc_start_key}; use kvproto::metapb::Region; -use crate::{Iterable, KvEngine, Snapshot, WriteBatchExt}; +use crate::{KvEngine, Snapshot, WriteBatchExt}; #[derive(Debug, PartialEq)] pub enum FailedReason { @@ -63,7 +63,7 @@ pub enum EvictReason { /// RegionCacheEngine works as a region cache caching some regions (in Memory or /// NVME for instance) to improve the read performance. pub trait RegionCacheEngine: - RegionCacheEngineExt + WriteBatchExt + Iterable + Debug + Clone + Unpin + Send + Sync + 'static + RegionCacheEngineExt + WriteBatchExt + Debug + Clone + Unpin + Send + Sync + 'static { type Snapshot: Snapshot; @@ -81,9 +81,6 @@ pub trait RegionCacheEngine: type DiskEngine: KvEngine; fn set_disk_engine(&mut self, disk_engine: Self::DiskEngine); - // return the region containing the key - fn get_region_for_key(&self, key: &[u8]) -> Option; - type RangeHintService: RangeHintService; fn start_hint_service(&self, range_hint_service: Self::RangeHintService); diff --git a/components/in_memory_engine/src/background.rs b/components/in_memory_engine/src/background.rs index 12d6ffa5e31..7705ff1a014 100644 --- a/components/in_memory_engine/src/background.rs +++ b/components/in_memory_engine/src/background.rs @@ -780,16 +780,8 @@ impl BackgroundRunnerCore { // Flush epoch and pin enough times to make the delayed operations be executed #[cfg(test)] pub(crate) fn flush_epoch() { - { - let guard = &epoch::pin(); - guard.flush(); - } - // Local epoch tries to advance the global epoch every 128 pins. When global - // epoch advances, the operations(here, means delete) in the older epoch can be - // executed. 
- for _ in 0..128 { - let _ = &epoch::pin(); - } + let guard = &epoch::pin(); + guard.flush(); } pub struct BackgroundRunner { diff --git a/components/in_memory_engine/src/engine.rs b/components/in_memory_engine/src/engine.rs index 24c5cca5eeb..872feec2706 100644 --- a/components/in_memory_engine/src/engine.rs +++ b/components/in_memory_engine/src/engine.rs @@ -14,8 +14,8 @@ use crossbeam_skiplist::{ }; use engine_rocks::RocksEngine; use engine_traits::{ - CacheRegion, EvictReason, FailedReason, IterOptions, Iterable, KvEngine, RegionCacheEngine, - RegionCacheEngineExt, RegionEvent, Result, CF_DEFAULT, CF_LOCK, CF_WRITE, DATA_CFS, + CacheRegion, EvictReason, FailedReason, KvEngine, RegionCacheEngine, RegionCacheEngineExt, + RegionEvent, CF_DEFAULT, CF_LOCK, CF_WRITE, DATA_CFS, }; use fail::fail_point; use kvproto::metapb::Region; @@ -30,7 +30,7 @@ use crate::{ encode_key_for_boundary_with_mvcc, encode_key_for_boundary_without_mvcc, InternalBytes, }, memory_controller::MemoryController, - read::{RegionCacheIterator, RegionCacheSnapshot}, + read::RegionCacheSnapshot, region_manager::{ AsyncFnOnce, LoadFailedReason, RegionCacheStatus, RegionManager, RegionState, }, @@ -537,10 +537,6 @@ impl RegionCacheEngine for RegionCacheMemoryEngine { .start_bg_hint_service(range_hint_service) } - fn get_region_for_key(&self, key: &[u8]) -> Option { - self.core.region_manager().get_region_for_key(key) - } - fn enabled(&self) -> bool { self.config.value().enable } @@ -625,15 +621,6 @@ impl RegionCacheEngineExt for RegionCacheMemoryEngine { } } -impl Iterable for RegionCacheMemoryEngine { - type Iterator = RegionCacheIterator; - - fn iterator_opt(&self, _: &str, _: IterOptions) -> Result { - // This engine does not support creating iterators directly by the engine. - panic!("iterator_opt is not supported on creating by RegionCacheMemoryEngine directly") - } -} - #[cfg(test)] pub mod tests { use std::{sync::Arc, time::Duration}; diff --git a/components/in_memory_engine/src/write_batch.rs b/components/in_memory_engine/src/write_batch.rs index fd92d4a5b8a..220d9a0243c 100644 --- a/components/in_memory_engine/src/write_batch.rs +++ b/components/in_memory_engine/src/write_batch.rs @@ -42,7 +42,7 @@ pub(crate) const MEM_CONTROLLER_OVERHEAD: usize = 8; // default, the memtable size for lock cf is 32MB. As not all ranges will be // cached in the memory, just use half of it here. const AMOUNT_TO_CLEAN_TOMBSTONE: u64 = ReadableSize::mb(16).0; -// The value of the delete entry in the in-memory engine. It's just a emptry +// The value of the delete entry in the in-memory engine. It's just a empty // slice. const DELETE_ENTRY_VAL: &[u8] = b""; @@ -73,8 +73,8 @@ pub struct RegionCacheWriteBatch { // ... -> PollHandler::end), although the same region can call `prepare_for_region` // multiple times, it can only call sequentially. This is say, we will not have this: // prepare_for_region(region1), prepare_for_region(region2), prepare_for_region(region1). - // In case to avoid this asssumption being broken, we record the regions that have called - // prepare_for_region and ensure that if the region is not the `currnet_region`, it is not + // In case to avoid this assumption being broken, we record the regions that have called + // prepare_for_region and ensure that if the region is not the `current_region`, it is not // recorded in this vec. 
prepared_regions: SmallVec<[u64; 10]>, } From 7f31c92bfd598f435e684b8448666f546a9d2118 Mon Sep 17 00:00:00 2001 From: ekexium Date: Tue, 5 Nov 2024 09:23:54 +0800 Subject: [PATCH 10/86] resolved-ts: use min_lock_ts-1 as the candidate of resolved-ts (#17730) close tikv/tikv#17728 Use min_lock_ts-1 as the candidate of resolved-ts, to ensure resolved_ts < lock.min_commit_ts( <= commit_ts). Signed-off-by: ekexium Co-authored-by: ti-chi-bot[bot] <108142056+ti-chi-bot[bot]@users.noreply.github.com> Co-authored-by: you06 --- components/resolved_ts/src/resolver.rs | 47 ++++++++++++++++++++++---- 1 file changed, 41 insertions(+), 6 deletions(-) diff --git a/components/resolved_ts/src/resolver.rs b/components/resolved_ts/src/resolver.rs index f0bc30e3d05..a90df0f5a1d 100644 --- a/components/resolved_ts/src/resolver.rs +++ b/components/resolved_ts/src/resolver.rs @@ -568,8 +568,17 @@ impl Resolver { self.read_progress.as_ref() } - // Return the transaction with the smallest min_commit_ts. When min_commit_ts - // is unknown, use start_ts instead. + // Returns the minimum possible (commit_ts - 1), based on the knowledge we have, + // i.e. from all locks currently being tracked. + // The function is to provide an upper bound of resolved-ts. By definition + // resolved-ts must be strictly smaller than a future commit_ts. + // + // "Oldest" doesn't mean it started first, but means it may have the smallest + // commit_ts, which is the returned ts + 1. + // + // **NOTE**: + // For normal txns, we return start_ts. + // For large txns, we return its min_commit_ts-1. pub(crate) fn oldest_transaction(&self) -> Option<(TimeStamp, TxnLocks)> { let oldest_normal_txn = self .lock_ts_heap @@ -582,7 +591,7 @@ impl Resolver { .iter() .filter_map(|(start_ts, txn_locks)| { self.lookup_min_commit_ts(*start_ts) - .map(|ts| (ts, txn_locks.clone())) + .map(|ts| (ts.prev(), txn_locks.clone())) }) .min_by_key(|(ts, _)| *ts); @@ -896,7 +905,7 @@ mod tests { assert_eq!(resolver.large_txn_key_representative.len(), 2); assert_eq!(resolver.resolved_ts(), TimeStamp::zero()); - assert_eq!(resolver.resolve(20.into(), None, TsSource::PdTso), 1.into()); + assert_eq!(resolver.resolve(20.into(), None, TsSource::PdTso), 0.into()); txn_status_cache.upsert( 1.into(), @@ -913,9 +922,35 @@ mod tests { SystemTime::now(), ); - assert_eq!(resolver.resolve(20.into(), None, TsSource::PdTso), 5.into()); + assert_eq!(resolver.resolve(20.into(), None, TsSource::PdTso), 4.into()); let oldest_txn = resolver.oldest_transaction().unwrap(); - assert_eq!(oldest_txn.0, 5.into()); + assert_eq!(oldest_txn.0, 4.into()); assert_eq!(oldest_txn.1.lock_count, 2); } + + #[test] + fn test_resolved_ts_always_greater_than_following_commit_ts() { + // A later commit_ts must be strictly larger than resolved-ts. Equality is not + // allowed. The case may not happen in real implementation, but we want + // to ensure the correctness and robustness of every submodule. + let memory_quota = Arc::new(MemoryQuota::new(std::usize::MAX)); + let txn_status_cache = Arc::new(TxnStatusCache::new(100)); + let mut resolver = Resolver::new(1, memory_quota, txn_status_cache.clone()); + let key: Vec = vec![1, 2, 3, 4]; + + resolver.track_lock(1.into(), key.clone(), None, 1).unwrap(); + // PD TSO = 9. A read request with ts 9 reads, and pushes the min_commit_ts to + // 10, which is greater than current PD TS. + txn_status_cache.upsert( + 1.into(), + TxnState::Ongoing { + min_commit_ts: 10.into(), + }, + SystemTime::now(), + ); + // We assert the resolved-ts cannot be 10. 
Because a later commit ts could be + // 10. + assert_eq!(resolver.resolve(10.into(), None, TsSource::PdTso), 9.into()); + // Now the txn can commit, with the smallest possible commit_ts = 10. + } } From 601f34d008df94f8f54e882971f3b08edf5599c8 Mon Sep 17 00:00:00 2001 From: glorv Date: Tue, 5 Nov 2024 11:25:02 +0800 Subject: [PATCH 11/86] in_memory_engine: adjust the default config (#17751) ref tikv/tikv#16141, close tikv/tikv#17762 Let in_memory_engine's config`evict-threshold` and `stop-load-threshold` default value generated from `capacity`. Co-authored-by: ti-chi-bot[bot] <108142056+ti-chi-bot[bot]@users.noreply.github.com> --- .../src/observer/load_eviction.rs | 10 +- components/in_memory_engine/src/background.rs | 3 + components/in_memory_engine/src/config.rs | 111 ++++++++++++++---- .../in_memory_engine/src/write_batch.rs | 1 - components/tikv_util/src/config.rs | 7 ++ metrics/grafana/tikv_details.dashboard.py | 4 +- metrics/grafana/tikv_details.json | 8 +- metrics/grafana/tikv_details.json.sha256 | 2 +- src/config/mod.rs | 3 +- 9 files changed, 113 insertions(+), 36 deletions(-) diff --git a/components/hybrid_engine/src/observer/load_eviction.rs b/components/hybrid_engine/src/observer/load_eviction.rs index 5acfebe1650..0058373b3c8 100644 --- a/components/hybrid_engine/src/observer/load_eviction.rs +++ b/components/hybrid_engine/src/observer/load_eviction.rs @@ -15,7 +15,7 @@ use raftstore::coprocessor::{ BoxQueryObserver, BoxRoleObserver, Cmd, Coprocessor, CoprocessorHost, DestroyPeerObserver, ExtraMessageObserver, ObserverContext, QueryObserver, RegionState, RoleObserver, }; -use tikv_util::info; +use tikv_util::debug; #[derive(Clone)] pub struct LoadEvictionObserver { @@ -82,7 +82,7 @@ impl LoadEvictionObserver { )) { let cache_region = CacheRegion::from_region(ctx.region()); - info!( + debug!( "ime evict range due to apply commands"; "region" => ?cache_region, "is_ingest_sst" => apply.pending_handle_ssts.is_some(), @@ -94,7 +94,7 @@ impl LoadEvictionObserver { if !state.new_regions.is_empty() { let cmd_type = cmd.request.get_admin_request().get_cmd_type(); assert!(cmd_type == AdminCmdType::BatchSplit || cmd_type == AdminCmdType::Split); - info!( + debug!( "ime handle region split"; "region_id" => ctx.region().get_id(), "admin_command" => ?cmd.request.get_admin_request().get_cmd_type(), @@ -246,7 +246,7 @@ impl RoleObserver for LoadEvictionObserver { if let StateRole::Leader = change.state { // Currently, it is only used by the manual load. let cache_region = CacheRegion::from_region(ctx.region()); - info!( + debug!( "ime try to load region due to became leader"; "region" => ?cache_region, ); @@ -255,7 +255,7 @@ impl RoleObserver for LoadEvictionObserver { && change.initialized { let cache_region = CacheRegion::from_region(ctx.region()); - info!( + debug!( "ime try to evict region due to became follower"; "region" => ?cache_region, ); diff --git a/components/in_memory_engine/src/background.rs b/components/in_memory_engine/src/background.rs index 7705ff1a014..65173d40f5f 100644 --- a/components/in_memory_engine/src/background.rs +++ b/components/in_memory_engine/src/background.rs @@ -698,6 +698,9 @@ impl BackgroundRunnerCore { } }; if !region_stats_manager.ready_for_auto_load_and_evict() { + info!( + "ime skip check load&evict because the duration from last load&evict check is too short." 
+            );
             return;
         }
 
diff --git a/components/in_memory_engine/src/config.rs b/components/in_memory_engine/src/config.rs
index e0a56abfab2..93cd01ba99b 100644
--- a/components/in_memory_engine/src/config.rs
+++ b/components/in_memory_engine/src/config.rs
@@ -15,6 +15,16 @@ const MIN_GC_RUN_INTERVAL: Duration = Duration::from_secs(10);
 // The maximum interval for GC run is 10 minutes which equals to the minimum
 // value of TiDB GC lifetime.
 const MAX_GC_RUN_INTERVAL: Duration = Duration::from_secs(600);
+// The maximum write KV throughput (20 MiB); this is an empirical value.
+const MAX_WRITE_KV_SPEED: u64 = 20 * 1024 * 1024;
+// The maximum duration in seconds we expect IME to release enough memory after
+// memory usage reaches `evict_threshold`. This is an empirical value.
+// We use this value to determine the default value of `evict_threshold` based
+// on `capacity`.
+const MAX_RESERVED_DURATION_FOR_WRITE: u64 = 10;
+// Regions' mvcc read amplification statistics are updated every 1min, so we set
+// the minimal load&evict check duration to 2min.
+const MIN_LOAD_EVICT_INTERVAL: Duration = Duration::from_secs(120);
 
 #[derive(Clone, Serialize, Deserialize, Debug, PartialEq, OnlineConfig)]
 #[serde(default, rename_all = "kebab-case")]
@@ -25,11 +35,15 @@ pub struct InMemoryEngineConfig {
     pub capacity: Option<ReadableSize>,
     /// When memory usage reaches this amount, we start to pick some regions to
     /// evict.
+    /// Default value: `capacity` - min(10 * MAX_WRITE_KV_SPEED, capacity *
+    /// 0.1).
     pub evict_threshold: Option<ReadableSize>,
     /// When memory usage reaches this amount, we stop loading regions.
     // TODO(SpadeA): ultimately we only expose one memory limit to user.
     // When memory usage reaches this amount, no further load will be
     // performed.
+    // Default value: `evict_threshold` - min(RegionSplitSize(256MB) * 2 + MAX_WRITE_KV_SPEED *
+    // MAX_RESERVED_DURATION_FOR_WRITE, capacity * 0.15)
     pub stop_load_threshold: Option<ReadableSize>,
     /// Determines the oldest timestamp (approximately, now - gc_run_interval)
     /// of the read request the in memory engine can serve.
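
To make the two defaults above concrete, a small standalone sketch follows (not part of the patch; the `MIB`/`GIB` helpers, the `default_thresholds` name, and the use of plain `u64` byte counts instead of `ReadableSize` are illustrative assumptions). It mirrors the derivation in the new `validate()` below, and the 5 GiB case matches the `check_delta` expectations in the added tests.

// Constants mirroring the ones introduced in config.rs above.
const MIB: u64 = 1024 * 1024;
const GIB: u64 = 1024 * MIB;
const MAX_WRITE_KV_SPEED: u64 = 20 * MIB;
const MAX_RESERVED_DURATION_FOR_WRITE: u64 = 10;

// Derive (evict_threshold, stop_load_threshold) from capacity, following the
// logic of the new `validate()`.
fn default_thresholds(capacity: u64, region_split_size: u64) -> (u64, u64) {
    // evict_threshold = capacity - min(capacity * 10%, 10 s * 20 MiB/s).
    let evict_delta =
        (capacity / 10).min(MAX_RESERVED_DURATION_FOR_WRITE * MAX_WRITE_KV_SPEED);
    let evict_threshold = capacity - evict_delta;
    // stop_load_threshold = evict_threshold
    //     - min(capacity * 15%, 2 * region_split_size + 10 s * 20 MiB/s).
    let load_delta = (capacity * 15 / 100)
        .min(region_split_size * 2 + MAX_RESERVED_DURATION_FOR_WRITE * MAX_WRITE_KV_SPEED);
    (evict_threshold, evict_threshold - load_delta)
}

fn main() {
    // capacity = 5 GiB, region split size = 256 MiB:
    //   evict delta = min(512 MiB, 200 MiB) = 200 MiB
    //   load delta  = min(768 MiB, 712 MiB) = 712 MiB
    // which matches check_delta(&cfg, ReadableSize::mb(200), ReadableSize::mb(712)).
    let (evict, stop_load) = default_thresholds(5 * GIB, 256 * MIB);
    assert_eq!(5 * GIB - evict, 200 * MIB);
    assert_eq!(evict - stop_load, 712 * MIB);
}
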
@@ -70,33 +84,42 @@ impl Default for InMemoryEngineConfig { } impl InMemoryEngineConfig { - pub fn validate(&mut self) -> Result<(), Box> { + pub fn validate(&mut self, region_split_size: ReadableSize) -> Result<(), Box> { if !self.enable { return Ok(()); } - if self.evict_threshold.is_none() || self.capacity.is_none() { + if self.capacity.is_none() { return Err("evict-threshold or capacity not set".into()); } - if self.stop_load_threshold.is_none() { - self.stop_load_threshold = self.evict_threshold; - } - - if self.stop_load_threshold.as_ref().unwrap() > self.evict_threshold.as_ref().unwrap() { + if self.evict_threshold.is_none() { + let capacity = self.capacity.unwrap().0; + let delta = std::cmp::min( + capacity / 10, + MAX_RESERVED_DURATION_FOR_WRITE * MAX_WRITE_KV_SPEED, + ); + self.evict_threshold = Some(ReadableSize(capacity - delta)); + } else if self.evict_threshold.as_ref().unwrap() >= self.capacity.as_ref().unwrap() { return Err(format!( - "stop-load-threshold {:?} is larger to evict-threshold {:?}", - self.stop_load_threshold.as_ref().unwrap(), - self.evict_threshold.as_ref().unwrap() + "evict-threshold {:?} is larger or equal to capacity {:?}", + self.evict_threshold.as_ref().unwrap(), + self.capacity.as_ref().unwrap() ) .into()); } - if self.evict_threshold.as_ref().unwrap() >= self.capacity.as_ref().unwrap() { + if self.stop_load_threshold.is_none() { + let delta = std::cmp::min( + self.capacity.unwrap().0 * 15 / 100, + region_split_size.0 * 2 + MAX_RESERVED_DURATION_FOR_WRITE * MAX_WRITE_KV_SPEED, + ); + self.stop_load_threshold = Some(ReadableSize(self.evict_threshold.unwrap().0 - delta)); + } else if self.stop_load_threshold.unwrap() > self.evict_threshold.unwrap() { return Err(format!( - "evict-threshold {:?} is larger or equal to capacity {:?}", - self.evict_threshold.as_ref().unwrap(), - self.capacity.as_ref().unwrap() + "stop-load-threshold {:?} is larger to evict-threshold {:?}", + self.stop_load_threshold.as_ref().unwrap(), + self.evict_threshold.as_ref().unwrap() ) .into()); } @@ -113,6 +136,14 @@ impl InMemoryEngineConfig { .into()); } + if self.load_evict_interval.0 < MIN_LOAD_EVICT_INTERVAL { + return Err(format!( + "load-evict-interval {:?} should be greater or equal to {:?}", + self.load_evict_interval, MIN_LOAD_EVICT_INTERVAL + ) + .into()); + } + Ok(()) } @@ -179,32 +210,68 @@ impl std::ops::Deref for InMemoryEngineConfigManager { mod tests { use super::*; + const DEFAULT_REGION_SPLIT_SIZE: ReadableSize = ReadableSize::mb(256); + #[test] fn test_validate() { let mut cfg = InMemoryEngineConfig::default(); - cfg.validate().unwrap(); + cfg.validate(DEFAULT_REGION_SPLIT_SIZE).unwrap(); cfg.enable = true; - assert!(cfg.validate().is_err()); + assert!(cfg.validate(DEFAULT_REGION_SPLIT_SIZE).is_err()); cfg.capacity = Some(ReadableSize::gb(2)); cfg.evict_threshold = Some(ReadableSize::gb(1)); cfg.stop_load_threshold = Some(ReadableSize::gb(1)); - cfg.validate().unwrap(); + cfg.validate(DEFAULT_REGION_SPLIT_SIZE).unwrap(); // Error if less than MIN_GC_RUN_INTERVAL. cfg.gc_run_interval = ReadableDuration(Duration::ZERO); - assert!(cfg.validate().is_err()); + assert!(cfg.validate(DEFAULT_REGION_SPLIT_SIZE).is_err()); cfg.gc_run_interval = ReadableDuration(Duration::from_secs(9)); - assert!(cfg.validate().is_err()); + assert!(cfg.validate(DEFAULT_REGION_SPLIT_SIZE).is_err()); // Error if larger than MIN_GC_RUN_INTERVAL. 
cfg.gc_run_interval = ReadableDuration(Duration::from_secs(601)); - assert!(cfg.validate().is_err()); + assert!(cfg.validate(DEFAULT_REGION_SPLIT_SIZE).is_err()); cfg.gc_run_interval = ReadableDuration(Duration::MAX); - assert!(cfg.validate().is_err()); + assert!(cfg.validate(DEFAULT_REGION_SPLIT_SIZE).is_err()); cfg.gc_run_interval = ReadableDuration(Duration::from_secs(180)); - cfg.validate().unwrap(); + cfg.validate(DEFAULT_REGION_SPLIT_SIZE).unwrap(); + + #[track_caller] + fn check_delta( + cfg: &InMemoryEngineConfig, + evict_delta: ReadableSize, + load_delta: ReadableSize, + ) { + let real_evict_delta = cfg.capacity.unwrap() - cfg.evict_threshold.unwrap(); + assert_eq!(real_evict_delta, evict_delta); + let real_load_delta = cfg.evict_threshold.unwrap() - cfg.stop_load_threshold.unwrap(); + assert_eq!(real_load_delta, load_delta); + } + + let mut cfg = InMemoryEngineConfig::default(); + cfg.enable = true; + cfg.capacity = Some(ReadableSize::gb(1)); + cfg.validate(DEFAULT_REGION_SPLIT_SIZE).unwrap(); + check_delta( + &cfg, + ReadableSize::gb(1) / 10, + ReadableSize::gb(1) * 15 / 100, + ); + + let mut cfg = InMemoryEngineConfig::default(); + cfg.enable = true; + cfg.capacity = Some(ReadableSize::gb(5)); + cfg.validate(DEFAULT_REGION_SPLIT_SIZE).unwrap(); + check_delta(&cfg, ReadableSize::mb(200), ReadableSize::mb(712)); + + let mut cfg = InMemoryEngineConfig::default(); + cfg.enable = true; + cfg.capacity = Some(ReadableSize::gb(5)); + cfg.validate(ReadableSize::mb(96)).unwrap(); + check_delta(&cfg, ReadableSize::mb(200), ReadableSize::mb(392)); } } diff --git a/components/in_memory_engine/src/write_batch.rs b/components/in_memory_engine/src/write_batch.rs index 220d9a0243c..a66c3a3174d 100644 --- a/components/in_memory_engine/src/write_batch.rs +++ b/components/in_memory_engine/src/write_batch.rs @@ -146,7 +146,6 @@ impl RegionCacheWriteBatch { self.record_last_written_region(); let cached_region = CacheRegion::from_region(region); - // TODO: remote range. 
self.set_region_cache_status( self.engine .prepare_for_apply(&cached_region, region.is_in_flashback), diff --git a/components/tikv_util/src/config.rs b/components/tikv_util/src/config.rs index 976350a8939..33cf7e4ddb6 100644 --- a/components/tikv_util/src/config.rs +++ b/components/tikv_util/src/config.rs @@ -129,6 +129,13 @@ impl Div for ReadableSize { } } +impl Sub for ReadableSize { + type Output = ReadableSize; + fn sub(self, rhs: ReadableSize) -> Self::Output { + ReadableSize(self.0 - rhs.0) + } +} + impl Mul for ReadableSize { type Output = ReadableSize; diff --git a/metrics/grafana/tikv_details.dashboard.py b/metrics/grafana/tikv_details.dashboard.py index 96456ae1844..d88b01f6ec2 100644 --- a/metrics/grafana/tikv_details.dashboard.py +++ b/metrics/grafana/tikv_details.dashboard.py @@ -4633,7 +4633,7 @@ def InMemoryEngine() -> RowPanel: yaxes=yaxes(left_format=UNITS.OPS_PER_SEC), targets=[ target( - expr=expr_sum_rate( + expr=expr_sum_delta( "tikv_in_memory_engine_load_duration_secs_count", by_labels=["instance"], ), @@ -4657,7 +4657,7 @@ def InMemoryEngine() -> RowPanel: yaxes=yaxes(left_format=UNITS.OPS_PER_SEC), targets=[ target( - expr=expr_sum_rate( + expr=expr_sum_delta( "tikv_in_memory_engine_eviction_duration_secs_count", by_labels=["type"], ), diff --git a/metrics/grafana/tikv_details.json b/metrics/grafana/tikv_details.json index df7e9cd62f1..c18f536ec8d 100644 --- a/metrics/grafana/tikv_details.json +++ b/metrics/grafana/tikv_details.json @@ -39962,7 +39962,7 @@ "targets": [ { "datasource": "${DS_TEST-CLUSTER}", - "expr": "sum(rate(\n tikv_in_memory_engine_load_duration_secs_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance) ", + "expr": "sum(delta(\n tikv_in_memory_engine_load_duration_secs_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance) ", "format": "time_series", "hide": false, "instant": false, @@ -39970,7 +39970,7 @@ "intervalFactor": 1, "legendFormat": "{{instance}}", "metric": "", - "query": "sum(rate(\n tikv_in_memory_engine_load_duration_secs_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance) ", + "query": "sum(delta(\n tikv_in_memory_engine_load_duration_secs_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance) ", "refId": "", "step": 10, "target": "" @@ -40200,7 +40200,7 @@ "targets": [ { "datasource": "${DS_TEST-CLUSTER}", - "expr": "sum(rate(\n tikv_in_memory_engine_eviction_duration_secs_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (type, $additional_groupby) ", + "expr": "sum(delta(\n tikv_in_memory_engine_eviction_duration_secs_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (type, $additional_groupby) ", "format": "time_series", "hide": false, "instant": false, @@ -40208,7 +40208,7 @@ "intervalFactor": 1, "legendFormat": "{{type}} {{$additional_groupby}}", "metric": "", - "query": "sum(rate(\n tikv_in_memory_engine_eviction_duration_secs_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (type, $additional_groupby) ", + "query": "sum(delta(\n tikv_in_memory_engine_eviction_duration_secs_count\n 
{k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (type, $additional_groupby) ", "refId": "", "step": 10, "target": "" diff --git a/metrics/grafana/tikv_details.json.sha256 b/metrics/grafana/tikv_details.json.sha256 index b61272c5ef6..b0964023cd3 100644 --- a/metrics/grafana/tikv_details.json.sha256 +++ b/metrics/grafana/tikv_details.json.sha256 @@ -1 +1 @@ -36809f7e452742d5c3b62d0b9c5cf7566553289cb1d1b976e9aea8be4eaa2fc9 ./metrics/grafana/tikv_details.json +6a3c9238ffc0450c013981946f4388ced4a92759b379f0b2289b1cbdfc7a1b48 ./metrics/grafana/tikv_details.json diff --git a/src/config/mod.rs b/src/config/mod.rs index 750db4f9503..aca990dbc15 100644 --- a/src/config/mod.rs +++ b/src/config/mod.rs @@ -3961,7 +3961,8 @@ impl TikvConfig { return Err("in-memory-engine is unavailable for feature TTL or API v2".into()); } self.in_memory_engine.expected_region_size = self.coprocessor.region_split_size(); - self.in_memory_engine.validate()?; + self.in_memory_engine + .validate(self.coprocessor.region_split_size())?; // Now, only support cross check in in-memory engine when compaction filter is // enabled. From 37dfba1b046ebf308db972af0a3a788b7132244c Mon Sep 17 00:00:00 2001 From: Neil Shen Date: Tue, 5 Nov 2024 15:36:39 +0800 Subject: [PATCH 12/86] In-memory Engine: fix panic when destroy an uninitialized region (#17771) close tikv/tikv#17767 IME observes all peer destroy events to timely evict regions. By adding a new peer, the old and uninitialized peer will be destroyed and IME must not panic in this situation. Signed-off-by: Neil Shen --- .../src/observer/load_eviction.rs | 14 +++++-- .../raftstore/src/coprocessor/dispatcher.rs | 7 ---- .../failpoints/cases/test_in_memory_engine.rs | 41 ++++++++++++++++++- tests/failpoints/cases/test_stale_peer.rs | 2 - 4 files changed, 50 insertions(+), 14 deletions(-) diff --git a/components/hybrid_engine/src/observer/load_eviction.rs b/components/hybrid_engine/src/observer/load_eviction.rs index 0058373b3c8..cc2e12aacd4 100644 --- a/components/hybrid_engine/src/observer/load_eviction.rs +++ b/components/hybrid_engine/src/observer/load_eviction.rs @@ -4,7 +4,7 @@ use std::sync::Arc; use engine_traits::{CacheRegion, EvictReason, KvEngine, RegionCacheEngineExt, RegionEvent}; use kvproto::{ - metapb::Region, + metapb::{Peer, Region}, raft_cmdpb::AdminCmdType, raft_serverpb::{ExtraMessage, ExtraMessageType, RaftApplyState}, }; @@ -15,7 +15,7 @@ use raftstore::coprocessor::{ BoxQueryObserver, BoxRoleObserver, Cmd, Coprocessor, CoprocessorHost, DestroyPeerObserver, ExtraMessageObserver, ObserverContext, QueryObserver, RegionState, RoleObserver, }; -use tikv_util::debug; +use tikv_util::{debug, warn}; #[derive(Clone)] pub struct LoadEvictionObserver { @@ -274,8 +274,16 @@ impl ExtraMessageObserver for LoadEvictionObserver { impl DestroyPeerObserver for LoadEvictionObserver { fn on_destroy_peer(&self, r: &Region) { + let mut region = r.clone(); + if region.get_peers().is_empty() { + warn!("ime evict an uninitialized region"; "region" => ?region); + // In some cases, the region may have no peer, such as an + // uninitialized peer being destroyed. We need to push an empty peer + // to prevent panic in `CacheRegion::from_region`. 
+ region.mut_peers().push(Peer::default()); + } self.cache_engine.on_region_event(RegionEvent::Eviction { - region: CacheRegion::from_region(r), + region: CacheRegion::from_region(®ion), reason: EvictReason::PeerDestroy, }); } diff --git a/components/raftstore/src/coprocessor/dispatcher.rs b/components/raftstore/src/coprocessor/dispatcher.rs index b350d971e2d..fdcfc7ef87a 100644 --- a/components/raftstore/src/coprocessor/dispatcher.rs +++ b/components/raftstore/src/coprocessor/dispatcher.rs @@ -954,9 +954,6 @@ impl CoprocessorHost { } pub fn on_applied_current_term(&self, role: StateRole, region: &Region) { - if self.registry.cmd_observers.is_empty() { - return; - } for observer in &self.registry.cmd_observers { let observer = observer.observer.inner(); observer.on_applied_current_term(role, region); @@ -989,10 +986,6 @@ impl CoprocessorHost { } pub fn on_destroy_peer(&self, region: &Region) { - if self.registry.destroy_peer_observers.is_empty() { - return; - } - for observer in &self.registry.destroy_peer_observers { let observer = observer.observer.inner(); observer.on_destroy_peer(region); diff --git a/tests/failpoints/cases/test_in_memory_engine.rs b/tests/failpoints/cases/test_in_memory_engine.rs index aa9231497dd..13ef4c5cbfa 100644 --- a/tests/failpoints/cases/test_in_memory_engine.rs +++ b/tests/failpoints/cases/test_in_memory_engine.rs @@ -21,7 +21,9 @@ use kvproto::{ raft_cmdpb::{AdminCmdType, CmdType, RaftCmdRequest, RaftRequestHeader, Request}, raft_serverpb::RaftMessage, }; +use pd_client::PdClient; use protobuf::Message; +use raft::eraftpb::MessageType; use raftstore::{ coprocessor::ObserveHandle, store::{ @@ -34,8 +36,8 @@ use test_coprocessor::{ handle_request, init_data_with_details_pd_client, DagChunkSpliter, DagSelect, ProductTable, }; use test_raftstore::{ - get_tso, new_peer, new_put_cf_cmd, new_server_cluster_with_hybrid_engine, Cluster, - ServerCluster, + get_tso, new_learner_peer, new_peer, new_put_cf_cmd, new_server_cluster_with_hybrid_engine, + CloneFilterFactory, Cluster, Direction, RegionPacketFilter, ServerCluster, }; use test_util::eventually; use tidb_query_datatype::{ @@ -1022,3 +1024,38 @@ fn test_eviction_when_destroy_peer() { assert!(!region_cache_engine.region_cached(&r)); } } + +// IME must not panic when destroy an uninitialized region. +#[test] +fn test_eviction_when_destroy_uninitialized_peer() { + let mut cluster = new_server_cluster_with_hybrid_engine(0, 2); + let pd_client = cluster.pd_client.clone(); + pd_client.disable_default_operator(); + cluster.run_conf_change(); + + let region = pd_client.get_region(b"").unwrap(); + assert!( + !region.get_peers().iter().any(|p| p.get_store_id() == 2), + "{:?}", + region + ); + + // Block snapshot messages, so that new peers will never be initialized. + cluster.add_send_filter(CloneFilterFactory( + RegionPacketFilter::new(region.get_id(), 2) + .msg_type(MessageType::MsgSnapshot) + .direction(Direction::Recv), + )); + + let learner1 = new_learner_peer(2, 2); + pd_client.must_add_peer(region.get_id(), learner1.clone()); + cluster.must_region_exist(region.get_id(), 2); + pd_client.must_remove_peer(region.get_id(), learner1); + + // IME observes all peer destroy events to timely evict regions. By adding a + // new peer, the old and uninitialized peer will be destroyed and IME must + // not panic in this case. 
+ let learner2 = new_learner_peer(2, 3); + pd_client.must_add_peer(region.get_id(), learner2.clone()); + cluster.must_region_exist(region.get_id(), 2); +} diff --git a/tests/failpoints/cases/test_stale_peer.rs b/tests/failpoints/cases/test_stale_peer.rs index 7fb9f921e37..ee1171ae114 100644 --- a/tests/failpoints/cases/test_stale_peer.rs +++ b/tests/failpoints/cases/test_stale_peer.rs @@ -136,7 +136,6 @@ fn test_stale_learner_restart() { must_get_equal(&cluster.get_engine(2), b"k2", b"v2"); } -/// pass /// Test if a peer can be destroyed through tombstone msg when applying /// snapshot. //#[test_case(test_raftstore_v2::new_node_cluster)] // unstable test case @@ -216,7 +215,6 @@ fn test_stale_peer_destroy_when_apply_snapshot() { must_get_none(&cluster.get_engine(3), b"k1"); } -/// pass /// Test if destroy a uninitialized peer through tombstone msg would allow a /// staled peer be created again. #[test_case(test_raftstore::new_node_cluster)] From 0904ef0734241dacfe420f4cd78adf36528d86b6 Mon Sep 17 00:00:00 2001 From: ris <79858083+RidRisR@users.noreply.github.com> Date: Tue, 5 Nov 2024 17:33:07 +0800 Subject: [PATCH 13/86] Make import.num_threads be able to config online (#17665) close tikv/tikv#17572 Signed-off-by: RidRisR <79858083+RidRisR@users.noreply.github.com> Co-authored-by: ti-chi-bot[bot] <108142056+ti-chi-bot[bot]@users.noreply.github.com> --- .config/nextest.toml | 2 +- components/backup/src/endpoint.rs | 27 ++- components/backup/src/utils.rs | 105 +++--------- components/sst_importer/src/config.rs | 20 ++- components/sst_importer/src/import_mode.rs | 39 +++-- components/sst_importer/src/import_mode2.rs | 32 ++-- components/sst_importer/src/sst_importer.rs | 68 ++++++-- components/tikv_util/src/lib.rs | 1 + .../tikv_util/src/resizable_threadpool.rs | 161 ++++++++++++++++++ src/import/sst_service.rs | 73 ++++---- 10 files changed, 357 insertions(+), 171 deletions(-) create mode 100644 components/tikv_util/src/resizable_threadpool.rs diff --git a/.config/nextest.toml b/.config/nextest.toml index 6f67aa5ecdb..2caec4b0c05 100644 --- a/.config/nextest.toml +++ b/.config/nextest.toml @@ -1,7 +1,7 @@ [profile.ci] retries = 2 # Run at most 3 times fail-fast = false -slow-timeout = { period = "60s", terminate-after = 2 } # Timeout 2m +slow-timeout = { period = "80s", terminate-after = 2 } # Timeout 2m failure-output = "final" [profile.ci.junit] diff --git a/components/backup/src/endpoint.rs b/components/backup/src/endpoint.rs index 681187ad0d6..01c7b1bbd7a 100644 --- a/components/backup/src/endpoint.rs +++ b/components/backup/src/endpoint.rs @@ -38,6 +38,7 @@ use tikv_util::{ box_err, debug, error, error_unknown, future::RescheduleChecker, impl_display_as_debug, info, + resizable_threadpool::ResizableRuntime, store::find_peer, time::{Instant, Limiter}, warn, @@ -49,7 +50,7 @@ use txn_types::{Key, Lock, TimeStamp}; use crate::{ metrics::*, softlimit::{CpuStatistics, SoftLimit, SoftLimitByCpu}, - utils::{ControlThreadPool, KeyValueCodec}, + utils::KeyValueCodec, writer::{BackupWriterBuilder, CfNameWrap}, Error, *, }; @@ -702,7 +703,7 @@ impl SoftLimitKeeper { /// It coordinates backup tasks and dispatches them to different workers. 
pub struct Endpoint { store_id: u64, - pool: RefCell, + pool: RefCell, io_pool: Runtime, tablets: LocalTablets, config_manager: ConfigManager, @@ -877,7 +878,11 @@ impl Endpoint { causal_ts_provider: Option>, resource_ctl: Option>, ) -> Endpoint { - let pool = ControlThreadPool::new(); + let pool = ResizableRuntime::new( + "backup-worker", + Box::new(utils::create_tokio_runtime), + Box::new(|new_size| BACKUP_THREAD_POOL_SIZE_GAUGE.set(new_size as i64)), + ); let rt = utils::create_tokio_runtime(config.io_thread_size, "backup-io").unwrap(); let config_manager = ConfigManager(Arc::new(RwLock::new(config))); let softlimit = SoftLimitKeeper::new(config_manager.clone()); @@ -1500,7 +1505,11 @@ pub mod tests { use std::thread::sleep; let counter = Arc::new(AtomicU32::new(0)); - let mut pool = ControlThreadPool::new(); + let mut pool = ResizableRuntime::new( + "bkwkr", + Box::new(utils::create_tokio_runtime), + Box::new(|new_size: usize| BACKUP_THREAD_POOL_SIZE_GAUGE.set(new_size as i64)), + ); pool.adjust_with(3); for i in 0..8 { @@ -2540,12 +2549,12 @@ pub mod tests { endpoint.handle_backup_task(task); assert!(endpoint.pool.borrow().size == 15); - // shrink thread pool only if there are too many idle threads + // shrink thread pool endpoint.get_config_manager().set_num_threads(10); req.set_start_key(vec![b'2']); let (task, _) = Task::new(req.clone(), tx.clone()).unwrap(); endpoint.handle_backup_task(task); - assert!(endpoint.pool.borrow().size == 15); + assert!(endpoint.pool.borrow().size == 10); endpoint.get_config_manager().set_num_threads(3); req.set_start_key(vec![b'3']); @@ -2560,7 +2569,11 @@ pub mod tests { // for testing whether dropping the pool before all tasks finished causes panic. // but the panic must be checked manually. (It may panic at tokio runtime // threads) - let mut pool = ControlThreadPool::new(); + let mut pool = ResizableRuntime::new( + "bkwkr", + Box::new(utils::create_tokio_runtime), + Box::new(|new_size: usize| BACKUP_THREAD_POOL_SIZE_GAUGE.set(new_size as i64)), + ); pool.adjust_with(1); pool.spawn(async { tokio::time::sleep(Duration::from_millis(100)).await }); pool.adjust_with(2); diff --git a/components/backup/src/utils.rs b/components/backup/src/utils.rs index 9d85eb664eb..b13e51466f9 100644 --- a/components/backup/src/utils.rs +++ b/components/backup/src/utils.rs @@ -1,16 +1,13 @@ // Copyright 2021 TiKV Project Authors. Licensed under Apache-2.0. -use std::sync::Arc; - use api_version::{dispatch_api_version, ApiV2, KeyMode, KvFormat}; use file_system::IoType; -use futures::Future; use kvproto::kvrpcpb::ApiVersion; use tikv_util::{error, sys::thread::ThreadBuildWrapper}; use tokio::{io::Result as TokioResult, runtime::Runtime}; use txn_types::{Key, TimeStamp}; -use crate::{metrics::*, Result}; +use crate::Result; // BACKUP_V1_TO_V2_TS is used as causal timestamp to backup RawKV api version // V1/V1Ttl data and save to V2 format. Use 1 other than 0 because 0 is not a @@ -19,88 +16,6 @@ pub const BACKUP_V1_TO_V2_TS: u64 = 1; /// DaemonRuntime is a "background" runtime, which contains "daemon" tasks: /// any task spawn into it would run until finish even the runtime isn't /// referenced. -pub struct DaemonRuntime(Option); - -impl DaemonRuntime { - /// spawn a daemon task to the runtime. - pub fn spawn(self: &Arc, f: impl Future + Send + 'static) { - let wkr = self.clone(); - self.0.as_ref().unwrap().spawn(async move { - f.await; - drop(wkr) - }); - } - - /// create a daemon runtime from some runtime. 
- pub fn from_runtime(rt: Runtime) -> Arc { - Arc::new(Self(Some(rt))) - } -} - -impl Drop for DaemonRuntime { - fn drop(&mut self) { - // it is safe because all tasks should be finished. - self.0.take().unwrap().shutdown_background() - } -} -pub struct ControlThreadPool { - pub(crate) size: usize, - workers: Option>, -} - -impl ControlThreadPool { - pub fn new() -> Self { - ControlThreadPool { - size: 0, - workers: None, - } - } - - pub fn spawn(&self, func: F) - where - F: Future + Send + 'static, - { - self.workers - .as_ref() - .expect("ControlThreadPool: please call adjust_with() before spawn()") - .spawn(func); - } - - /// Lazily adjust the thread pool's size - /// - /// Resizing if the thread pool need to expend or there - /// are too many idle threads. Otherwise do nothing. - pub fn adjust_with(&mut self, new_size: usize) { - if self.size >= new_size && self.size - new_size <= 10 { - return; - } - // TODO: after tokio supports adjusting thread pool size(https://github.com/tokio-rs/tokio/issues/3329), - // adapt it. - let workers = create_tokio_runtime(new_size, "bkwkr") - .expect("failed to create tokio runtime for backup worker."); - self.workers = Some(DaemonRuntime::from_runtime(workers)); - self.size = new_size; - BACKUP_THREAD_POOL_SIZE_GAUGE.set(new_size as i64); - } -} - -/// Create a standard tokio runtime. -/// (which allows io and time reactor, involve thread memory accessor), -/// and set the I/O type to export. -pub fn create_tokio_runtime(thread_count: usize, thread_name: &str) -> TokioResult { - tokio::runtime::Builder::new_multi_thread() - .thread_name(thread_name) - .enable_io() - .enable_time() - .with_sys_and_custom_hooks( - || { - file_system::set_io_type(IoType::Export); - }, - || {}, - ) - .worker_threads(thread_count) - .build() -} #[derive(Debug, Copy, Clone)] pub struct KeyValueCodec { @@ -264,6 +179,24 @@ impl KeyValueCodec { } } +/// Create a standard tokio runtime. +/// (which allows io and time reactor, involve thread memory accessor), +/// and set the I/O type to export. 
+pub(crate) fn create_tokio_runtime(thread_count: usize, thread_name: &str) -> TokioResult { + tokio::runtime::Builder::new_multi_thread() + .thread_name(thread_name) + .enable_io() + .enable_time() + .with_sys_and_custom_hooks( + || { + file_system::set_io_type(IoType::Export); + }, + || {}, + ) + .worker_threads(thread_count) + .build() +} + #[cfg(test)] pub mod tests { use api_version::{KvFormat, RawValue}; diff --git a/components/sst_importer/src/config.rs b/components/sst_importer/src/config.rs index 7e83a07f2b2..543ff765b80 100644 --- a/components/sst_importer/src/config.rs +++ b/components/sst_importer/src/config.rs @@ -7,13 +7,14 @@ use std::{ }; use online_config::{self, OnlineConfig}; -use tikv_util::{config::ReadableDuration, HandyRwLock}; +use tikv_util::{ + config::ReadableDuration, resizable_threadpool::ResizableRuntimeHandle, HandyRwLock, +}; #[derive(Clone, Serialize, Deserialize, PartialEq, Debug, OnlineConfig)] #[serde(default)] #[serde(rename_all = "kebab-case")] pub struct Config { - #[online_config(skip)] pub num_threads: usize, #[online_config(skip)] pub stream_channel_window: usize, @@ -62,11 +63,17 @@ impl Config { } #[derive(Clone)] -pub struct ConfigManager(pub Arc>); +pub struct ConfigManager { + pub config: Arc>, + threads: ResizableRuntimeHandle, +} impl ConfigManager { - pub fn new(cfg: Config) -> Self { - ConfigManager(Arc::new(RwLock::new(cfg))) + pub fn new(cfg: Config, threads: ResizableRuntimeHandle) -> Self { + ConfigManager { + config: Arc::new(RwLock::new(cfg)), + threads, + } } } @@ -88,6 +95,7 @@ impl online_config::ConfigManager for ConfigManager { return Err(e); } + self.threads.adjust_with(cfg.num_threads); *self.wl() = cfg; Ok(()) } @@ -97,6 +105,6 @@ impl std::ops::Deref for ConfigManager { type Target = RwLock; fn deref(&self) -> &Self::Target { - self.0.as_ref() + self.config.as_ref() } } diff --git a/components/sst_importer/src/import_mode.rs b/components/sst_importer/src/import_mode.rs index 5f5b5d1060e..43da9eb3e70 100644 --- a/components/sst_importer/src/import_mode.rs +++ b/components/sst_importer/src/import_mode.rs @@ -11,8 +11,7 @@ use std::{ use engine_traits::{CfOptions, DbOptions, KvEngine}; use futures_util::compat::Future01CompatExt; use kvproto::import_sstpb::*; -use tikv_util::timer::GLOBAL_TIMER_HANDLE; -use tokio::runtime::Handle; +use tikv_util::{resizable_threadpool::ResizableRuntimeHandle, timer::GLOBAL_TIMER_HANDLE}; use super::{Config, Result}; @@ -88,7 +87,8 @@ impl ImportModeSwitcher { ImportModeSwitcher { inner, is_import } } - pub fn start(&self, executor: &Handle, db: E) { + // start_resizable_threads only serves for resizable runtime + pub fn start_resizable_threads(&self, executor: &ResizableRuntimeHandle, db: E) { // spawn a background future to put TiKV back into normal mode after timeout let inner = self.inner.clone(); let switcher = Arc::downgrade(&inner); @@ -245,10 +245,18 @@ mod tests { use engine_traits::{KvEngine, CF_DEFAULT}; use tempfile::Builder; use test_sst_importer::{new_test_engine, new_test_engine_with_options}; - use tikv_util::config::ReadableDuration; + use tikv_util::{config::ReadableDuration, resizable_threadpool::ResizableRuntime}; + use tokio::runtime::Runtime; + type TokioResult = std::io::Result; use super::*; + fn create_tokio_runtime(_: usize, _: &str) -> TokioResult { + tokio::runtime::Builder::new_current_thread() + .enable_all() + .build() + } + fn check_import_options( db: &E, expected_db_opts: &ImportModeDbOptions, @@ -305,13 +313,12 @@ mod tests { fn mf(_cf: &str, _name: &str, _v: 
f64) {} let cfg = Config::default(); - let threads = tokio::runtime::Builder::new_current_thread() - .enable_all() - .build() - .unwrap(); + let mut threads = + ResizableRuntime::new("test", Box::new(create_tokio_runtime), Box::new(|_| {})); + threads.adjust_with(cfg.num_threads); let switcher = ImportModeSwitcher::new(&cfg); - switcher.start(threads.handle(), db.clone()); + switcher.start_resizable_threads(&ResizableRuntimeHandle::new(threads), db.clone()); check_import_options(&db, &normal_db_options, &normal_cf_options); assert!(switcher.enter_import_mode(&db, mf).unwrap()); check_import_options(&db, &import_db_options, &import_cf_options); @@ -343,19 +350,19 @@ mod tests { ..Config::default() }; - let threads = tokio::runtime::Builder::new_current_thread() - .enable_all() - .build() - .unwrap(); - + let mut threads = + ResizableRuntime::new("test", Box::new(create_tokio_runtime), Box::new(|_| {})); + threads.adjust_with(4); let switcher = ImportModeSwitcher::new(&cfg); - switcher.start(threads.handle(), db.clone()); + let handle = ResizableRuntimeHandle::new(threads); + + switcher.start_resizable_threads(&handle, db.clone()); check_import_options(&db, &normal_db_options, &normal_cf_options); switcher.enter_import_mode(&db, mf).unwrap(); check_import_options(&db, &import_db_options, &import_cf_options); thread::sleep(Duration::from_secs(1)); - threads.block_on(tokio::task::yield_now()); + handle.block_on(tokio::task::yield_now()); check_import_options(&db, &normal_db_options, &normal_cf_options); } diff --git a/components/sst_importer/src/import_mode2.rs b/components/sst_importer/src/import_mode2.rs index 4db29c47a6f..ae5f72a5b00 100644 --- a/components/sst_importer/src/import_mode2.rs +++ b/components/sst_importer/src/import_mode2.rs @@ -8,8 +8,7 @@ use std::{ use collections::{HashMap, HashSet}; use futures_util::compat::Future01CompatExt; use kvproto::{import_sstpb::Range, metapb::Region}; -use tikv_util::timer::GLOBAL_TIMER_HANDLE; -use tokio::runtime::Handle; +use tikv_util::{resizable_threadpool::ResizableRuntimeHandle, timer::GLOBAL_TIMER_HANDLE}; use super::Config; @@ -56,9 +55,7 @@ impl ImportModeSwitcherV2 { ImportModeSwitcherV2 { inner } } - // Periodically perform timeout check to change import mode of some regions back - // to normal mode. 
- pub fn start(&self, executor: &Handle) { + pub fn start_resizable_threads(&self, executor: &ResizableRuntimeHandle) { // spawn a background future to put regions back into normal mode after timeout let inner = self.inner.clone(); let switcher = Arc::downgrade(&inner); @@ -157,10 +154,19 @@ pub fn range_overlaps(range1: &HashRange, range2: &Range) -> bool { mod test { use std::thread; - use tikv_util::config::ReadableDuration; + use tikv_util::{config::ReadableDuration, resizable_threadpool::ResizableRuntime}; + use tokio::runtime::Runtime; use super::*; + type TokioResult = std::io::Result; + + fn create_tokio_runtime(_: usize, _: &str) -> TokioResult { + tokio::runtime::Builder::new_current_thread() + .enable_all() + .build() + } + #[test] fn test_region_range_overlaps() { let verify_overlap = |ranges1: &[(&str, &str)], ranges2: &[(&str, &str)], overlap: bool| { @@ -270,10 +276,10 @@ mod test { ..Config::default() }; - let threads = tokio::runtime::Builder::new_current_thread() - .enable_all() - .build() - .unwrap(); + let mut threads = + ResizableRuntime::new("test", Box::new(create_tokio_runtime), Box::new(|_| {})); + threads.adjust_with(4); + let handle = ResizableRuntimeHandle::new(threads); let switcher = ImportModeSwitcherV2::new(&cfg); let mut region = Region::default(); @@ -300,14 +306,14 @@ mod test { assert!(switcher.region_in_import_mode(®ion2)); assert!(switcher.region_in_import_mode(®ion3)); - switcher.start(threads.handle()); + switcher.start_resizable_threads(&handle); thread::sleep(Duration::from_millis(400)); // renew the timeout of key_range2 switcher.ranges_enter_import_mode(vec![key_range2]); thread::sleep(Duration::from_millis(400)); - threads.block_on(tokio::task::yield_now()); + handle.block_on(tokio::task::yield_now()); // the range covering region and region2 should be cleared due to timeout. 
assert!(!switcher.region_in_import_mode(®ion)); @@ -315,7 +321,7 @@ mod test { assert!(switcher.region_in_import_mode(®ion3)); thread::sleep(Duration::from_millis(400)); - threads.block_on(tokio::task::yield_now()); + handle.block_on(tokio::task::yield_now()); assert!(!switcher.region_in_import_mode(®ion3)); } } diff --git a/components/sst_importer/src/sst_importer.rs b/components/sst_importer/src/sst_importer.rs index e3047d59ae1..b3102d0c4b0 100644 --- a/components/sst_importer/src/sst_importer.rs +++ b/components/sst_importer/src/sst_importer.rs @@ -39,14 +39,12 @@ use tikv_util::{ }, future::RescheduleChecker, memory::{MemoryQuota, OwnedAllocated}, + resizable_threadpool::ResizableRuntimeHandle, sys::{thread::ThreadBuildWrapper, SysQuota}, time::{Instant, Limiter}, Either, HandyRwLock, }; -use tokio::{ - runtime::{Handle, Runtime}, - sync::OnceCell, -}; +use tokio::{runtime::Runtime, sync::OnceCell}; use txn_types::{Key, TimeStamp, WriteRef}; use crate::{ @@ -267,10 +265,10 @@ impl SstImporter { } } - pub fn start_switch_mode_check(&self, executor: &Handle, db: Option) { + pub fn start_switch_mode_check(&self, executor: &ResizableRuntimeHandle, db: Option) { match &self.switcher { - Either::Left(switcher) => switcher.start(executor, db.unwrap()), - Either::Right(switcher) => switcher.start(executor), + Either::Left(switcher) => switcher.start_resizable_threads(executor, db.unwrap()), + Either::Right(switcher) => switcher.start_resizable_threads(executor), } } @@ -1618,6 +1616,7 @@ mod tests { use std::{ io::{self, Cursor}, ops::Sub, + sync::atomic::{AtomicUsize, Ordering}, usize, }; @@ -1641,7 +1640,10 @@ mod tests { use tempfile::{Builder, TempDir}; use test_sst_importer::*; use test_util::new_test_key_manager; - use tikv_util::{codec::stream_event::EventEncoder, stream::block_on_external_io}; + use tikv_util::{ + codec::stream_event::EventEncoder, resizable_threadpool::ResizableRuntime, + stream::block_on_external_io, + }; use tokio::io::{AsyncWrite, AsyncWriteExt}; use tokio_util::compat::{FuturesAsyncWriteCompatExt, TokioAsyncWriteCompatExt}; use txn_types::{Value, WriteType}; @@ -1650,6 +1652,15 @@ mod tests { use super::*; use crate::{import_file::ImportPath, *}; + static COUNTER: AtomicUsize = AtomicUsize::new(0); + type TokioResult = std::io::Result; + + fn create_tokio_runtime(_: usize, _: &str) -> TokioResult { + tokio::runtime::Builder::new_current_thread() + .enable_all() + .build() + } + fn do_test_import_dir(key_manager: Option>) { let temp_dir = Builder::new().prefix("test_import_dir").tempdir().unwrap(); let dir = ImportDir::new(temp_dir.path()).unwrap(); @@ -2288,8 +2299,12 @@ mod tests { }; let change = cfg.diff(&cfg_new); + let threads = + ResizableRuntime::new("test", Box::new(create_tokio_runtime), Box::new(|_| {})); + let handle = ResizableRuntimeHandle::new(threads); + // create config manager and update config. 
- let mut cfg_mgr = ImportConfigManager::new(cfg); + let mut cfg_mgr = ImportConfigManager::new(cfg, handle); cfg_mgr.dispatch(change).unwrap(); importer.update_config_memory_use_ratio(&cfg_mgr); @@ -2312,11 +2327,44 @@ mod tests { ..Default::default() }; let change = cfg.diff(&cfg_new); - let mut cfg_mgr = ImportConfigManager::new(cfg); + + let threads = + ResizableRuntime::new("test", Box::new(create_tokio_runtime), Box::new(|_| {})); + let handle = ResizableRuntimeHandle::new(threads); + + let mut cfg_mgr = ImportConfigManager::new(cfg, handle); let r = cfg_mgr.dispatch(change); assert!(r.is_err()); } + #[test] + fn test_update_import_num_threads() { + let mut threads = ResizableRuntime::new( + "test", + Box::new(create_tokio_runtime), + Box::new(|new_size: usize| { + COUNTER.store(new_size, Ordering::SeqCst); + }), + ); + threads.adjust_with(Config::default().num_threads); + let handle = ResizableRuntimeHandle::new(threads); + let mut cfg_mgr = ImportConfigManager::new(Config::default(), handle); + + assert_eq!(COUNTER.load(Ordering::SeqCst), cfg_mgr.rl().num_threads); + assert_eq!(cfg_mgr.rl().num_threads, Config::default().num_threads); + + let cfg_new = Config { + num_threads: 10, + ..Default::default() + }; + let change = Config::default().diff(&cfg_new); + let r = cfg_mgr.dispatch(change); + + r.unwrap(); + assert_eq!(cfg_mgr.rl().num_threads, cfg_new.num_threads); + assert_eq!(COUNTER.load(Ordering::SeqCst), cfg_mgr.rl().num_threads); + } + #[test] fn test_download_kv_file_to_mem_cache() { // create a sample kv file. diff --git a/components/tikv_util/src/lib.rs b/components/tikv_util/src/lib.rs index 1171e5f4286..7d30fe11874 100644 --- a/components/tikv_util/src/lib.rs +++ b/components/tikv_util/src/lib.rs @@ -54,6 +54,7 @@ pub mod memory; pub mod metrics; pub mod mpsc; pub mod quota_limiter; +pub mod resizable_threadpool; pub mod resource_control; pub mod smoother; pub mod store; diff --git a/components/tikv_util/src/resizable_threadpool.rs b/components/tikv_util/src/resizable_threadpool.rs new file mode 100644 index 00000000000..ce3cf944115 --- /dev/null +++ b/components/tikv_util/src/resizable_threadpool.rs @@ -0,0 +1,161 @@ +// Copyright 2021 TiKV Project Authors. Licensed under Apache-2.0. + +use std::sync::{Arc, RwLock}; + +use futures::Future; +use tokio::{io::Result as TokioResult, runtime::Runtime}; + +/// DaemonRuntime is a "background" runtime, which contains "daemon" tasks: +/// any task spawn into it would run until finish even the runtime isn't +/// referenced. +pub struct DaemonRuntime(Option); + +impl DaemonRuntime { + /// spawn a daemon task to the runtime. + pub fn spawn(self: &Arc, f: impl Future + Send + 'static) { + let wkr = self.clone(); + self.0.as_ref().unwrap().spawn(async move { + f.await; + drop(wkr) + }); + } + + /// create a daemon runtime from some runtime. 
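+ ///
+ /// An illustrative sketch (assuming only the API defined in this module):
+ /// wrap a Tokio runtime and spawn a task that keeps running even after the
+ /// last external reference is dropped.
+ ///
+ /// ```ignore
+ /// let rt = tokio::runtime::Builder::new_current_thread()
+ ///     .enable_all()
+ ///     .build()
+ ///     .unwrap();
+ /// let daemon = DaemonRuntime::from_runtime(rt);
+ /// daemon.spawn(async { /* runs to completion in the background */ });
+ /// ```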
+ pub fn from_runtime(rt: Runtime) -> Arc { + Arc::new(Self(Some(rt))) + } +} + +impl Drop for DaemonRuntime { + fn drop(&mut self) { + if let Some(runtime) = self.0.take() { + runtime.shutdown_background(); + } + } +} + +pub struct ResizableRuntime { + pub size: usize, + thread_name: String, + pool: Option>, + replace_pool_rule: Box TokioResult + Send + Sync>, + after_adjust: Box, +} + +impl ResizableRuntime { + pub fn new( + thread_name: &str, + replace_pool_rule: Box TokioResult + Send + Sync>, + after_adjust: Box, + ) -> Self { + ResizableRuntime { + size: 0, + thread_name: thread_name.to_owned(), + pool: None, + replace_pool_rule, + after_adjust, + } + } + + pub fn spawn(&self, fut: Fut) + where + Fut: Future + Send + 'static, + { + self.pool + .as_ref() + .expect("ResizableRuntime: please call adjust_with() before spawn()") + .spawn(fut); + } + + /// Lazily adjust the thread pool's size + pub fn adjust_with(&mut self, new_size: usize) { + if self.size == new_size { + return; + } + // TODO: after tokio supports adjusting thread pool size(https://github.com/tokio-rs/tokio/issues/3329), + // adapt it. + let pool = (self.replace_pool_rule)(new_size, &self.thread_name) + .expect("failed to create tokio runtime for backup worker."); + self.pool = Some(DaemonRuntime::from_runtime(pool)); + self.size = new_size; + (self.after_adjust)(new_size); + } + + pub fn block_on(&self, f: F) -> F::Output + where + F: Future, + { + self.pool + .as_ref() + .expect("ResizableRuntime: please call adjust_with() before block_on()") + .0 + .as_ref() + .unwrap() + .block_on(f) + } +} + +#[derive(Clone)] +pub struct ResizableRuntimeHandle { + inner: Arc>, +} + +impl ResizableRuntimeHandle { + pub fn new(runtime: ResizableRuntime) -> Self { + ResizableRuntimeHandle { + inner: Arc::new(RwLock::new(runtime)), + } + } + + pub fn spawn(&self, fut: Fut) + where + Fut: Future + Send + 'static, + { + let inner = self.inner.read().unwrap(); + inner.spawn(fut); + } + + pub fn adjust_with(&self, new_size: usize) { + let mut inner = self.inner.write().unwrap(); + inner.adjust_with(new_size); + } + + pub fn block_on(&self, f: F) -> F::Output + where + F: Future, + { + let inner = self.inner.read().unwrap(); + inner.block_on(f) + } +} + +#[cfg(test)] +mod test { + use std::sync::atomic::{AtomicUsize, Ordering}; + + use super::*; + + static COUNTER: AtomicUsize = AtomicUsize::new(0); + + #[test] + fn test_adjust_thread_num() { + let replace_pool_rule = |thread_count: usize, thread_name: &str| { + let rt = tokio::runtime::Builder::new_multi_thread() + .worker_threads(thread_count) + .thread_name(thread_name) + .enable_all() + .build() + .unwrap(); + Ok(rt) + }; + let after_adjust = |new_size: usize| { + COUNTER.store(new_size, Ordering::SeqCst); + }; + let mut threads = + ResizableRuntime::new("test", Box::new(replace_pool_rule), Box::new(after_adjust)); + threads.adjust_with(4); + assert_eq!(COUNTER.load(Ordering::SeqCst), 4); + threads.adjust_with(8); + assert_eq!(COUNTER.load(Ordering::SeqCst), 8); + } +} diff --git a/src/import/sst_service.rs b/src/import/sst_service.rs index f5c4bf809f7..73935013897 100644 --- a/src/import/sst_service.rs +++ b/src/import/sst_service.rs @@ -37,14 +37,12 @@ use tikv_kv::{Engine, LocalTablets, Modify, WriteData}; use tikv_util::{ config::ReadableSize, future::{create_stream_with_buffer, paired_future_callback}, - sys::{ - disk::{get_disk_status, DiskUsage}, - thread::ThreadBuildWrapper, - }, + resizable_threadpool::{ResizableRuntime, ResizableRuntimeHandle}, + sys::disk::{get_disk_status, 
DiskUsage}, time::{Instant, Limiter}, HandyRwLock, }; -use tokio::{runtime::Runtime, time::sleep}; +use tokio::time::sleep; use txn_types::{Key, WriteRef, WriteType}; use super::{ @@ -56,6 +54,7 @@ use crate::{ send_rpc_response, server::CONFIG_ROCKSDB_GAUGE, storage::{self, errors::extract_region_error_from_error}, + tikv_util::sys::thread::ThreadBuildWrapper, }; /// The concurrency of sending raft request for every `apply` requests. @@ -119,7 +118,8 @@ pub struct ImportSstService { cfg: ConfigManager, tablets: LocalTablets, engine: E, - threads: Arc, + // TODO: (Ris) change to ResizableRuntime + threads: ResizableRuntimeHandle, importer: Arc>, limiter: Limiter, ingest_latch: Arc, @@ -321,47 +321,56 @@ impl ImportSstService { resource_manager: Option>, region_info_accessor: Arc, ) -> Self { - let props = tikv_util::thread_group::current_properties(); - let eng = Mutex::new(engine.clone()); - let threads = tokio::runtime::Builder::new_multi_thread() - .worker_threads(cfg.num_threads) - .enable_all() - .thread_name("sst-importer") - .with_sys_and_custom_hooks( - move || { - tikv_util::thread_group::set_properties(props.clone()); - - set_io_type(IoType::Import); - tikv_kv::set_tls_engine(eng.lock().unwrap().clone()); - }, - move || { - // SAFETY: we have set the engine at some lines above with type `E`. - unsafe { tikv_kv::destroy_tls_engine::() }; - }, - ) - .build() - .unwrap(); + let eng = Arc::new(Mutex::new(engine.clone())); + let create_tokio_runtime = move |thread_count: usize, thread_name: &str| { + let props = tikv_util::thread_group::current_properties(); + let eng = eng.clone(); + tokio::runtime::Builder::new_multi_thread() + .worker_threads(thread_count) + .enable_all() + .thread_name(thread_name) + .with_sys_and_custom_hooks( + move || { + tikv_util::thread_group::set_properties(props.clone()); + set_io_type(IoType::Import); + tikv_kv::set_tls_engine(eng.lock().unwrap().clone()); + }, + move || { + // SAFETY: we have set the engine at some lines above with type `E`. + unsafe { tikv_kv::destroy_tls_engine::() }; + }, + ) + .build() + }; + + let mut threads = + ResizableRuntime::new("import", Box::new(create_tokio_runtime), Box::new(|_| ())); + // There would be 4 initial threads running forever. 
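+ // (The pool is resized to the configured `num_threads` a few lines below,
+ // via `handle.adjust_with(cfg_mgr.rl().num_threads)`, once the config
+ // manager has been created.)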
+ threads.adjust_with(4); + let handle = ResizableRuntimeHandle::new(threads); if let LocalTablets::Singleton(tablet) = &tablets { - importer.start_switch_mode_check(threads.handle(), Some(tablet.clone())); + importer.start_switch_mode_check(&handle.clone(), Some(tablet.clone())); } else { - importer.start_switch_mode_check(threads.handle(), None); + importer.start_switch_mode_check(&handle.clone(), None); } let writer = raft_writer::ThrottledTlsEngineWriter::default(); let gc_handle = writer.clone(); - threads.spawn(async move { + handle.spawn(async move { while gc_handle.try_gc() { tokio::time::sleep(WRITER_GC_INTERVAL).await; } }); - let cfg_mgr = ConfigManager::new(cfg); - threads.spawn(Self::tick(importer.clone(), cfg_mgr.clone())); + let cfg_mgr = ConfigManager::new(cfg, handle.clone()); + handle.spawn(Self::tick(importer.clone(), cfg_mgr.clone())); + // Drop the initial pool to accept new tasks + handle.adjust_with(cfg_mgr.rl().num_threads); ImportSstService { cfg: cfg_mgr, tablets, - threads: Arc::new(threads), + threads: handle.clone(), engine, importer, limiter: Limiter::new(f64::INFINITY), From bba0a36c396fa8ea3d54fbfb1d893b7a07932765 Mon Sep 17 00:00:00 2001 From: Bisheng Huang Date: Tue, 5 Nov 2024 18:09:30 +0800 Subject: [PATCH 14/86] grafana: clarify raft waterfall metrics descriptions (#17766) ref tikv/tikv#15990 Raft waterfall metrics track the duration of individual requests, all beginning from the same starting point (when the async write request is scheduled) but ending at various stages of the write process. Previous descriptions did not make that clear and may confuse the readers. This commit improves the grafana descriptions for clarity. Signed-off-by: Bisheng Huang Co-authored-by: ti-chi-bot[bot] <108142056+ti-chi-bot[bot]@users.noreply.github.com> --- metrics/grafana/tikv_details.dashboard.py | 32 ++++++------ metrics/grafana/tikv_details.json | 60 +++++++++++------------ metrics/grafana/tikv_details.json.sha256 | 2 +- 3 files changed, 47 insertions(+), 47 deletions(-) diff --git a/metrics/grafana/tikv_details.dashboard.py b/metrics/grafana/tikv_details.dashboard.py index d88b01f6ec2..ba90aed4a53 100644 --- a/metrics/grafana/tikv_details.dashboard.py +++ b/metrics/grafana/tikv_details.dashboard.py @@ -1892,13 +1892,13 @@ def RaftWaterfall() -> RowPanel: [ graph_panel_histogram_quantiles( title="Store propose wait duration", - description="The propose wait time duration of each request", + description="Time from request scheduling to when it is handled by Raftstore", yaxes=yaxes(left_format=UNITS.SECONDS), metric="tikv_raftstore_request_wait_time_duration_secs", ), graph_panel_histogram_quantiles( title="Store batch wait duration", - description="The batch wait time duration of each request", + description="Time from request scheduling to when a batch of requests is formed and prepared to be proposed to Raft", yaxes=yaxes(left_format=UNITS.SECONDS), metric="tikv_raftstore_store_wf_batch_wait_duration_seconds", ), @@ -1908,13 +1908,13 @@ def RaftWaterfall() -> RowPanel: [ graph_panel_histogram_quantiles( title="Store send to write queue duration", - description="The send-to-write-queue time duration of each request", + description="Time from request scheduling to just before it is sent to the store writer thread", yaxes=yaxes(left_format=UNITS.SECONDS), metric="tikv_raftstore_store_wf_send_to_queue_duration_seconds", ), graph_panel_histogram_quantiles( title="Store send proposal duration", - description="The send raft message of the proposal duration of each 
request", + description="Time from request scheduling to just before it is sent to followers", yaxes=yaxes(left_format=UNITS.SECONDS), metric="tikv_raftstore_store_wf_send_proposal_duration_seconds", ), @@ -1924,13 +1924,13 @@ def RaftWaterfall() -> RowPanel: [ graph_panel_histogram_quantiles( title="Store write kv db end duration", - description="The write kv db end duration of each request", + description="Time from request scheduling to when the batch's snapshot state is written to KV DB", yaxes=yaxes(left_format=UNITS.SECONDS), metric="tikv_raftstore_store_wf_write_kvdb_end_duration_seconds", ), graph_panel_histogram_quantiles( title="Store before write duration", - description="The before write time duration of each request", + description="Time from request scheduling to just before it is written to Raft Engine", yaxes=yaxes(left_format=UNITS.SECONDS), metric="tikv_raftstore_store_wf_before_write_duration_seconds", ), @@ -1939,16 +1939,16 @@ def RaftWaterfall() -> RowPanel: layout.row( [ graph_panel_histogram_quantiles( - title="Store persist duration", - description="The persist duration of each request", + title="Store write end duration", + description="Time from request scheduling to when it is written to Raft Engine", yaxes=yaxes(left_format=UNITS.SECONDS), - metric="tikv_raftstore_store_wf_persist_duration_seconds", + metric="tikv_raftstore_store_wf_write_end_duration_seconds", ), graph_panel_histogram_quantiles( - title="Store write end duration", - description="The write end duration of each request", + title="Store persist duration", + description="Time from request scheduling to when its associated ready is persisted on the leader", yaxes=yaxes(left_format=UNITS.SECONDS), - metric="tikv_raftstore_store_wf_write_end_duration_seconds", + metric="tikv_raftstore_store_wf_persist_duration_seconds", ), ] ) @@ -1956,13 +1956,13 @@ def RaftWaterfall() -> RowPanel: [ graph_panel_histogram_quantiles( title="Store commit but not persist duration", - description="The commit but not persist duration of each request", + description="Time from request scheduling to when it is committed; at the time of commit, it has not been persisted on the leader", yaxes=yaxes(left_format=UNITS.SECONDS), metric="tikv_raftstore_store_wf_commit_not_persist_log_duration_seconds", ), graph_panel_histogram_quantiles( title="Store commit and persist duration", - description="The commit and persist duration of each request", + description="Time from request scheduling to when it is committed; at the time of commit, it has been persisted on the leader", yaxes=yaxes(left_format=UNITS.SECONDS), metric="tikv_raftstore_store_wf_commit_log_duration_seconds", ), @@ -1976,9 +1976,9 @@ def RaftIO() -> RowPanel: layout.row( heatmap_panel_graph_panel_histogram_quantile_pairs( heatmap_title="Process ready duration", - heatmap_description="The time consumed for peer processes to be ready in Raft", + heatmap_description="The time taken by Raftstore to complete processing a poll round, which includes a batch of region peers", graph_title="99% Process ready duration per server", - graph_description="The time consumed for peer processes to be ready in Raft", + graph_description="The time taken by Raftstore to complete processing a poll round, which includes a batch of region peers", graph_by_labels=["instance"], graph_hides=["count", "avg"], yaxis_format=UNITS.SECONDS, diff --git a/metrics/grafana/tikv_details.json b/metrics/grafana/tikv_details.json index c18f536ec8d..f120345c391 100644 --- a/metrics/grafana/tikv_details.json 
+++ b/metrics/grafana/tikv_details.json @@ -14870,7 +14870,7 @@ "bars": false, "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "description": "The propose wait time duration of each request", + "description": "Time from request scheduling to when it is handled by Raftstore", "editable": true, "error": false, "fieldConfig": { @@ -15071,7 +15071,7 @@ "bars": false, "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "description": "The batch wait time duration of each request", + "description": "Time from request scheduling to when a batch of requests is formed and prepared to be proposed to Raft", "editable": true, "error": false, "fieldConfig": { @@ -15272,7 +15272,7 @@ "bars": false, "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "description": "The send-to-write-queue time duration of each request", + "description": "Time from request scheduling to just before it is sent to the store writer thread", "editable": true, "error": false, "fieldConfig": { @@ -15473,7 +15473,7 @@ "bars": false, "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "description": "The send raft message of the proposal duration of each request", + "description": "Time from request scheduling to just before it is sent to followers", "editable": true, "error": false, "fieldConfig": { @@ -15674,7 +15674,7 @@ "bars": false, "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "description": "The write kv db end duration of each request", + "description": "Time from request scheduling to when the batch's snapshot state is written to KV DB", "editable": true, "error": false, "fieldConfig": { @@ -15875,7 +15875,7 @@ "bars": false, "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "description": "The before write time duration of each request", + "description": "Time from request scheduling to just before it is written to Raft Engine", "editable": true, "error": false, "fieldConfig": { @@ -16076,7 +16076,7 @@ "bars": false, "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "description": "The persist duration of each request", + "description": "Time from request scheduling to when it is written to Raft Engine", "editable": true, "error": false, "fieldConfig": { @@ -16169,7 +16169,7 @@ "targets": [ { "datasource": "${DS_TEST-CLUSTER}", - "expr": "histogram_quantile(0.9999,(\n sum(rate(\n tikv_raftstore_store_wf_persist_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le, $additional_groupby) \n \n \n)) ", + "expr": "histogram_quantile(0.9999,(\n sum(rate(\n tikv_raftstore_store_wf_write_end_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le, $additional_groupby) \n \n \n)) ", "format": "time_series", "hide": false, "instant": false, @@ -16177,14 +16177,14 @@ "intervalFactor": 1, "legendFormat": "99.99% {{$additional_groupby}}", "metric": "", - "query": "histogram_quantile(0.9999,(\n sum(rate(\n tikv_raftstore_store_wf_persist_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le, $additional_groupby) \n \n \n)) ", + "query": "histogram_quantile(0.9999,(\n sum(rate(\n tikv_raftstore_store_wf_write_end_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le, $additional_groupby) \n \n \n)) ", "refId": 
"", "step": 10, "target": "" }, { "datasource": "${DS_TEST-CLUSTER}", - "expr": "histogram_quantile(0.99,(\n sum(rate(\n tikv_raftstore_store_wf_persist_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le, $additional_groupby) \n \n \n)) ", + "expr": "histogram_quantile(0.99,(\n sum(rate(\n tikv_raftstore_store_wf_write_end_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le, $additional_groupby) \n \n \n)) ", "format": "time_series", "hide": false, "instant": false, @@ -16192,14 +16192,14 @@ "intervalFactor": 1, "legendFormat": "99% {{$additional_groupby}}", "metric": "", - "query": "histogram_quantile(0.99,(\n sum(rate(\n tikv_raftstore_store_wf_persist_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le, $additional_groupby) \n \n \n)) ", + "query": "histogram_quantile(0.99,(\n sum(rate(\n tikv_raftstore_store_wf_write_end_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le, $additional_groupby) \n \n \n)) ", "refId": "", "step": 10, "target": "" }, { "datasource": "${DS_TEST-CLUSTER}", - "expr": "(sum(rate(\n tikv_raftstore_store_wf_persist_duration_seconds_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by ($additional_groupby) / sum(rate(\n tikv_raftstore_store_wf_persist_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by ($additional_groupby) )", + "expr": "(sum(rate(\n tikv_raftstore_store_wf_write_end_duration_seconds_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by ($additional_groupby) / sum(rate(\n tikv_raftstore_store_wf_write_end_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by ($additional_groupby) )", "format": "time_series", "hide": false, "instant": false, @@ -16207,14 +16207,14 @@ "intervalFactor": 1, "legendFormat": "avg {{$additional_groupby}}", "metric": "", - "query": "(sum(rate(\n tikv_raftstore_store_wf_persist_duration_seconds_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by ($additional_groupby) / sum(rate(\n tikv_raftstore_store_wf_persist_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by ($additional_groupby) )", + "query": "(sum(rate(\n tikv_raftstore_store_wf_write_end_duration_seconds_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by ($additional_groupby) / sum(rate(\n tikv_raftstore_store_wf_write_end_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by ($additional_groupby) )", "refId": "", "step": 10, "target": "" }, { "datasource": "${DS_TEST-CLUSTER}", - "expr": "sum(rate(\n tikv_raftstore_store_wf_persist_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by ($additional_groupby) ", + 
"expr": "sum(rate(\n tikv_raftstore_store_wf_write_end_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by ($additional_groupby) ", "format": "time_series", "hide": false, "instant": false, @@ -16222,7 +16222,7 @@ "intervalFactor": 1, "legendFormat": "count {{$additional_groupby}}", "metric": "", - "query": "sum(rate(\n tikv_raftstore_store_wf_persist_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by ($additional_groupby) ", + "query": "sum(rate(\n tikv_raftstore_store_wf_write_end_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by ($additional_groupby) ", "refId": "", "step": 10, "target": "" @@ -16231,7 +16231,7 @@ "thresholds": [], "timeFrom": null, "timeShift": null, - "title": "Store persist duration", + "title": "Store write end duration", "tooltip": { "msResolution": true, "shared": true, @@ -16277,7 +16277,7 @@ "bars": false, "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "description": "The write end duration of each request", + "description": "Time from request scheduling to when its associated ready is persisted on the leader", "editable": true, "error": false, "fieldConfig": { @@ -16370,7 +16370,7 @@ "targets": [ { "datasource": "${DS_TEST-CLUSTER}", - "expr": "histogram_quantile(0.9999,(\n sum(rate(\n tikv_raftstore_store_wf_write_end_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le, $additional_groupby) \n \n \n)) ", + "expr": "histogram_quantile(0.9999,(\n sum(rate(\n tikv_raftstore_store_wf_persist_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le, $additional_groupby) \n \n \n)) ", "format": "time_series", "hide": false, "instant": false, @@ -16378,14 +16378,14 @@ "intervalFactor": 1, "legendFormat": "99.99% {{$additional_groupby}}", "metric": "", - "query": "histogram_quantile(0.9999,(\n sum(rate(\n tikv_raftstore_store_wf_write_end_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le, $additional_groupby) \n \n \n)) ", + "query": "histogram_quantile(0.9999,(\n sum(rate(\n tikv_raftstore_store_wf_persist_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le, $additional_groupby) \n \n \n)) ", "refId": "", "step": 10, "target": "" }, { "datasource": "${DS_TEST-CLUSTER}", - "expr": "histogram_quantile(0.99,(\n sum(rate(\n tikv_raftstore_store_wf_write_end_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le, $additional_groupby) \n \n \n)) ", + "expr": "histogram_quantile(0.99,(\n sum(rate(\n tikv_raftstore_store_wf_persist_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le, $additional_groupby) \n \n \n)) ", "format": "time_series", "hide": false, "instant": false, @@ -16393,14 +16393,14 @@ "intervalFactor": 1, "legendFormat": "99% {{$additional_groupby}}", "metric": "", - "query": "histogram_quantile(0.99,(\n sum(rate(\n 
tikv_raftstore_store_wf_write_end_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le, $additional_groupby) \n \n \n)) ", + "query": "histogram_quantile(0.99,(\n sum(rate(\n tikv_raftstore_store_wf_persist_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le, $additional_groupby) \n \n \n)) ", "refId": "", "step": 10, "target": "" }, { "datasource": "${DS_TEST-CLUSTER}", - "expr": "(sum(rate(\n tikv_raftstore_store_wf_write_end_duration_seconds_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by ($additional_groupby) / sum(rate(\n tikv_raftstore_store_wf_write_end_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by ($additional_groupby) )", + "expr": "(sum(rate(\n tikv_raftstore_store_wf_persist_duration_seconds_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by ($additional_groupby) / sum(rate(\n tikv_raftstore_store_wf_persist_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by ($additional_groupby) )", "format": "time_series", "hide": false, "instant": false, @@ -16408,14 +16408,14 @@ "intervalFactor": 1, "legendFormat": "avg {{$additional_groupby}}", "metric": "", - "query": "(sum(rate(\n tikv_raftstore_store_wf_write_end_duration_seconds_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by ($additional_groupby) / sum(rate(\n tikv_raftstore_store_wf_write_end_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by ($additional_groupby) )", + "query": "(sum(rate(\n tikv_raftstore_store_wf_persist_duration_seconds_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by ($additional_groupby) / sum(rate(\n tikv_raftstore_store_wf_persist_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by ($additional_groupby) )", "refId": "", "step": 10, "target": "" }, { "datasource": "${DS_TEST-CLUSTER}", - "expr": "sum(rate(\n tikv_raftstore_store_wf_write_end_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by ($additional_groupby) ", + "expr": "sum(rate(\n tikv_raftstore_store_wf_persist_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by ($additional_groupby) ", "format": "time_series", "hide": false, "instant": false, @@ -16423,7 +16423,7 @@ "intervalFactor": 1, "legendFormat": "count {{$additional_groupby}}", "metric": "", - "query": "sum(rate(\n tikv_raftstore_store_wf_write_end_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by ($additional_groupby) ", + "query": "sum(rate(\n tikv_raftstore_store_wf_persist_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by 
($additional_groupby) ", "refId": "", "step": 10, "target": "" @@ -16432,7 +16432,7 @@ "thresholds": [], "timeFrom": null, "timeShift": null, - "title": "Store write end duration", + "title": "Store persist duration", "tooltip": { "msResolution": true, "shared": true, @@ -16478,7 +16478,7 @@ "bars": false, "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "description": "The commit but not persist duration of each request", + "description": "Time from request scheduling to when it is committed; at the time of commit, it has not been persisted on the leader", "editable": true, "error": false, "fieldConfig": { @@ -16679,7 +16679,7 @@ "bars": false, "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "description": "The commit and persist duration of each request", + "description": "Time from request scheduling to when it is committed; at the time of commit, it has been persisted on the leader", "editable": true, "error": false, "fieldConfig": { @@ -16934,7 +16934,7 @@ }, "dataFormat": "tsbuckets", "datasource": "${DS_TEST-CLUSTER}", - "description": "The time consumed for peer processes to be ready in Raft", + "description": "The time taken by Raftstore to complete processing a poll round, which includes a batch of region peers", "editable": true, "error": false, "fieldConfig": { @@ -17027,7 +17027,7 @@ "bars": false, "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "description": "The time consumed for peer processes to be ready in Raft", + "description": "The time taken by Raftstore to complete processing a poll round, which includes a batch of region peers", "editable": true, "error": false, "fieldConfig": { diff --git a/metrics/grafana/tikv_details.json.sha256 b/metrics/grafana/tikv_details.json.sha256 index b0964023cd3..239e6c44191 100644 --- a/metrics/grafana/tikv_details.json.sha256 +++ b/metrics/grafana/tikv_details.json.sha256 @@ -1 +1 @@ -6a3c9238ffc0450c013981946f4388ced4a92759b379f0b2289b1cbdfc7a1b48 ./metrics/grafana/tikv_details.json +d8d8d708f9e11b7a9e318f9c50184212a2adb0a7967b313f92fbef55021fffed ./metrics/grafana/tikv_details.json From bc2c6521e3272e7c07384f48b79c5d0f3947fcea Mon Sep 17 00:00:00 2001 From: Ling Jin <7138436+3AceShowHand@users.noreply.github.com> Date: Wed, 6 Nov 2024 14:42:57 +0800 Subject: [PATCH 15/86] cdc: do not send too much tasks if cannot be handled in time (#17724) close tikv/tikv#17696 * take cdc tasks into memory quota to prevent the TiKV OOM caused by too many pending tasks Signed-off-by: Neil Shen Signed-off-by: 3AceShowHand Co-authored-by: Neil Shen Co-authored-by: ti-chi-bot[bot] <108142056+ti-chi-bot[bot]@users.noreply.github.com> --- components/cdc/src/endpoint.rs | 25 ++++++++++-- components/cdc/src/metrics.rs | 6 +++ components/cdc/src/observer.rs | 66 ++++++++++++++++++++++++++++--- components/cdc/tests/mod.rs | 7 +++- components/server/src/server.rs | 11 +++--- components/server/src/server2.rs | 16 ++++---- components/txn_types/src/types.rs | 9 +++++ 7 files changed, 116 insertions(+), 24 deletions(-) diff --git a/components/cdc/src/endpoint.rs b/components/cdc/src/endpoint.rs index 972be77067e..a860d3d1260 100644 --- a/components/cdc/src/endpoint.rs +++ b/components/cdc/src/endpoint.rs @@ -985,6 +985,8 @@ impl, E: KvEngine, S: StoreRegionMeta> Endpoint, old_value_cb: OldValueCallback) { fail_point!("cdc_before_handle_multi_batch", |_| {}); + let size = multi.iter().map(|b| b.size()).sum(); + self.sink_memory_quota.free(size); let mut statistics = Statistics::default(); for batch in multi { let region_id = 
batch.region_id; @@ -1273,9 +1275,11 @@ impl, E: KvEngine, S: StoreRegionMeta + Send> Runnable cb(); } Task::TxnExtra(txn_extra) => { + let size = txn_extra.size(); for (k, v) in txn_extra.old_values { self.old_value_cache.insert(k, v); } + self.sink_memory_quota.free(size); } Task::Validate(validate) => match validate { Validate::Region(region_id, validate) => { @@ -1334,16 +1338,27 @@ impl, E: KvEngine, S: StoreRegionMeta + Send> Runnable pub struct CdcTxnExtraScheduler { scheduler: Scheduler, + memory_quota: Arc, } impl CdcTxnExtraScheduler { - pub fn new(scheduler: Scheduler) -> CdcTxnExtraScheduler { - CdcTxnExtraScheduler { scheduler } + pub fn new(scheduler: Scheduler, memory_quota: Arc) -> CdcTxnExtraScheduler { + CdcTxnExtraScheduler { + scheduler, + memory_quota, + } } } impl TxnExtraScheduler for CdcTxnExtraScheduler { fn schedule(&self, txn_extra: TxnExtra) { + let size = txn_extra.size(); + if let Err(e) = self.memory_quota.alloc(size) { + CDC_DROP_TXN_EXTRA_TASKS_COUNT.inc(); + debug!("cdc schedule txn extra failed on alloc memory quota"; + "in_use" => self.memory_quota.in_use(), "err" => ?e); + return; + } if let Err(e) = self.scheduler.schedule(Task::TxnExtra(txn_extra)) { error!("cdc schedule txn extra failed"; "err" => ?e); } @@ -1477,6 +1492,8 @@ mod tests { region_read_progress, store_resolver_gc_interval, ); + + let memory_quota = Arc::new(MemoryQuota::new(usize::MAX)); let ep = Endpoint::new( DEFAULT_CLUSTER_ID, cfg, @@ -1493,12 +1510,12 @@ mod tests { .kv_engine() .unwrap() })), - CdcObserver::new(task_sched), + CdcObserver::new(task_sched, memory_quota.clone()), Arc::new(StdMutex::new(store_meta)), ConcurrencyManager::new(1.into()), env, security_mgr, - Arc::new(MemoryQuota::new(usize::MAX)), + memory_quota, causal_ts_provider, ); diff --git a/components/cdc/src/metrics.rs b/components/cdc/src/metrics.rs index 4ac38c3c0bc..de13f29fa44 100644 --- a/components/cdc/src/metrics.rs +++ b/components/cdc/src/metrics.rs @@ -102,6 +102,12 @@ lazy_static! { "Total fetched bytes of CDC incremental scan" ) .unwrap(); + + pub static ref CDC_DROP_TXN_EXTRA_TASKS_COUNT:IntCounter = register_int_counter!( + "tikv_cdc_drop_txn_extra_task_count", + "Total count of dropped txn extra tasks" + ).unwrap(); + pub static ref CDC_SCAN_TASKS: IntGaugeVec = register_int_gauge_vec!( "tikv_cdc_scan_tasks", "Total number of CDC incremental scan tasks", diff --git a/components/cdc/src/observer.rs b/components/cdc/src/observer.rs index fda4a456217..de898a8ae72 100644 --- a/components/cdc/src/observer.rs +++ b/components/cdc/src/observer.rs @@ -9,7 +9,7 @@ use kvproto::metapb::{Peer, Region}; use raft::StateRole; use raftstore::{coprocessor::*, store::RegionSnapshot, Error as RaftStoreError}; use tikv::storage::Statistics; -use tikv_util::{error, warn, worker::Scheduler}; +use tikv_util::{error, memory::MemoryQuota, warn, worker::Scheduler}; use crate::{ endpoint::{Deregister, Task}, @@ -25,6 +25,7 @@ use crate::{ #[derive(Clone)] pub struct CdcObserver { sched: Scheduler, + memory_quota: Arc, // A shared registry for managing observed regions. // TODO: it may become a bottleneck, find a better way to manage the registry. observe_regions: Arc>>, @@ -35,9 +36,10 @@ impl CdcObserver { /// /// Events are strong ordered, so `sched` must be implemented as /// a FIFO queue. 
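+ /// `memory_quota` accounts for the bytes of `Task::MultiBatch` payloads
+ /// queued on `sched`; the endpoint frees the quota when it handles the
+ /// batches.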
- pub fn new(sched: Scheduler) -> CdcObserver { + pub fn new(sched: Scheduler, memory_quota: Arc) -> CdcObserver { CdcObserver { sched, + memory_quota, observe_regions: Arc::default(), } } @@ -119,8 +121,6 @@ impl CmdObserver for CdcObserver { let mut region = Region::default(); region.mut_peers().push(Peer::default()); // Create a snapshot here for preventing the old value was GC-ed. - // TODO: only need it after enabling old value, may add a flag to indicate - // whether to get it. let snapshot = RegionSnapshot::from_snapshot(Arc::new(engine.snapshot()), Arc::new(region)); let get_old_value = move |key, query_ts, @@ -128,6 +128,9 @@ impl CmdObserver for CdcObserver { statistics: &mut Statistics| { old_value::get_old_value(&snapshot, key, query_ts, old_value_cache, statistics) }; + + let size = cmd_batches.iter().map(|b| b.size()).sum(); + self.memory_quota.alloc_force(size); if let Err(e) = self.sched.schedule(Task::MultiBatch { multi: cmd_batches, old_value_cb: Box::new(get_old_value), @@ -209,14 +212,17 @@ mod tests { use kvproto::metapb::Region; use raftstore::coprocessor::RoleChange; use tikv::storage::kv::TestEngineBuilder; - use tikv_util::store::new_peer; + use tikv_util::{store::new_peer, worker::dummy_scheduler}; + use txn_types::{TxnExtra, TxnExtraScheduler}; use super::*; + use crate::CdcTxnExtraScheduler; #[test] fn test_register_and_deregister() { let (scheduler, mut rx) = tikv_util::worker::dummy_scheduler(); - let observer = CdcObserver::new(scheduler); + let memory_quota = Arc::new(MemoryQuota::new(usize::MAX)); + let observer = CdcObserver::new(scheduler, memory_quota.clone()); let observe_info = CmdObserveInfo::from_handle( ObserveHandle::new(), ObserveHandle::new(), @@ -226,12 +232,14 @@ mod tests { let mut cb = CmdBatch::new(&observe_info, 0); cb.push(&observe_info, 0, Cmd::default()); + let size = cb.size(); >::on_flush_applied_cmd_batch( &observer, cb.level, &mut vec![cb], &engine, ); + assert_eq!(memory_quota.in_use(), size); match rx.recv_timeout(Duration::from_millis(10)).unwrap().unwrap() { Task::MultiBatch { multi, .. } => { assert_eq!(multi.len(), 1); @@ -347,4 +355,50 @@ mod tests { observer.on_role_change(&mut ctx, &RoleChange::new_for_test(StateRole::Follower)); rx.recv_timeout(Duration::from_millis(10)).unwrap_err(); } + + #[test] + fn test_txn_extra_dropped_since_exceed_memory_quota() { + let memory_quota = Arc::new(MemoryQuota::new(10)); + let (task_sched, mut task_rx) = dummy_scheduler(); + let observer = CdcObserver::new(task_sched.clone(), memory_quota.clone()); + let txn_extra_scheduler = + CdcTxnExtraScheduler::new(task_sched.clone(), memory_quota.clone()); + + let observe_info = CmdObserveInfo::from_handle( + ObserveHandle::new(), + ObserveHandle::new(), + ObserveHandle::new(), + ); + let mut cb = CmdBatch::new(&observe_info, 0); + cb.push(&observe_info, 0, Cmd::default()); + + let engine = TestEngineBuilder::new().build().unwrap().get_rocksdb(); + >::on_flush_applied_cmd_batch( + &observer, + cb.level, + &mut vec![cb], + &engine, + ); + + txn_extra_scheduler.schedule(TxnExtra { + old_values: Default::default(), + one_pc: false, + allowed_in_flashback: false, + }); + + match task_rx + .recv_timeout(Duration::from_millis(10)) + .unwrap() + .unwrap() + { + Task::MultiBatch { multi, .. 
} => { + assert_eq!(multi.len(), 1); + assert_eq!(multi[0].len(), 1); + } + _ => panic!("unexpected task"), + }; + + let err = task_rx.recv_timeout(Duration::from_millis(10)).unwrap_err(); + assert_eq!(err, std::sync::mpsc::RecvTimeoutError::Timeout); + } } diff --git a/components/cdc/tests/mod.rs b/components/cdc/tests/mod.rs index 1faea9391a8..c95c8ca9585 100644 --- a/components/cdc/tests/mod.rs +++ b/components/cdc/tests/mod.rs @@ -193,10 +193,13 @@ impl TestSuiteBuilder { })); sim.txn_extra_schedulers.insert( id, - Arc::new(cdc::CdcTxnExtraScheduler::new(worker.scheduler().clone())), + Arc::new(cdc::CdcTxnExtraScheduler::new( + worker.scheduler().clone(), + memory_quota.clone(), + )), ); let scheduler = worker.scheduler(); - let cdc_ob = cdc::CdcObserver::new(scheduler.clone()); + let cdc_ob = cdc::CdcObserver::new(scheduler.clone(), memory_quota.clone()); obs.insert(id, cdc_ob.clone()); sim.coprocessor_hosts.entry(id).or_default().push(Box::new( move |host: &mut CoprocessorHost| { diff --git a/components/server/src/server.rs b/components/server/src/server.rs index a083ce5769a..da6a7a85b76 100644 --- a/components/server/src/server.rs +++ b/components/server/src/server.rs @@ -541,9 +541,13 @@ where cfg_controller.register(tikv::config::Module::Memory, Box::new(MemoryConfigManager)); // Create cdc. + let cdc_memory_quota = Arc::new(MemoryQuota::new( + self.core.config.cdc.sink_memory_quota.0 as _, + )); let mut cdc_worker = Box::new(LazyWorker::new("cdc")); let cdc_scheduler = cdc_worker.scheduler(); - let txn_extra_scheduler = cdc::CdcTxnExtraScheduler::new(cdc_scheduler.clone()); + let txn_extra_scheduler = + cdc::CdcTxnExtraScheduler::new(cdc_scheduler.clone(), cdc_memory_quota.clone()); self.engines .as_mut() @@ -770,7 +774,7 @@ where } // Register cdc. - let cdc_ob = cdc::CdcObserver::new(cdc_scheduler.clone()); + let cdc_ob = cdc::CdcObserver::new(cdc_scheduler.clone(), cdc_memory_quota.clone()); cdc_ob.register_to(self.coprocessor_host.as_mut().unwrap()); // Register cdc config manager. cfg_controller.register( @@ -1068,9 +1072,6 @@ where } // Start CDC. - let cdc_memory_quota = Arc::new(MemoryQuota::new( - self.core.config.cdc.sink_memory_quota.0 as _, - )); let cdc_endpoint = cdc::Endpoint::new( self.core.config.server.cluster_id, &self.core.config.cdc, diff --git a/components/server/src/server2.rs b/components/server/src/server2.rs index 74a9ffaffa9..9e76ff6dba7 100644 --- a/components/server/src/server2.rs +++ b/components/server/src/server2.rs @@ -656,8 +656,10 @@ where // Create cdc worker. let mut cdc_worker = self.cdc_worker.take().unwrap(); let cdc_scheduler = self.cdc_scheduler.clone().unwrap(); + + let cdc_memory_quota = self.cdc_memory_quota.as_ref().unwrap(); // Register cdc observer. - let cdc_ob = cdc::CdcObserver::new(cdc_scheduler.clone()); + let cdc_ob = cdc::CdcObserver::new(cdc_scheduler.clone(), cdc_memory_quota.clone()); cdc_ob.register_to(self.coprocessor_host.as_mut().unwrap()); // Register cdc config manager. cfg_controller.register( @@ -665,9 +667,6 @@ where Box::new(CdcConfigManager(cdc_scheduler.clone())), ); // Start cdc endpoint. - let cdc_memory_quota = Arc::new(MemoryQuota::new( - self.core.config.cdc.sink_memory_quota.0 as _, - )); let cdc_endpoint = cdc::Endpoint::new( self.core.config.server.cluster_id, &self.core.config.cdc, @@ -688,7 +687,6 @@ where ); cdc_worker.start_with_timer(cdc_endpoint); self.core.to_stop.push(cdc_worker); - self.cdc_memory_quota = Some(cdc_memory_quota); // Create resolved ts. 
if self.core.config.resolved_ts.enable { @@ -1548,8 +1546,11 @@ impl TikvServer { let cdc_worker = Box::new(LazyWorker::new("cdc")); let cdc_scheduler = cdc_worker.scheduler(); - let txn_extra_scheduler = cdc::CdcTxnExtraScheduler::new(cdc_scheduler.clone()); - + let cdc_memory_quota = Arc::new(MemoryQuota::new( + self.core.config.cdc.sink_memory_quota.0 as _, + )); + let txn_extra_scheduler = + cdc::CdcTxnExtraScheduler::new(cdc_scheduler.clone(), cdc_memory_quota.clone()); let mut engine = RaftKv2::new(router.clone(), region_info_accessor.region_leaders()); // Set txn extra scheduler immediately to make sure every clone has the // scheduler. @@ -1565,6 +1566,7 @@ impl TikvServer { self.region_info_accessor = Some(region_info_accessor); self.cdc_worker = Some(cdc_worker); self.cdc_scheduler = Some(cdc_scheduler); + self.cdc_memory_quota = Some(cdc_memory_quota); engines_info } diff --git a/components/txn_types/src/types.rs b/components/txn_types/src/types.rs index 0deece96e3e..e2cc91c0cd8 100644 --- a/components/txn_types/src/types.rs +++ b/components/txn_types/src/types.rs @@ -567,6 +567,15 @@ impl TxnExtra { pub fn is_empty(&self) -> bool { self.old_values.is_empty() } + + pub fn size(&self) -> usize { + let mut result = 0; + for (key, value) in &self.old_values { + result += key.len(); + result += value.0.size(); + } + result + std::mem::size_of::() + } } pub trait TxnExtraScheduler: Send + Sync { From 5182652c0fb58c8a1a284bf74aa3cb5c7d8d6495 Mon Sep 17 00:00:00 2001 From: hhwyt Date: Wed, 6 Nov 2024 20:58:33 +0800 Subject: [PATCH 16/86] raftstore: allow leader transfer if conf change applied on transferee (#17643) close tikv/tikv#17363 Allow leader transfer if conf change applied on transferee. Signed-off-by: hhwyt Co-authored-by: Bisheng Huang --- components/raftstore/src/store/fsm/apply.rs | 16 + components/raftstore/src/store/fsm/peer.rs | 23 +- components/raftstore/src/store/peer.rs | 58 +++- components/raftstore/src/store/util.rs | 5 +- components/test_pd_client/src/pd.rs | 14 +- .../failpoints/cases/test_transfer_leader.rs | 287 +++++++++++++++++- 6 files changed, 377 insertions(+), 26 deletions(-) diff --git a/components/raftstore/src/store/fsm/apply.rs b/components/raftstore/src/store/fsm/apply.rs index 03741bebc11..19aeb627f42 100644 --- a/components/raftstore/src/store/fsm/apply.rs +++ b/components/raftstore/src/store/fsm/apply.rs @@ -2332,6 +2332,17 @@ where request: &AdminRequest, ) -> Result<(AdminResponse, ApplyResult)> { assert!(request.has_change_peer_v2()); + fail_point!( + "apply_on_conf_change_1_1", + self.id() == 1 && self.region_id() == 1, + |_| unreachable!() + ); + fail_point!( + "apply_on_conf_change_3_1", + self.id() == 3 && self.region_id() == 1, + |_| unreachable!() + ); + let changes = request.get_change_peer_v2().get_change_peers().to_vec(); info!( @@ -2585,6 +2596,11 @@ where req: &AdminRequest, ) -> Result<(AdminResponse, ApplyResult)> { fail_point!("apply_before_split"); + fail_point!( + "apply_before_split_1_1", + self.id() == 1 && self.region_id() == 1, + |_| { unreachable!() } + ); fail_point!( "apply_before_split_1_3", self.id() == 3 && self.region_id() == 1, diff --git a/components/raftstore/src/store/fsm/peer.rs b/components/raftstore/src/store/fsm/peer.rs index 1c06d9f1496..ef657ff738f 100644 --- a/components/raftstore/src/store/fsm/peer.rs +++ b/components/raftstore/src/store/fsm/peer.rs @@ -3700,6 +3700,9 @@ where } } + // NOTE: This method is used by both the leader and the follower. 
+ // Both the request and response for transfer-leader share the MessageType + // `MsgTransferLeader`. fn on_transfer_leader_msg(&mut self, msg: &eraftpb::Message, peer_disk_usage: DiskUsage) { // log_term is set by original leader, represents the term last log is written // in, which should be equal to the original leader's term. @@ -3736,6 +3739,7 @@ where "region_id" => self.fsm.region_id(), "peer_id" => self.fsm.peer_id(), "to" => ?from, + "last_index" => self.fsm.peer.get_store().last_index(), ); let mut cmd = new_admin_request( self.fsm.peer.region().get_id(), @@ -3795,6 +3799,11 @@ where return false; } + fail_point!("propose_locks_before_transfer_leader", |_| { + pessimistic_locks.status = LocksStatus::TransferringLeader; + true + }); + // If it is not writable, it's probably because it's a retried TransferLeader // and the locks have been proposed. But we still need to return true to // propose another TransferLeader command. Otherwise, some write requests that @@ -5835,7 +5844,7 @@ where let is_admin_request = msg.has_admin_request(); info_or_debug!( is_admin_request; - "failed to propose"; + "failed to pre propose"; "region_id" => self.region_id(), "peer_id" => self.fsm.peer_id(), "message" => ?msg, @@ -5868,8 +5877,20 @@ where let mut resp = RaftCmdResponse::default(); let term = self.fsm.peer.term(); bind_term(&mut resp, term); + // Save important details from `msg` so we can log them later if the proposal + // fails. This is a workaround because `msg` gets moved when proposed. + let is_admin_request = msg.has_admin_request(); + let admin_cmd_type = is_admin_request.then(|| msg.get_admin_request().get_cmd_type()); if self.fsm.peer.propose(self.ctx, cb, msg, resp, diskfullopt) { self.fsm.has_ready = true; + } else { + info_or_debug!( + is_admin_request; + "failed to propose"; + "region_id" => self.region_id(), + "peer_id" => self.fsm.peer_id(), + "admin_cmd_type" => ?admin_cmd_type, + ); } if self.fsm.peer.should_wake_up { diff --git a/components/raftstore/src/store/peer.rs b/components/raftstore/src/store/peer.rs index 07595d4a620..208a5587b9d 100644 --- a/components/raftstore/src/store/peer.rs +++ b/components/raftstore/src/store/peer.rs @@ -58,7 +58,7 @@ use tikv_util::{ box_err, codec::number::decode_u64, debug, error, info, - store::find_peer_by_id, + store::{find_peer_by_id, is_learner}, sys::disk::DiskUsage, time::{duration_to_sec, monotonic_raw_now, Instant as TiInstant, InstantExt}, warn, @@ -3921,17 +3921,6 @@ where extra_msgs: Vec, ctx: &mut PollContext, ) -> bool { - // Checks if safe to transfer leader. - if self.raft_group.raft.has_pending_conf() { - info!( - "reject transfer leader due to pending conf change"; - "region_id" => self.region_id, - "peer_id" => self.peer.get_id(), - "peer" => ?peer, - ); - return false; - } - // Broadcast heartbeat to make sure followers commit the entries immediately. // It's only necessary to ping the target peer, but ping all for simplicity. self.raft_group.ping(); @@ -3987,12 +3976,36 @@ where } } - if self.raft_group.raft.has_pending_conf() - || self.raft_group.raft.pending_conf_index > index - { + // It's safe to transfer leader to a target peer that has already applied the + // configuration change, even if the current leader has not yet applied + // it. For more details, refer to the issue at: + // https://github.com/tikv/tikv/issues/17363#issuecomment-2404227253. 
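+ // (`index` below is the applied index reported by the transferee in its
+ // transfer-leader reply, so this rejection only triggers when the
+ // transferee itself still has an unapplied conf change.)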
+ if self.raft_group.raft.pending_conf_index > index { + info!( + "not ready to transfer leader, transferee has an unapplied conf change"; + "region_id" => self.region_id, + "transferee_peer_id" => peer_id, + "pending_conf_index" => self.raft_group.raft.pending_conf_index, + "applied_index" => self.raft_group.raft.raft_log.applied, + "transferee_applied_index" => index + ); return Some("pending conf change"); } + if self.raft_group.raft.has_pending_conf() { + info!( + "transfer leader with pending conf on current leader"; + "region_id" => self.region_id, + "transferee_peer_id" => peer_id, + "transferee_applied_index" => index, + "pending_conf_index" => self.raft_group.raft.pending_conf_index, + "last_index" => self.get_store().last_index(), + "persist_index" => self.raft_group.raft.raft_log.persisted, + "committed_index" => self.raft_group.raft.raft_log.committed, + "applied_index" => self.raft_group.raft.raft_log.applied, + ); + } + let last_index = self.get_store().last_index(); if last_index >= index + ctx.cfg.leader_transfer_max_log_lag { return Some("log gap"); @@ -4686,7 +4699,7 @@ where ) -> bool { let pending_snapshot = self.is_handling_snapshot() || self.has_pending_snapshot(); // shouldn't transfer leader to witness peer or non-witness waiting data - if self.is_witness() || self.wait_data + if self.is_witness() || is_learner(&self.peer) || self.wait_data || pending_snapshot || msg.get_from() != self.leader_id() // Transfer leader to node with disk full will lead to write availablity downback. @@ -4703,6 +4716,7 @@ where "pending_snapshot" => pending_snapshot, "disk_usage" => ?ctx.self_disk_usage, "is_witness" => self.is_witness(), + "is_learner" => is_learner(&self.peer), "wait_data" => self.wait_data, ); return true; @@ -4771,6 +4785,18 @@ where &mut self, reply_cmd: bool, // whether it is a reply to a TransferLeader command ) { + info!( + "ack transfer leader"; + "region_id" => self.region_id, + "from_peer" => self.peer_id(), + "to_peer" => self.leader_id(), + "reply_cmd" => reply_cmd, + "last_index" => self.get_store().last_index(), + "persist_index" => self.raft_group.raft.raft_log.persisted, + "committed_index" => self.raft_group.raft.raft_log.committed, + "applied_index" => self.raft_group.raft.raft_log.applied, + ); + let mut msg = eraftpb::Message::new(); msg.set_from(self.peer_id()); msg.set_to(self.leader_id()); diff --git a/components/raftstore/src/store/util.rs b/components/raftstore/src/store/util.rs index c0e6f299d96..10ac229fa16 100644 --- a/components/raftstore/src/store/util.rs +++ b/components/raftstore/src/store/util.rs @@ -240,7 +240,7 @@ pub fn admin_cmd_epoch_lookup(admin_cmp_type: AdminCmdType) -> AdminCmdEpochStat AdminCmdType::CommitMerge => AdminCmdEpochState::new(true, true, true, false), AdminCmdType::RollbackMerge => AdminCmdEpochState::new(true, true, true, false), // Transfer leader - AdminCmdType::TransferLeader => AdminCmdEpochState::new(true, true, false, false), + AdminCmdType::TransferLeader => AdminCmdEpochState::new(false, false, false, false), // PrepareFlashback could be committed successfully before a split being applied, so we need // to check the epoch to make sure it's sent to a correct key range. 
// NOTICE: FinishFlashback will never meet the epoch not match error since any scheduling @@ -2325,6 +2325,7 @@ mod tests { AdminCmdType::InvalidAdmin, AdminCmdType::ComputeHash, AdminCmdType::VerifyHash, + AdminCmdType::TransferLeader, ] { let mut admin = AdminRequest::default(); admin.set_cmd_type(*ty); @@ -2346,7 +2347,6 @@ mod tests { AdminCmdType::PrepareMerge, AdminCmdType::CommitMerge, AdminCmdType::RollbackMerge, - AdminCmdType::TransferLeader, ] { let mut admin = AdminRequest::default(); admin.set_cmd_type(*ty); @@ -2382,7 +2382,6 @@ mod tests { AdminCmdType::PrepareMerge, AdminCmdType::CommitMerge, AdminCmdType::RollbackMerge, - AdminCmdType::TransferLeader, ] { let mut admin = AdminRequest::default(); admin.set_cmd_type(*ty); diff --git a/components/test_pd_client/src/pd.rs b/components/test_pd_client/src/pd.rs index ec7b43f6887..a4ee044e42a 100644 --- a/components/test_pd_client/src/pd.rs +++ b/components/test_pd_client/src/pd.rs @@ -1383,15 +1383,21 @@ impl TestPdClient { pub fn region_leader_must_be(&self, region_id: u64, peer: metapb::Peer) { for _ in 0..500 { sleep_ms(10); - if let Some(p) = self.cluster.rl().leaders.get(®ion_id) { - if *p == peer { - return; - } + if self.check_region_leader(region_id, peer.clone()) { + return; } } panic!("region {} must have leader: {:?}", region_id, peer); } + pub fn check_region_leader(&self, region_id: u64, peer: metapb::Peer) -> bool { + self.cluster + .rl() + .leaders + .get(®ion_id) + .map_or(false, |p| *p == peer) + } + // check whether region is split by split_key or not. pub fn check_split(&self, region: &metapb::Region, split_key: &[u8]) -> bool { // E.g, 1 [a, c) -> 1 [a, b) + 2 [b, c) diff --git a/tests/failpoints/cases/test_transfer_leader.rs b/tests/failpoints/cases/test_transfer_leader.rs index 3139ecf06b5..2324e64e382 100644 --- a/tests/failpoints/cases/test_transfer_leader.rs +++ b/tests/failpoints/cases/test_transfer_leader.rs @@ -13,9 +13,10 @@ use crossbeam::channel; use engine_traits::CF_LOCK; use futures::executor::block_on; use grpcio::{ChannelBuilder, Environment}; -use kvproto::{kvrpcpb::*, tikvpb::TikvClient}; +use kvproto::{kvrpcpb::*, metapb::PeerRole, pdpb, tikvpb::TikvClient}; use pd_client::PdClient; -use raft::eraftpb::MessageType; +use raft::eraftpb::{ConfChangeType, MessageType}; +use raftstore::store::Callback; use test_raftstore::*; use test_raftstore_macro::test_case; use tikv::storage::Snapshot; @@ -680,3 +681,285 @@ fn test_check_long_uncommitted_proposals_after_became_leader() { rx.recv_timeout(2 * cluster.cfg.raft_store.long_uncommitted_base_threshold.0) .unwrap(); } + +// This test simulates a scenario where a configuration change has been applied +// on the transferee, allowing a leader transfer to that peer even if the +// change hasn't been applied on the current leader. +// +// The setup involves a 4-node cluster where peer-1 starts as the leader. A +// configuration change is initiated to remove peer-2. This change commits +// successfully but only applies on peer-2 and peer-4. +// +// The expected result for leader transfer is: +// - It will fail to peer-2 because it has been removed. +// - It will fail to peer-3 because it has unapplied configuration change. +// - It will succeed to peer-4 because it has already applied the +// configuration change. 
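+// In the test below, the `apply_on_conf_change_1_1` and `apply_on_conf_change_3_1`
+// failpoints (added to apply.rs in this patch) pause the conf-change apply on
+// peer-1 and peer-3, so only peer-2 and peer-4 apply the removal of peer-2.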
+#[test] +fn test_when_applied_conf_change_on_transferee() { + let mut cluster = new_server_cluster(0, 4); + let pd_client = cluster.pd_client.clone(); + pd_client.disable_default_operator(); + let region_id = cluster.run_conf_change(); + pd_client.must_add_peer(region_id, new_peer(2, 2)); + pd_client.must_add_peer(region_id, new_peer(3, 3)); + pd_client.must_add_peer(region_id, new_peer(4, 4)); + + cluster.must_put(b"k1", b"v1"); + + fail::cfg("apply_on_conf_change_1_1", "pause").unwrap(); + fail::cfg("apply_on_conf_change_3_1", "pause").unwrap(); + + pd_client.remove_peer(region_id, new_peer(2, 2)); + sleep_ms(300); + // Peer 2 still exists since the leader hasn't applied the ConfChange + // yet. + pd_client.must_have_peer(region_id, new_peer(2, 2)); + + // Use async_put for insertion here to avoid timeout errors, as synchronize put + // would hang due to the leader's apply process being paused. + let _ = cluster.async_put(b"k2", b"v2").unwrap(); + + pd_client.transfer_leader(region_id, new_peer(2, 2), vec![]); + sleep_ms(300); + assert_eq!( + pd_client.check_region_leader(region_id, new_peer(2, 2)), + false + ); + + pd_client.transfer_leader(region_id, new_peer(3, 3), vec![]); + sleep_ms(300); + assert_eq!( + pd_client.check_region_leader(region_id, new_peer(3, 3)), + false + ); + + pd_client.transfer_leader(region_id, new_peer(4, 4), vec![]); + pd_client.region_leader_must_be(region_id, new_peer(4, 4)); + + // Verify the data completeness on the new leader. + must_get_equal(&cluster.get_engine(4), b"k1", b"v1"); + must_get_equal(&cluster.get_engine(4), b"k2", b"v2"); + + pd_client.must_none_peer(region_id, new_peer(2, 2)); +} + +// This test verifies that a leader transfer is rejected when the transferee +// has been demoted to a learner but the leader has not yet applied this +// configuration change. +#[test] +fn test_when_applied_conf_change_on_learner_transferee() { + let mut cluster = new_server_cluster(0, 3); + let pd_client = cluster.pd_client.clone(); + pd_client.disable_default_operator(); + let region_id = cluster.run_conf_change(); + pd_client.must_add_peer(region_id, new_peer(2, 2)); + pd_client.must_add_peer(region_id, new_peer(3, 3)); + pd_client.region_leader_must_be(region_id, new_peer(1, 1)); + + fail::cfg("apply_on_conf_change_1_1", "pause").unwrap(); + + // Demote peer-2 to be a learner. + pd_client.joint_confchange( + region_id, + vec![(ConfChangeType::AddLearnerNode, new_learner_peer(2, 2))], + ); + sleep_ms(300); + + pd_client.transfer_leader(region_id, new_peer(2, 2), vec![]); + sleep_ms(300); + assert_eq!( + pd_client.check_region_leader(region_id, new_peer(2, 2)), + false + ); + + pd_client.transfer_leader(region_id, new_peer(3, 3), vec![]); + pd_client.region_leader_must_be(region_id, new_peer(3, 3)); + let region = block_on(pd_client.get_region_by_id(region_id)) + .unwrap() + .unwrap(); + assert_eq!(region.get_peers()[1].get_role(), PeerRole::Learner); +} + +// This test verifies that a leader transfer is allowed when the transferee +// has applied a conf change but the leader has not yet applied. 
+#[test] +fn test_when_applied_conf_change_on_transferee_pessimistic_lock() { + let mut cluster = new_server_cluster(0, 4); + let pd_client = cluster.pd_client.clone(); + pd_client.disable_default_operator(); + let region_id = cluster.run_conf_change(); + pd_client.must_add_peer(region_id, new_peer(2, 2)); + pd_client.must_add_peer(region_id, new_peer(3, 3)); + pd_client.region_leader_must_be(region_id, new_peer(1, 1)); + + fail::cfg("apply_on_conf_change_1_1", "pause").unwrap(); + fail::cfg("propose_locks_before_transfer_leader", "return").unwrap(); + + pd_client.remove_peer(region_id, new_peer(2, 2)); + sleep_ms(300); + // Peer 2 still exists since the leader hasn't applied the ConfChange + // yet. + pd_client.must_have_peer(region_id, new_peer(2, 2)); + + pd_client.transfer_leader(region_id, new_peer(3, 3), vec![]); + pd_client.region_leader_must_be(region_id, new_peer(3, 3)); + + pd_client.must_none_peer(region_id, new_peer(2, 2)); +} + +// This test verifies that a leader transfer is allowed when the transferee +// has applied a region split but the leader has not yet applied. +#[test] +fn test_when_applied_region_split_on_transferee_pessimistic_lock() { + let mut cluster = new_server_cluster(0, 3); + // To enable the transferee to quickly report the split region information. + cluster.cfg.raft_store.pd_heartbeat_tick_interval = ReadableDuration::millis(50); + let pd_client = cluster.pd_client.clone(); + pd_client.disable_default_operator(); + let region_id = cluster.run_conf_change(); + pd_client.must_add_peer(region_id, new_peer(2, 2)); + pd_client.must_add_peer(region_id, new_peer(3, 3)); + // Use peer_id 4 as the leader since we want to reuse the failpoint + // apply_before_split_1_3. + pd_client.transfer_leader(region_id, new_peer(3, 3), vec![]); + pd_client.region_leader_must_be(region_id, new_peer(3, 3)); + + fail::cfg("apply_before_split_1_1", "pause").unwrap(); + fail::cfg("apply_before_split_1_3", "pause").unwrap(); + fail::cfg("propose_locks_before_transfer_leader", "return").unwrap(); + + let region = pd_client.get_region(b"x1").unwrap(); + cluster.split_region(®ion, "x2".as_bytes(), Callback::None); + sleep_ms(300); + // Expect split is pending on the current leader. + assert_eq!(pd_client.get_regions_number(), 1); + + pd_client.transfer_leader(region_id, new_peer(2, 2), vec![]); + sleep_ms(300); + pd_client.region_leader_must_be(region_id, new_peer(2, 2)); + sleep_ms(300); + // TODO(hwy): We cannot enable this assertion yet since https://github.com/tikv/tikv/issues/12410. + // Expect split is finished on the new leader. + // assert_eq!(pd_client.get_regions_number(), 2); +} + +// This test verifies that a leader transfer is: +// - Not allowed for the source region when the transferee has applied a region +// commit-merge but the leader has not yet applied. +// - Allowed for the source region when the transferee has applied a region +// prepare-merge but the leader has not yet applied. +// - Allowed for the target region in both scenarios above. +#[test] +fn test_when_applied_region_merge_on_transferee_pessimistic_lock() { + let mut cluster = new_server_cluster(0, 4); + // To enable the transferee to quickly report the merged region information. 
+ cluster.cfg.raft_store.pd_heartbeat_tick_interval = ReadableDuration::millis(50); + let pd_client = cluster.pd_client.clone(); + pd_client.disable_default_operator(); + let region_id = cluster.run_conf_change(); + pd_client.must_add_peer(region_id, new_peer(2, 2)); + // Use peer_id 4 since we want to reuse the failpoint + // apply_before_commit_merge_except_1_4. + pd_client.must_add_peer(region_id, new_peer(4, 4)); + pd_client.region_leader_must_be(region_id, new_peer(1, 1)); + + let region = cluster.get_region(b"x2"); + let region_id = region.id; + pd_client.split_region(region, pdpb::CheckPolicy::Usekey, vec![b"x2".to_vec()]); + sleep_ms(300); + let left_region = cluster.get_region(b"x1"); + let right_region = cluster.get_region(b"x3"); + assert_eq!(region_id, right_region.get_id()); + let left_region_peer_on_store1 = new_peer( + left_region.get_peers()[0].store_id, + left_region.get_peers()[0].id, + ); + pd_client.region_leader_must_be(left_region.get_id(), left_region_peer_on_store1); + pd_client.region_leader_must_be(right_region.get_id(), new_peer(1, 1)); + + fail::cfg("apply_before_commit_merge_except_1_4", "pause").unwrap(); + fail::cfg("propose_locks_before_transfer_leader", "return").unwrap(); + + assert_eq!(pd_client.get_regions_number(), 2); + // Merge right to left. + pd_client.merge_region(right_region.get_id(), left_region.get_id()); + sleep_ms(300); + + pd_client.transfer_leader(right_region.get_id(), new_peer(4, 4), vec![]); + sleep_ms(300); + assert_eq!( + pd_client.check_region_leader(right_region.get_id(), new_peer(4, 4)), + false + ); + + pd_client.transfer_leader(right_region.get_id(), new_peer(2, 2), vec![]); + sleep_ms(300); + assert_eq!( + pd_client.check_region_leader(right_region.get_id(), new_peer(2, 2)), + false + ); + + assert_eq!(left_region.get_peers()[2].store_id, 4); + let left_region_peer_on_store4 = new_peer( + left_region.get_peers()[2].store_id, + left_region.get_peers()[2].id, + ); + pd_client.transfer_leader( + left_region.get_id(), + left_region_peer_on_store4.clone(), + vec![], + ); + pd_client.region_leader_must_be(left_region.get_id(), left_region_peer_on_store4); + sleep_ms(300); + + let left_region_peer_on_store2 = new_peer( + left_region.get_peers()[1].store_id, + left_region.get_peers()[1].id, + ); + pd_client.transfer_leader( + left_region.get_id(), + left_region_peer_on_store2.clone(), + vec![], + ); + pd_client.region_leader_must_be(left_region.get_id(), left_region_peer_on_store2); + sleep_ms(300); + + assert_eq!(pd_client.get_regions_number(), 1); +} + +// This test verifies that a leader transfer is allowed when the transferee +// has applied a witness switch but the leader has not yet applied. +#[test] +fn test_when_applied_witness_switch_on_transferee_pessimistic_lock() { + let mut cluster = new_server_cluster(0, 3); + let pd_client = cluster.pd_client.clone(); + pd_client.disable_default_operator(); + let region_id = cluster.run_conf_change(); + pd_client.must_add_peer(region_id, new_peer(2, 2)); + pd_client.must_add_peer(region_id, new_peer(3, 3)); + pd_client.transfer_leader(region_id, new_peer(2, 2), vec![]); + pd_client.region_leader_must_be(region_id, new_peer(2, 2)); + + // Pause applying on the current leader (peer-2). + fail::cfg("before_exec_batch_switch_witness", "pause").unwrap(); + fail::cfg("propose_locks_before_transfer_leader", "return").unwrap(); + + // Demote peer-3 to be a witness. 
+ pd_client.switch_witnesses(region_id, vec![3], vec![true]); + sleep_ms(300); + + pd_client.transfer_leader(region_id, new_peer(3, 3), vec![]); + sleep_ms(300); + assert_eq!( + pd_client.check_region_leader(region_id, new_peer(3, 3)), + false + ); + + pd_client.transfer_leader(region_id, new_peer(1, 1), vec![]); + pd_client.region_leader_must_be(region_id, new_peer(1, 1)); + let region = block_on(pd_client.get_region_by_id(region_id)) + .unwrap() + .unwrap(); + assert!(region.get_peers()[2].is_witness); +} From 0c1d2178bbbc7da93b21094ecaedb1fcded3ef3f Mon Sep 17 00:00:00 2001 From: lucasliang Date: Thu, 7 Nov 2024 13:04:28 +0800 Subject: [PATCH 17/86] raft-engine: fix panics when reading entries on compacted raft logs (#17765) close tikv/tikv#17383, close tikv/tikv#17760 To address the corner case where a read thread encounters a panic due to reading with a stale index from the `Memtable` in raft-engine, which has been updated by a background thread that has already purged the stale logs. Signed-off-by: lucasliang Co-authored-by: ti-chi-bot[bot] <108142056+ti-chi-bot[bot]@users.noreply.github.com> --- Cargo.lock | 34 ++++++++++++++++++++-------------- 1 file changed, 20 insertions(+), 14 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index e494e5540cd..803c220a045 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2901,6 +2901,12 @@ version = "0.4.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "95505c38b4572b2d910cecb0281560f54b440a19336cbbcb27bf6ce6adc6f5a8" +[[package]] +name = "heck" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2304e00983f87ffb38b55b444b5e3b60a884b5d30c0fca7d82fe33449bbe55ea" + [[package]] name = "hermit-abi" version = "0.2.6" @@ -3614,9 +3620,9 @@ dependencies = [ [[package]] name = "lz4-sys" -version = "1.9.4" +version = "1.9.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "57d27b317e207b10f69f5e75494119e391a96f48861ae870d1da6edac98ca900" +checksum = "e9764018d143cc854c9f17f0b907de70f14393b1f502da6375dce70f00514eb3" dependencies = [ "cc", "libc 0.2.151", @@ -4820,7 +4826,7 @@ dependencies = [ [[package]] name = "raft-engine" version = "0.4.2" -source = "git+https://github.com/tikv/raft-engine.git#cd1533d79cfea02d005b458bd70fd9675846c9c6" +source = "git+https://github.com/tikv/raft-engine.git#de1ec937529e3a88e093db0cf0d403522565fe64" dependencies = [ "byteorder", "crc32fast", @@ -4847,7 +4853,7 @@ dependencies = [ "scopeguard", "serde", "serde_repr", - "strum 0.25.0", + "strum 0.26.3", "thiserror", ] @@ -5668,9 +5674,9 @@ checksum = "f97841a747eef040fcd2e7b3b9a220a7205926e60488e673d9e4926d27772ce5" [[package]] name = "serde" -version = "1.0.193" +version = "1.0.194" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "25dd9975e68d0cb5aa1120c288333fc98731bd1dd12f561e468ea4728c042b89" +checksum = "0b114498256798c94a0689e1a15fec6005dee8ac1f41de56404b67afc2a4b773" dependencies = [ "serde_derive", ] @@ -5687,9 +5693,9 @@ dependencies = [ [[package]] name = "serde_derive" -version = "1.0.193" +version = "1.0.194" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "43576ca501357b9b071ac53cdc7da8ef0cbd9493d8df094cd821777ea6e894d3" +checksum = "a3385e45322e8f9931410f01b3031ec534c3947d0e94c18049af4d9f9907d4e0" dependencies = [ "proc-macro2", "quote", @@ -6191,11 +6197,11 @@ dependencies = [ [[package]] name = "strum" -version = "0.25.0" +version = "0.26.3" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "290d54ea6f91c969195bdbcd7442c8c2a2ba87da8bf60a7ee86a235d4bc1e125" +checksum = "8fec0f0aef304996cf250b31b5a10dee7980c85da9d759361292b8bca5a18f06" dependencies = [ - "strum_macros 0.25.0", + "strum_macros 0.26.4", ] [[package]] @@ -6212,11 +6218,11 @@ dependencies = [ [[package]] name = "strum_macros" -version = "0.25.0" +version = "0.26.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fe9f3bd7d2e45dcc5e265fbb88d6513e4747d8ef9444cf01a533119bce28a157" +checksum = "4c6bee85a5a24955dc440386795aa378cd9cf82acd5f764469152d2270e581be" dependencies = [ - "heck 0.4.1", + "heck 0.5.0", "proc-macro2", "quote", "rustversion", From 7bae899b688189caf6ca06f3af3d819e757ad26f Mon Sep 17 00:00:00 2001 From: glorv Date: Fri, 8 Nov 2024 14:59:09 +0800 Subject: [PATCH 18/86] in_memory_engine: fix the incorrect state of in_gc (#17789) close tikv/tikv#17788 Avoid can `on_gc_finished` when a new GC task is not run because there is another unfinished task. Signed-off-by: glorv --- components/in_memory_engine/src/background.rs | 35 +++++++++++++++---- 1 file changed, 28 insertions(+), 7 deletions(-) diff --git a/components/in_memory_engine/src/background.rs b/components/in_memory_engine/src/background.rs index 65173d40f5f..fa184000b86 100644 --- a/components/in_memory_engine/src/background.rs +++ b/components/in_memory_engine/src/background.rs @@ -458,11 +458,6 @@ impl BackgroundRunnerCore { /// Returns empty vector if there are no regions cached or the previous gc /// is not finished. fn regions_for_gc(&self) -> Vec { - // another gc task is running, skipped. - if !self.engine.region_manager().try_set_regions_in_gc(true) { - return vec![]; - } - let regions_map = self.engine.region_manager().regions_map.read(); regions_map .regions() @@ -1100,6 +1095,10 @@ impl Runnable for BackgroundRunner { "oldest_sequence" => seqno, ); let core = self.core.clone(); + // another gc task is running, skipped. + if !core.engine.region_manager().try_set_regions_in_gc(true) { + return; + } let regions = core.regions_for_gc(); if !regions.is_empty() { let f = async move { @@ -2863,15 +2862,37 @@ pub mod tests { Arc::new(MockPdClient {}), None, ); + assert!( + runner + .core + .engine + .region_manager() + .try_set_regions_in_gc(true) + ); let regions = runner.core.regions_for_gc(); assert_eq!(2, regions.len()); - // until the previous gc finished, node regions will be returned - assert!(runner.core.regions_for_gc().is_empty()); + // try run another gc task will return false. + assert!( + !runner + .core + .engine + .region_manager() + .try_set_regions_in_gc(true) + ); + // finished the current gc task. runner.core.on_gc_finished(); + assert!( + runner + .core + .engine + .region_manager() + .try_set_regions_in_gc(true) + ); let regions = runner.core.regions_for_gc(); assert_eq!(2, regions.len()); + runner.core.on_gc_finished(); } #[derive(Default)] From a210208c42af4001a923127ce30bfdec069fbd22 Mon Sep 17 00:00:00 2001 From: Neil Shen Date: Sat, 9 Nov 2024 20:28:47 +0800 Subject: [PATCH 19/86] In-memory Engine: Adjust memory settings based on available memory (#17515) ref tikv/tikv#16141 This commit adjusts the following in-memory-engine defaults: * `capacity`: Now IME uses 10% of the block cache and takes an equal amount of memory from the system. This is based on tests showing that the IME rarely fills its full capacity. 
* `mvcc_amplification_threshold`: Change from 100 to 10 which benefit common workloads like TPCc (50 warehouse), saving approximately 20% of unified read pool CPU usage. Also, it addresses two security issues: * Remove ignore of RUSTSEC-2024-0006, as vulnerable shlex 0.1.1 is removed by #13814 * Upgrade hashbrown from yanked 0.15.0 to 0.15.1 Signed-off-by: Neil Shen Co-authored-by: ti-chi-bot[bot] <108142056+ti-chi-bot[bot]@users.noreply.github.com> --- Cargo.lock | 6 +- components/in_memory_engine/src/config.rs | 162 ++++++++++++++++-- .../in_memory_engine/src/region_stats.rs | 2 +- deny.toml | 4 - src/config/mod.rs | 6 +- 5 files changed, 152 insertions(+), 28 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 803c220a045..9774f4cdc84 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2863,9 +2863,9 @@ dependencies = [ [[package]] name = "hashbrown" -version = "0.15.0" +version = "0.15.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1e087f84d4f86bf4b218b927129862374b72199ae7d8657835f1e89000eea4fb" +checksum = "3a9bfc1af68b1726ea47d3d5109de126281def866b33970e10fbab11b5dafab3" dependencies = [ "allocator-api2", "equivalent", @@ -3615,7 +3615,7 @@ version = "0.12.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "234cf4f4a04dc1f57e24b96cc0cd600cf2af460d4161ac5ecdd0af8e1f3b2a38" dependencies = [ - "hashbrown 0.15.0", + "hashbrown 0.15.1", ] [[package]] diff --git a/components/in_memory_engine/src/config.rs b/components/in_memory_engine/src/config.rs index 93cd01ba99b..57ba6a548cd 100644 --- a/components/in_memory_engine/src/config.rs +++ b/components/in_memory_engine/src/config.rs @@ -1,10 +1,11 @@ use std::{error::Error, sync::Arc, time::Duration}; use online_config::{ConfigChange, ConfigManager, OnlineConfig}; +use raftstore::coprocessor::config::SPLIT_SIZE; use serde::{Deserialize, Serialize}; use tikv_util::{ config::{ReadableDuration, ReadableSize, VersionTrack}, - info, + info, warn, }; const DEFAULT_GC_RUN_INTERVAL: Duration = Duration::from_secs(180); @@ -23,8 +24,20 @@ const MAX_WRITE_KV_SPEED: u64 = 20 * 1024 * 1024; // on `capacity`. const MAX_RESERVED_DURATION_FOR_WRITE: u64 = 10; // Regions' mvcc read amplification statistics is updated every 1min, so we set -// the minimal load&evcit check duration to 2min. +// the minimal load&evict check duration to 2min. const MIN_LOAD_EVICT_INTERVAL: Duration = Duration::from_secs(120); +// The default threshold for mvcc amplification. Test shows setting it to 10 +// can benefit common workloads, eg, TPCc (50 warehouse), saving about 20% of +// unified read pool CPU usage. +const DEFAULT_MVCC_AMPLIFICATION_THRESHOLD: usize = 10; +// The minimum required capacity, 2 times region split size. +const MIN_CAPACITY: u64 = 2 * SPLIT_SIZE.0; +// The maximum capacity, 5GB should be large enough. +const MAX_CAPACITY: u64 = ReadableSize::gb(5).0; +// By default, the IME uses 10% of the block cache and takes an equal amount of +// memory from the system. This is based on tests showing that the IME rarely +// fills its full capacity. 
+const DEFAULT_CAPACITY_FROM_BLOCK_CACHE_RATIO: f64 = 0.1; #[derive(Clone, Serialize, Deserialize, Debug, PartialEq, OnlineConfig)] #[serde(default, rename_all = "kebab-case")] @@ -76,7 +89,7 @@ impl Default for InMemoryEngineConfig { load_evict_interval: ReadableDuration(Duration::from_secs(300)), evict_threshold: None, capacity: None, - mvcc_amplification_threshold: 100, + mvcc_amplification_threshold: DEFAULT_MVCC_AMPLIFICATION_THRESHOLD, cross_check_interval: ReadableDuration(Duration::from_secs(0)), expected_region_size: raftstore::coprocessor::config::SPLIT_SIZE, } @@ -84,13 +97,37 @@ impl Default for InMemoryEngineConfig { } impl InMemoryEngineConfig { - pub fn validate(&mut self, region_split_size: ReadableSize) -> Result<(), Box> { + pub fn validate( + &mut self, + block_cache_capacity: &mut u64, + region_split_size: ReadableSize, + ) -> Result<(), Box> { if !self.enable { return Ok(()); } + let capacity = + (*block_cache_capacity as f64 * DEFAULT_CAPACITY_FROM_BLOCK_CACHE_RATIO * 2.0) as u64; + if (capacity < MIN_CAPACITY || capacity <= region_split_size.0) && self.capacity.is_none() { + self.enable = false; + warn!( + "in-memory engine is disabled because capacity {} is too small, \ + try set `capacity` manually and make sure it's larger than {} \ + and region size {}", + ReadableSize(capacity), ReadableSize(MIN_CAPACITY), + region_split_size; + ); + return Ok(()); + } if self.capacity.is_none() { - return Err("evict-threshold or capacity not set".into()); + let capacity = std::cmp::min(MAX_CAPACITY, capacity); + self.capacity = Some(ReadableSize(capacity)); + *block_cache_capacity -= capacity / 2; + info!( + "in-memory engine capacity is set to {}, block cache capacity is set to {}", + self.capacity.as_ref().unwrap(), + ReadableSize(*block_cache_capacity), + ); } if self.evict_threshold.is_none() { @@ -168,7 +205,7 @@ impl InMemoryEngineConfig { evict_threshold: Some(ReadableSize::gb(1)), capacity: Some(ReadableSize::gb(2)), expected_region_size: ReadableSize::mb(20), - mvcc_amplification_threshold: 10, + mvcc_amplification_threshold: DEFAULT_MVCC_AMPLIFICATION_THRESHOLD, cross_check_interval: ReadableDuration(Duration::from_secs(0)), } } @@ -211,34 +248,55 @@ mod tests { use super::*; const DEFAULT_REGION_SPLIT_SIZE: ReadableSize = ReadableSize::mb(256); + const SMALL_ENOUGH_BLOCK_CACHE_CAPACITY: u64 = (MIN_CAPACITY + 1) / 2; + const LARGE_ENOUGH_BLOCK_CACHE_CAPACITY: u64 = + (MIN_CAPACITY + 1) * (1.0 / DEFAULT_CAPACITY_FROM_BLOCK_CACHE_RATIO) as u64; #[test] fn test_validate() { + // By default IME is disabled. let mut cfg = InMemoryEngineConfig::default(); - cfg.validate(DEFAULT_REGION_SPLIT_SIZE).unwrap(); + let mut block_cache_capacity = 0; + cfg.validate(&mut block_cache_capacity, DEFAULT_REGION_SPLIT_SIZE) + .unwrap(); + assert!(!cfg.enable); + // Correctly configured IME should pass validation. + let mut cfg = InMemoryEngineConfig::default(); cfg.enable = true; - assert!(cfg.validate(DEFAULT_REGION_SPLIT_SIZE).is_err()); - cfg.capacity = Some(ReadableSize::gb(2)); cfg.evict_threshold = Some(ReadableSize::gb(1)); cfg.stop_load_threshold = Some(ReadableSize::gb(1)); - cfg.validate(DEFAULT_REGION_SPLIT_SIZE).unwrap(); + cfg.validate(&mut block_cache_capacity, DEFAULT_REGION_SPLIT_SIZE) + .unwrap(); // Error if less than MIN_GC_RUN_INTERVAL. 
cfg.gc_run_interval = ReadableDuration(Duration::ZERO); - assert!(cfg.validate(DEFAULT_REGION_SPLIT_SIZE).is_err()); + assert!( + cfg.validate(&mut block_cache_capacity, DEFAULT_REGION_SPLIT_SIZE) + .is_err() + ); cfg.gc_run_interval = ReadableDuration(Duration::from_secs(9)); - assert!(cfg.validate(DEFAULT_REGION_SPLIT_SIZE).is_err()); + assert!( + cfg.validate(&mut block_cache_capacity, DEFAULT_REGION_SPLIT_SIZE) + .is_err() + ); // Error if larger than MIN_GC_RUN_INTERVAL. cfg.gc_run_interval = ReadableDuration(Duration::from_secs(601)); - assert!(cfg.validate(DEFAULT_REGION_SPLIT_SIZE).is_err()); + assert!( + cfg.validate(&mut block_cache_capacity, DEFAULT_REGION_SPLIT_SIZE) + .is_err() + ); cfg.gc_run_interval = ReadableDuration(Duration::MAX); - assert!(cfg.validate(DEFAULT_REGION_SPLIT_SIZE).is_err()); + assert!( + cfg.validate(&mut block_cache_capacity, DEFAULT_REGION_SPLIT_SIZE) + .is_err() + ); cfg.gc_run_interval = ReadableDuration(Duration::from_secs(180)); - cfg.validate(DEFAULT_REGION_SPLIT_SIZE).unwrap(); + cfg.validate(&mut block_cache_capacity, DEFAULT_REGION_SPLIT_SIZE) + .unwrap(); #[track_caller] fn check_delta( @@ -255,7 +313,8 @@ mod tests { let mut cfg = InMemoryEngineConfig::default(); cfg.enable = true; cfg.capacity = Some(ReadableSize::gb(1)); - cfg.validate(DEFAULT_REGION_SPLIT_SIZE).unwrap(); + cfg.validate(&mut block_cache_capacity, DEFAULT_REGION_SPLIT_SIZE) + .unwrap(); check_delta( &cfg, ReadableSize::gb(1) / 10, @@ -265,13 +324,80 @@ mod tests { let mut cfg = InMemoryEngineConfig::default(); cfg.enable = true; cfg.capacity = Some(ReadableSize::gb(5)); - cfg.validate(DEFAULT_REGION_SPLIT_SIZE).unwrap(); + cfg.validate(&mut block_cache_capacity, DEFAULT_REGION_SPLIT_SIZE) + .unwrap(); check_delta(&cfg, ReadableSize::mb(200), ReadableSize::mb(712)); let mut cfg = InMemoryEngineConfig::default(); cfg.enable = true; cfg.capacity = Some(ReadableSize::gb(5)); - cfg.validate(ReadableSize::mb(96)).unwrap(); + cfg.validate(&mut block_cache_capacity, ReadableSize::mb(96)) + .unwrap(); check_delta(&cfg, ReadableSize::mb(200), ReadableSize::mb(392)); + + // Small capacity disables IME. + let mut block_cache_capacity = SMALL_ENOUGH_BLOCK_CACHE_CAPACITY; + let mut cfg = InMemoryEngineConfig::default(); + cfg.enable = true; + cfg.validate(&mut block_cache_capacity, ReadableSize::mb(96)) + .unwrap(); + assert!(!cfg.enable); + assert_eq!(block_cache_capacity, SMALL_ENOUGH_BLOCK_CACHE_CAPACITY); + // ... unless capacity is set manually. + let mut cfg = InMemoryEngineConfig::default(); + cfg.enable = true; + cfg.capacity = Some(ReadableSize(MIN_CAPACITY / 2 - 2)); + cfg.validate(&mut block_cache_capacity, ReadableSize::mb(96)) + .unwrap(); + assert!(cfg.enable); + assert_eq!(cfg.capacity.unwrap().0, MIN_CAPACITY / 2 - 2); + // block_cache_capacity should not be reduced by a manual set capacity. + assert_eq!(block_cache_capacity, SMALL_ENOUGH_BLOCK_CACHE_CAPACITY); + + // Validate will automatically set capacity if not set. + let mut cfg = InMemoryEngineConfig::default(); + cfg.enable = true; + let mut block_cache_capacity = LARGE_ENOUGH_BLOCK_CACHE_CAPACITY; + cfg.validate(&mut block_cache_capacity, DEFAULT_REGION_SPLIT_SIZE) + .unwrap(); + assert!(cfg.capacity.is_some(), "{:?}", cfg); + assert!(cfg.evict_threshold.is_some(), "{:?}", cfg); + assert!(cfg.stop_load_threshold.is_some(), "{:?}", cfg); + // block_cache_capacity should be reduced by capacity. 
+ assert!( + LARGE_ENOUGH_BLOCK_CACHE_CAPACITY - cfg.capacity.unwrap().0 / 2 - 1 + < block_cache_capacity + && block_cache_capacity + < LARGE_ENOUGH_BLOCK_CACHE_CAPACITY - cfg.capacity.unwrap().0 / 2 + 1, + "block_cache_capacity: {}, capacity: {}", + block_cache_capacity, + cfg.capacity.unwrap().0, + ); + + // Capacity has a maximum limit. + let mut cfg = InMemoryEngineConfig::default(); + cfg.enable = true; + let mut block_cache_capacity = ReadableSize::gb(100).0; + cfg.validate(&mut block_cache_capacity, DEFAULT_REGION_SPLIT_SIZE) + .unwrap(); + assert_eq!(cfg.capacity.unwrap().0, MAX_CAPACITY); + assert!( + ReadableSize::gb(100).0 - cfg.capacity.unwrap().0 / 2 - 1 < block_cache_capacity + && block_cache_capacity < ReadableSize::gb(100).0 - cfg.capacity.unwrap().0 / 2 + 1, + "block_cache_capacity: {}, capacity: {}", + block_cache_capacity, + cfg.capacity.unwrap().0 + ); + + // ... unless capacity is set manually. + let mut cfg = InMemoryEngineConfig::default(); + cfg.enable = true; + cfg.capacity = Some(ReadableSize(2 * MAX_CAPACITY)); + let mut block_cache_capacity = ReadableSize::gb(100).0; + cfg.validate(&mut block_cache_capacity, DEFAULT_REGION_SPLIT_SIZE) + .unwrap(); + assert_eq!(cfg.capacity.unwrap().0, 2 * MAX_CAPACITY); + // block_cache_capacity should not be reduced by a manual set capacity. + assert_eq!(block_cache_capacity, ReadableSize::gb(100).0); } } diff --git a/components/in_memory_engine/src/region_stats.rs b/components/in_memory_engine/src/region_stats.rs index 5f81dc82860..852bd64ee5a 100644 --- a/components/in_memory_engine/src/region_stats.rs +++ b/components/in_memory_engine/src/region_stats.rs @@ -265,7 +265,7 @@ impl RegionStatsManager { r.cop_detail.mvcc_amplification() < mvcc_amplification_to_filter } else { - // In this case, memory usage is relarively low, we only evict those that should not be cached apparently. + // In this case, memory usage is relatively low, we only evict those that should not be cached apparently. r.cop_detail.mvcc_amplification() <= self.config.value().mvcc_amplification_threshold as f64 / MVCC_AMPLIFICATION_FILTER_FACTOR || r.cop_detail.iterated_count() < avg_top_next_prev / ITERATED_COUNT_FILTER_FACTOR diff --git a/deny.toml b/deny.toml index 794ab97a21c..b6bfe29f2f2 100644 --- a/deny.toml +++ b/deny.toml @@ -73,10 +73,6 @@ ignore = [ # # TODO: Upgrade clap to v4.x. "RUSTSEC-2021-0145", - # Ignore RUSTSEC-2024-0006 as it only included by "rusoto_credential" crate. - # - # TODO: Upgrade shlex@0.1.1 to v1.3.x. - "RUSTSEC-2024-0006", ] # TiKV is licensed under Apache 2.0, according to ASF 3RD PARTY LICENSE POLICY, diff --git a/src/config/mod.rs b/src/config/mod.rs index aca990dbc15..26c2f9000c3 100644 --- a/src/config/mod.rs +++ b/src/config/mod.rs @@ -3961,8 +3961,10 @@ impl TikvConfig { return Err("in-memory-engine is unavailable for feature TTL or API v2".into()); } self.in_memory_engine.expected_region_size = self.coprocessor.region_split_size(); - self.in_memory_engine - .validate(self.coprocessor.region_split_size())?; + self.in_memory_engine.validate( + &mut self.storage.block_cache.capacity.as_mut().unwrap().0, + self.coprocessor.region_split_size(), + )?; // Now, only support cross check in in-memory engine when compaction filter is // enabled. 
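Taken together, the new defaults amount to the following simplified model (a
sketch only: the constants are the ones introduced in config.rs above, the
evict/stop-load thresholds and the manually-set-capacity path are omitted, and
the helper name is made up for illustration):

    // Rough model of the automatic IME sizing added in this patch.
    const RATIO: f64 = 0.1;              // DEFAULT_CAPACITY_FROM_BLOCK_CACHE_RATIO
    const MIN_CAPACITY: u64 = 192 << 20; // 2 * SPLIT_SIZE (SPLIT_SIZE is 96 MiB by default)
    const MAX_CAPACITY: u64 = 5 << 30;   // 5 GiB

    /// Returns (ime_capacity, remaining_block_cache), or None when IME stays disabled.
    fn auto_capacity(block_cache: u64, region_split_size: u64) -> Option<(u64, u64)> {
        // 10% of the block cache plus an equal amount taken from the system.
        let capacity = (block_cache as f64 * RATIO * 2.0) as u64;
        if capacity < MIN_CAPACITY || capacity <= region_split_size {
            return None; // too small; IME stays disabled unless `capacity` is set manually
        }
        let capacity = capacity.min(MAX_CAPACITY);
        // Half of the capacity comes out of the block cache, so it shrinks by capacity / 2.
        Some((capacity, block_cache - capacity / 2))
    }

For example, a 10 GiB block cache yields a 2 GiB in-memory engine and leaves
9 GiB of block cache, while a 50 GiB block cache is capped at a 5 GiB
in-memory engine and gives up 2.5 GiB.
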
From 962553d85013d273008a9168f843b577e591386d Mon Sep 17 00:00:00 2001 From: glorv Date: Mon, 11 Nov 2024 15:25:51 +0800 Subject: [PATCH 20/86] test: fix the test failure that clear_regions_in_written may be called twice (#17798) close tikv/tikv#17797 If the last call `prepare_for_region` returns `NotInCache`, `clear_written_regions` can be called twice in both `write_impl` and `clear`, which will cause panic. This pr changes `clear_written_regions` to consume `self.written_regions`to avoid this kind of duplicate clear. Signed-off-by: glorv --- .../in_memory_engine/src/write_batch.rs | 26 +++++++++---------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/components/in_memory_engine/src/write_batch.rs b/components/in_memory_engine/src/write_batch.rs index a66c3a3174d..f44c44ce217 100644 --- a/components/in_memory_engine/src/write_batch.rs +++ b/components/in_memory_engine/src/write_batch.rs @@ -206,6 +206,16 @@ impl RegionCacheWriteBatch { Ok(()) } + fn clear_written_regions(&mut self) { + if !self.written_regions.is_empty() { + self.engine + .core + .region_manager() + .clear_regions_in_being_written(&self.written_regions); + self.written_regions.clear(); + } + } + // Note: `seq` is the sequence number of the first key in this write batch in // the RocksDB, which will be incremented automatically for each key, so // that all keys have unique sequence numbers. @@ -244,12 +254,7 @@ impl RegionCacheWriteBatch { fail::fail_point!("ime_on_region_cache_write_batch_write_consumed"); fail::fail_point!("ime_before_clear_regions_in_being_written"); - if !self.written_regions.is_empty() { - self.engine - .core - .region_manager() - .clear_regions_in_being_written(&self.written_regions); - } + self.clear_written_regions(); self.engine .lock_modification_bytes @@ -344,7 +349,7 @@ impl RegionCacheWriteBatch { #[inline] fn record_last_written_region(&mut self) { - // NOTE: event if the region is evcited due to memory limit, we still + // NOTE: even if the region is evcited due to memory limit, we still // need to track it because its "in written" flag has been set. if self.region_cache_status != RegionCacheStatus::NotInCache { let last_region = self.current_region.take().unwrap(); @@ -502,12 +507,7 @@ impl WriteBatch for RegionCacheWriteBatch { self.record_last_written_region(); // region's `in_written` is not cleaned as `write_impl` is not called, // so we should do it here. - if !self.written_regions.is_empty() { - self.engine - .core - .region_manager() - .clear_regions_in_being_written(&self.written_regions); - } + self.clear_written_regions(); } self.region_cache_status = RegionCacheStatus::NotInCache; From af3903947ff1db0d1436944016878097c9d8bac8 Mon Sep 17 00:00:00 2001 From: Spade A <71589810+SpadeA-Tang@users.noreply.github.com> Date: Tue, 12 Nov 2024 15:52:10 +0800 Subject: [PATCH 21/86] In-memory engine: handle error when getting regions info (#17805) ref tikv/tikv#16141 handle error when getting regions info Signed-off-by: SpadeA-Tang --- .../in_memory_engine/src/region_stats.rs | 50 +++++++++++++++---- 1 file changed, 41 insertions(+), 9 deletions(-) diff --git a/components/in_memory_engine/src/region_stats.rs b/components/in_memory_engine/src/region_stats.rs index 852bd64ee5a..c3372bc4894 100644 --- a/components/in_memory_engine/src/region_stats.rs +++ b/components/in_memory_engine/src/region_stats.rs @@ -138,10 +138,21 @@ impl RegionStatsManager { ) -> (Vec, Vec) { // Get regions' stat of the cached region and sort them by next + prev in // descending order. 
- let mut regions_stat = self + let mut regions_stat = match self .info_provider .get_regions_stat(cached_region_ids.clone()) - .unwrap(); + { + Ok(regions_stat) => regions_stat, + Err(e) => { + error!( + "ime get regions stat failed"; + "err" => ?e, + ); + assert!(tikv_util::thread_group::is_shutdown(!cfg!(test))); + return (vec![], vec![]); + } + }; + regions_stat.sort_by(|a, b| { let next_prev_a = a.1.cop_detail.iterated_count(); let next_prev_b = b.1.cop_detail.iterated_count(); @@ -179,13 +190,24 @@ impl RegionStatsManager { / self.expected_region_size(); let expected_num_regions = usize::max(1, current_region_count + expected_new_count); info!("ime collect_changed_ranges"; "num_regions" => expected_num_regions); - let curr_top_regions = self + let curr_top_regions = match self .info_provider .get_top_regions(NonZeroUsize::try_from(expected_num_regions).unwrap()) - .unwrap() // TODO (afeinberg): Potentially custom error handling here. - .iter() - .map(|(r, region_stats)| (r.id, (r.clone(), region_stats.clone()))) - .collect::>(); + { + Ok(top_regions) => top_regions + .iter() + .map(|(r, region_stats)| (r.id, (r.clone(), region_stats.clone()))) + .collect::>(), + Err(e) => { + error!( + "ime get top regions failed"; + "err" => ?e, + ); + assert!(tikv_util::thread_group::is_shutdown(!cfg!(test))); + return (vec![], vec![]); + } + }; + { let mut region_loaded_map = self.region_loaded_at.write().unwrap(); for ®ion_id in curr_top_regions.keys() { @@ -329,10 +351,20 @@ impl RegionStatsManager { { // Get regions' stat of the cached region and sort them by next + prev in // descending order. - let regions_activity = self + let regions_activity = match self .info_provider .get_regions_stat(cached_region_ids.clone()) - .unwrap(); + { + Ok(regions_stat) => regions_stat, + Err(e) => { + error!( + "ime get regions stat failed"; + "err" => ?e, + ); + assert!(tikv_util::thread_group::is_shutdown(!cfg!(test))); + return; + } + }; if regions_activity.is_empty() { return; } From 9be46d2c70533bcd080af01f3f61f848d968ddb1 Mon Sep 17 00:00:00 2001 From: Spade A <71589810+SpadeA-Tang@users.noreply.github.com> Date: Mon, 18 Nov 2024 11:47:47 +0800 Subject: [PATCH 22/86] raftstore: add write batch limit for raft command batch (#17823) close tikv/tikv#17701 add write batch limit for raft command batch Signed-off-by: SpadeA-Tang Signed-off-by: SpadeA-Tang --- components/raftstore/src/store/fsm/peer.rs | 44 ++++++++++++++++++++++ 1 file changed, 44 insertions(+) diff --git a/components/raftstore/src/store/fsm/peer.rs b/components/raftstore/src/store/fsm/peer.rs index ef657ff738f..922818edac3 100644 --- a/components/raftstore/src/store/fsm/peer.rs +++ b/components/raftstore/src/store/fsm/peer.rs @@ -133,6 +133,10 @@ enum DelayReason { /// in most case. const MAX_REGIONS_IN_ERROR: usize = 10; const REGION_SPLIT_SKIP_MAX_COUNT: usize = 3; +/// Limits the request size that can be batched in a single RaftCmdRequest. +// todo: this fugure maybe changed to a more suitable value. 
+#[allow(clippy::identity_op)] +const MAX_BATCH_SIZE_LIMIT: u64 = 1 * 1024 * 1024; const UNSAFE_RECOVERY_STATE_TIMEOUT: Duration = Duration::from_secs(60); pub const MAX_PROPOSAL_SIZE_RATIO: f64 = 0.4; @@ -442,8 +446,13 @@ where // No batch request whose size exceed 20% of raft_entry_max_size, // so total size of request in batch_raft_request would not exceed // (40% + 20%) of raft_entry_max_size + // Also, to prevent the write batch size from becoming too large when + // raft_entry_max_size is set too high (all requests in a RaftCmdRequest will be + // written in one RocksDB write batch), we use MAX_APPLY_BATCH_SIZE to + // limit the number of requests batched within a single RaftCmdRequest. if req.get_requests().is_empty() || req_size as u64 > (cfg.raft_entry_max_size.0 as f64 * 0.2) as u64 + || (self.batch_req_size + req_size as u64) > MAX_BATCH_SIZE_LIMIT { return false; } @@ -7563,4 +7572,39 @@ mod tests { assert!(flag.load(Ordering::Acquire)); } } + + #[test] + fn test_batch_raft_cmd_request_builder_size_limit() { + let mut cfg = Config::default(); + cfg.raft_entry_max_size = ReadableSize::gb(1); + let mut q = Request::default(); + let mut builder = BatchRaftCmdRequestBuilder::::new(); + + let mut req = RaftCmdRequest::default(); + let mut put = PutRequest::default(); + put.set_key(b"aaaa".to_vec()); + let val = (0..200_000).map(|_| 0).collect_vec(); + put.set_value(val); + q.set_cmd_type(CmdType::Put); + q.set_put(put); + req.mut_requests().push(q.clone()); + let _ = q.take_put(); + let req_size = req.compute_size(); + assert!(builder.can_batch(&cfg, &req, req_size)); + let cb = Callback::write_ext(Box::new(move |_| {}), None, None); + let cmd = RaftCommand::new(req.clone(), cb); + builder.add(cmd, req_size); + + let mut req = RaftCmdRequest::default(); + let mut put = PutRequest::default(); + put.set_key(b"aaaa".to_vec()); + let val = (0..900_000).map(|_| 0).collect_vec(); + put.set_value(val); + q.set_cmd_type(CmdType::Put); + q.set_put(put); + req.mut_requests().push(q.clone()); + let _ = q.take_put(); + let req_size = req.compute_size(); + assert!(!builder.can_batch(&cfg, &req, req_size)); + } } From 9eb3edaf6e404ad9a21c7012dd0b2a1f6f442e9c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=B1=B1=E5=B2=9A?= <36239017+YuJuncen@users.noreply.github.com> Date: Mon, 18 Nov 2024 15:59:32 +0800 Subject: [PATCH 23/86] implement the base library for compacting logs (#17632) close tikv/tikv#17631 Added a new crate named `compact-log-backup`. Now it can merge some log files generated by log backup and make them become SSTs. 
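The pieces compose roughly like this (an illustrative fragment, not a complete
program: `log_files` stands for any stream of `Result<LogFile>` metadata, and
the size threshold is a placeholder inferred from the modules added below):

    // Group the log-backup files into ~128 MiB subcompactions, keyed by
    // region / cf / file type; undersized groups are flushed at end of input.
    let mut planned = CollectSubcompaction::new(
        log_files,
        CollectSubcompactionConfig {
            compact_from_ts: 0,
            compact_to_ts: u64::MAX,
            subcompaction_size_threshold: 128 * 1024 * 1024,
        },
    );
    // Each yielded `Subcompaction` is then handed to `SubcompactionExec`, which
    // loads the inputs, sorts and dedups the records, and writes them as SSTs.
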
Signed-off-by: hillium Co-authored-by: ti-chi-bot[bot] <108142056+ti-chi-bot[bot]@users.noreply.github.com> --- Cargo.lock | 71 +- Cargo.toml | 3 +- components/compact-log-backup/Cargo.toml | 57 ++ .../src/compaction/collector.rs | 488 ++++++++++++ .../compact-log-backup/src/compaction/exec.rs | 577 ++++++++++++++ .../compact-log-backup/src/compaction/meta.rs | 443 +++++++++++ .../compact-log-backup/src/compaction/mod.rs | 198 +++++ components/compact-log-backup/src/errors.rs | 119 +++ .../src/exec_hooks/checkpoint.rs | 81 ++ .../src/exec_hooks/consistency.rs | 104 +++ .../compact-log-backup/src/exec_hooks/mod.rs | 38 + .../src/exec_hooks/observability.rs | 133 ++++ .../src/exec_hooks/save_meta.rs | 163 ++++ .../compact-log-backup/src/execute/hooking.rs | 215 ++++++ .../compact-log-backup/src/execute/mod.rs | 298 ++++++++ .../compact-log-backup/src/execute/test.rs | 311 ++++++++ components/compact-log-backup/src/lib.rs | 19 + components/compact-log-backup/src/source.rs | 206 +++++ .../compact-log-backup/src/statistic.rs | 216 ++++++ components/compact-log-backup/src/storage.rs | 719 ++++++++++++++++++ .../compact-log-backup/src/test_util.rs | 540 +++++++++++++ components/compact-log-backup/src/util.rs | 197 +++++ components/engine_traits/src/sst.rs | 2 +- .../tikv_util/src/codec/stream_event.rs | 10 + 24 files changed, 5203 insertions(+), 5 deletions(-) create mode 100644 components/compact-log-backup/Cargo.toml create mode 100644 components/compact-log-backup/src/compaction/collector.rs create mode 100644 components/compact-log-backup/src/compaction/exec.rs create mode 100644 components/compact-log-backup/src/compaction/meta.rs create mode 100644 components/compact-log-backup/src/compaction/mod.rs create mode 100644 components/compact-log-backup/src/errors.rs create mode 100644 components/compact-log-backup/src/exec_hooks/checkpoint.rs create mode 100644 components/compact-log-backup/src/exec_hooks/consistency.rs create mode 100644 components/compact-log-backup/src/exec_hooks/mod.rs create mode 100644 components/compact-log-backup/src/exec_hooks/observability.rs create mode 100644 components/compact-log-backup/src/exec_hooks/save_meta.rs create mode 100644 components/compact-log-backup/src/execute/hooking.rs create mode 100644 components/compact-log-backup/src/execute/mod.rs create mode 100644 components/compact-log-backup/src/execute/test.rs create mode 100644 components/compact-log-backup/src/lib.rs create mode 100644 components/compact-log-backup/src/source.rs create mode 100644 components/compact-log-backup/src/statistic.rs create mode 100644 components/compact-log-backup/src/storage.rs create mode 100644 components/compact-log-backup/src/test_util.rs create mode 100644 components/compact-log-backup/src/util.rs diff --git a/Cargo.lock b/Cargo.lock index 9774f4cdc84..ba111ddaf6a 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -172,8 +172,8 @@ dependencies = [ "memchr", "pin-project-lite", "tokio", - "zstd", - "zstd-safe", + "zstd 0.13.2", + "zstd-safe 7.2.1", ] [[package]] @@ -1483,6 +1483,52 @@ dependencies = [ "tikv_alloc", ] +[[package]] +name = "compact-log-backup" +version = "0.1.0" +dependencies = [ + "async-compression", + "bytes", + "chrono", + "codec", + "crc64fast", + "dashmap", + "derive_more", + "encryption", + "engine_rocks", + "engine_traits", + "external_storage", + "fail", + "file_system", + "futures 0.3.15", + "futures-io", + "hex 0.4.2", + "keys", + "kvproto", + "lazy_static", + "pin-project", + "pprof", + "prometheus", + "protobuf", + "serde", + "serde_json", + "slog", + 
"slog-global", + "tempdir", + "test_util", + "thiserror", + "tidb_query_datatype", + "tikv_alloc", + "tikv_util", + "tokio", + "tokio-stream", + "tokio-util", + "tracing", + "tracing-active-tree", + "txn_types", + "zstd 0.11.2+zstd.1.5.2", +] + [[package]] name = "concurrency_manager" version = "0.0.1" @@ -8282,13 +8328,32 @@ dependencies = [ "rand 0.7.3", ] +[[package]] +name = "zstd" +version = "0.11.2+zstd.1.5.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "20cc960326ece64f010d2d2107537f26dc589a6573a316bd5b1dba685fa5fde4" +dependencies = [ + "zstd-safe 5.0.2+zstd.1.5.2", +] + [[package]] name = "zstd" version = "0.13.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "fcf2b778a664581e31e389454a7072dab1647606d44f7feea22cd5abb9c9f3f9" dependencies = [ - "zstd-safe", + "zstd-safe 7.2.1", +] + +[[package]] +name = "zstd-safe" +version = "5.0.2+zstd.1.5.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1d2a5585e04f9eea4b2a3d1eca508c4dee9592a89ef6f450c11719da0726f4db" +dependencies = [ + "libc 0.2.151", + "zstd-sys", ] [[package]] diff --git a/Cargo.toml b/Cargo.toml index 69acc30a8e0..aecf767227a 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -244,7 +244,7 @@ members = [ "components/cloud/azure", "components/cloud/gcp", "components/codec", - "components/collections", + "components/collections", "components/compact-log-backup", "components/concurrency_manager", "components/coprocessor_plugin_api", "components/crossbeam-skiplist", @@ -448,6 +448,7 @@ tracing = { version = "0.1.39", default-features = false, features = [ ] } openssl = "0.10" openssl-sys = "0.9" +compact-log-backup = { path = "components/compact-log-backup" } heck = "0.3" crossbeam = "0.8" crossbeam-channel = "0.5" diff --git a/components/compact-log-backup/Cargo.toml b/components/compact-log-backup/Cargo.toml new file mode 100644 index 00000000000..25f630a5008 --- /dev/null +++ b/components/compact-log-backup/Cargo.toml @@ -0,0 +1,57 @@ +[package] +name = "compact-log-backup" +version = "0.1.0" +edition = "2021" +# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html + +[features] +failpoints = ["fail/failpoints"] + +[dependencies] +async-compression = { version = "0.4.12", features = ["tokio", "futures-io", "zstd"] } +bytes = "1" +chrono = { workspace = true } +codec = { workspace = true } +crc64fast = "0.1" +dashmap = "5" +derive_more = "0.99.3" +encryption = { workspace = true } +engine_rocks = { workspace = true } +engine_traits = { workspace = true } +external_storage = { workspace = true } +fail = "0.5" +file_system = { workspace = true } +futures = "0.3" +futures-io = "0.3" +hex = "0.4" +keys = { workspace = true } +kvproto = { workspace = true } +lazy_static = "1.4" +pin-project = "1.0" +prometheus = { version = "0.13", default-features = false, features = ["nightly"] } +protobuf = { version = "2.8", features = ["bytes"] } +serde = "1.0" +serde_json = "1.0" +slog = { workspace = true } +slog-global ={ workspace = true } +thiserror = "1" +tidb_query_datatype = { workspace = true } +tikv_alloc = { workspace = true } +tikv_util = { workspace = true } +tokio = { version = "1.5", features = ["rt-multi-thread", "macros", "time", "sync", "signal"] } +tokio-stream = "0.1" +tokio-util = { version = "0.7", features = ["compat"] } +tracing = { workspace = true } +tracing-active-tree = { workspace = true } +txn_types = { workspace = true } + +# Below are used by `test_utils` only. 
+zstd = "0.11" + +[dev-dependencies] +pprof = { version = "0.13", default-features = false, features = [ + "flamegraph", + "protobuf-codec", +] } +tempdir = "0.3" +test_util = { workspace = true } diff --git a/components/compact-log-backup/src/compaction/collector.rs b/components/compact-log-backup/src/compaction/collector.rs new file mode 100644 index 00000000000..6113a82bccd --- /dev/null +++ b/components/compact-log-backup/src/compaction/collector.rs @@ -0,0 +1,488 @@ +// Copyright 2024 TiKV Project Authors. Licensed under Apache-2.0. +use std::{collections::HashMap, task::ready}; + +use tokio_stream::Stream; + +use super::{SubcompactionCollectKey, UnformedSubcompaction}; +use crate::{ + compaction::Subcompaction, + errors::{Result, TraceResultExt}, + statistic::CollectSubcompactionStatistic, + storage::LogFile, +}; + +/// A collecting subcompaction. + +/// Collecting a stream of [`LogFile`], and generate a stream of compactions. +#[pin_project::pin_project] +pub struct CollectSubcompaction>> { + #[pin] + inner: S, + last_compactions: Option>, + + collector: SubcompactionCollector, +} + +impl>> CollectSubcompaction { + /// Get delta of statistic between last call to this. + pub fn take_statistic(&mut self) -> CollectSubcompactionStatistic { + std::mem::take(&mut self.collector.stat) + } + + /// Get the mutable internal stream. + pub fn get_mut(&mut self) -> &mut S { + &mut self.inner + } +} + +pub struct CollectSubcompactionConfig { + /// Lower bound of timestamps. + /// Files donesn't contain any record with a timestamp greater than or equal + /// to this will be filtered out. + pub compact_from_ts: u64, + /// Upper bound of timestamps. + pub compact_to_ts: u64, + /// The expected size of a subcompaction. + pub subcompaction_size_threshold: u64, +} + +impl>> CollectSubcompaction { + pub fn new(s: S, cfg: CollectSubcompactionConfig) -> Self { + CollectSubcompaction { + inner: s, + last_compactions: None, + collector: SubcompactionCollector { + cfg, + items: HashMap::new(), + stat: CollectSubcompactionStatistic::default(), + }, + } + } +} + +/// Collects subcompactions by upstream log files. +/// For now, we collect subcompactions by grouping the input files with +/// [`SubcompactionCollectKey`]. When each group grows to the specified size, a +/// subcompaction will be generated. +struct SubcompactionCollector { + items: HashMap, + stat: CollectSubcompactionStatistic, + cfg: CollectSubcompactionConfig, +} + +impl SubcompactionCollector { + /// Adding a new log file input to the collector. + fn add_new_file(&mut self, file: LogFile) -> Option { + use std::collections::hash_map::Entry; + let key = SubcompactionCollectKey::by_file(&file); + + // Skip out-of-range files and schema meta files. + // Meta files need to have a simpler format so other BR client can easily open + // and rewrite it. (Perhaps we can also compact them.) 
+ if file.is_meta + || file.max_ts < self.cfg.compact_from_ts + || file.min_ts > self.cfg.compact_to_ts + { + self.stat.files_filtered_out += 1; + return None; + } + + self.stat.bytes_in += file.file_real_size; + self.stat.files_in += 1; + + match self.items.entry(key) { + Entry::Occupied(mut o) => { + let key = *o.key(); + let u = o.get_mut(); + u.add_file(file); + if u.size > self.cfg.subcompaction_size_threshold { + let uc = o.remove(); + let c = uc.form(&key, &self.cfg); + self.stat.compactions_out += 1; + self.stat.bytes_out += c.size; + return Some(c); + } + } + Entry::Vacant(v) => { + v.insert(UnformedSubcompaction::by_file(&file)); + } + } + None + } + + /// Force create subcompaction by the current pending unformed + /// subcompactions. These subcompaction will be undersized. + fn take_pending_subcompactions(&mut self) -> impl Iterator + '_ { + self.items.drain().map(|(key, c)| { + // Hacking: update the statistic when we really yield the compaction. + // (At `poll_next`.) + c.form(&key, &self.cfg) + }) + } +} + +impl>> Stream for CollectSubcompaction { + type Item = Result; + + fn poll_next( + self: std::pin::Pin<&mut Self>, + cx: &mut std::task::Context<'_>, + ) -> std::task::Poll> { + let mut this = self.project(); + loop { + if let Some(finalize) = this.last_compactions { + return finalize + .pop() + .map(|c| { + // Now user can see the compaction, we can update the statistic here. + this.collector.stat.bytes_out += c.size; + this.collector.stat.compactions_out += 1; + Ok(c) + }) + .into(); + } + + let item = ready!(this.inner.as_mut().poll_next(cx)); + match item { + None => { + *this.last_compactions = + Some(this.collector.take_pending_subcompactions().collect()) + } + Some(Err(err)) => return Some(Err(err).trace_err()).into(), + Some(Ok(item)) => { + if let Some(comp) = this.collector.add_new_file(item) { + return Some(Ok(comp)).into(); + } + } + } + } + } +} + +#[cfg(test)] +mod test { + use std::sync::Arc; + + use engine_traits::CF_WRITE; + use futures::stream::{self, StreamExt, TryStreamExt}; + use kvproto::brpb; + + use super::{CollectSubcompaction, CollectSubcompactionConfig, SubcompactionCollectKey}; + use crate::{ + compaction::EpochHint, + errors::{Error, ErrorKind, Result}, + storage::{Epoch, LogFile, LogFileId}, + }; + + fn log_file(name: &str, len: u64, key: SubcompactionCollectKey) -> LogFile { + LogFile { + id: LogFileId { + name: Arc::from(name.to_owned().into_boxed_str()), + offset: 0, + length: len, + }, + compression: kvproto::brpb::CompressionType::Zstd, + crc64xor: 0, + number_of_entries: 0, + file_real_size: len, + min_ts: 0, + max_ts: 0, + min_key: Arc::from([]), + max_key: Arc::from([]), + is_meta: key.is_meta, + region_id: key.region_id, + cf: key.cf, + ty: key.ty, + min_start_ts: 0, + table_id: 0, + resolved_ts: 0, + sha256: Arc::from([]), + region_start_key: None, + region_end_key: None, + region_epoches: None, + } + } + + fn with_ts(mut lf: LogFile, min_ts: u64, max_ts: u64) -> LogFile { + lf.min_ts = min_ts; + lf.max_ts = max_ts; + lf + } + + impl SubcompactionCollectKey { + fn of_region(r: u64) -> Self { + SubcompactionCollectKey { + cf: "default", + region_id: r, + ty: kvproto::brpb::FileType::Put, + is_meta: false, + table_id: 0, + } + } + } + + #[tokio::test] + async fn test_collect_subcompaction() { + let r = SubcompactionCollectKey::of_region; + let items = vec![ + log_file("001", 64, r(1)), + log_file("002", 65, r(2)), + log_file("003", 8, r(2)), + log_file("004", 64, r(2)), + log_file("005", 42, r(3)), + log_file("006", 98, r(3)), + 
log_file("008", 1, r(4)), + ]; + let mut collector = CollectSubcompaction::new( + stream::iter(items).map(Result::Ok), + CollectSubcompactionConfig { + compact_from_ts: 0, + compact_to_ts: u64::MAX, + subcompaction_size_threshold: 128, + }, + ); + + let mut res = (&mut collector) + .map_ok(|v| (v.size, v.region_id)) + .try_collect::>() + .await + .unwrap(); + + res[2..].sort(); + assert_eq!(res.len(), 4); + assert_eq!(res, &[(137, 2), (140, 3), (1, 4), (64, 1)]); + let stat = collector.take_statistic(); + assert_eq!(stat.files_in, 7); + assert_eq!(stat.bytes_in, 342); + assert_eq!(stat.bytes_out, 342); + assert_eq!(stat.compactions_out, 4); + assert_eq!(stat.files_filtered_out, 0); + } + + #[tokio::test] + async fn test_error() { + let r = SubcompactionCollectKey::of_region; + let items = vec![ + Ok(log_file("001", 64, r(1))), + Ok(log_file("006", 65, r(1))), + Err(Error::from(ErrorKind::Other("error".to_owned()))), + Ok(log_file("008", 20, r(1))), + ]; + + let collector = CollectSubcompaction::new( + stream::iter(items), + CollectSubcompactionConfig { + compact_from_ts: 0, + compact_to_ts: u64::MAX, + subcompaction_size_threshold: 128, + }, + ); + let mut st = collector.map_ok(|v| v.size); + assert_eq!(st.next().await.unwrap().unwrap(), 129); + st.next().await.unwrap().unwrap_err(); + } + + #[tokio::test] + async fn test_filter_out() { + let r = SubcompactionCollectKey::of_region; + let m = |mut k: SubcompactionCollectKey| { + k.is_meta = true; + k + }; + let t = with_ts; + + let items = vec![ + // should be filtered out. + t(log_file("1", 999, r(1)), 40, 49), + t(log_file("11", 456, r(1)), 201, 288), + t(log_file("11", 789, m(r(1))), 201, 288), + // total in range. + t(log_file("2", 20, r(1)), 50, 199), + // having overlap. + t(log_file("3", 100, r(1)), 199, 201), + t(log_file("4", 9, r(1)), 48, 51), + // other regions + t(log_file("5", 999, r(2)), 52, 55), + ]; + + let mut collector = CollectSubcompaction::new( + stream::iter(items.iter().cloned().map(Ok)), + CollectSubcompactionConfig { + compact_from_ts: 50, + compact_to_ts: 200, + subcompaction_size_threshold: 128, + }, + ); + + let res = (&mut collector) + .map_ok(|v| (v.size, v.region_id)) + .try_collect::>() + .await + .unwrap(); + + println!("{res:?}"); + assert_eq!(res, [(129, 1), (999, 2)]); + let stat = collector.take_statistic(); + println!("{:?}", stat); + assert_eq!(stat.files_filtered_out, 3); + assert_eq!(stat.compactions_out, 2); + assert_eq!(stat.files_in + stat.files_filtered_out, items.len() as u64); + } + + #[tokio::test] + async fn test_group() { + let r = SubcompactionCollectKey::of_region; + let m = |mut k: SubcompactionCollectKey| { + k.is_meta = true; + k + }; + let d = |mut k: SubcompactionCollectKey| { + k.ty = brpb::FileType::Delete; + k + }; + let w = |mut k: SubcompactionCollectKey| { + k.cf = CF_WRITE; + k + }; + + let files = vec![ + log_file("x", 100, r(1)), + log_file("m", 101, m(r(1))), + log_file("d", 102, d(r(1))), + log_file("w", 103, w(r(1))), + log_file("md", 104, d(m(r(1)))), + log_file("wd", 105, w(d(r(1)))), + log_file("all", 106, m(w(d(r(1))))), + log_file("other_region", 107, r(2)), + log_file("other_region_w", 108, w(r(2))), + ]; + + let mut collector = CollectSubcompaction::new( + stream::iter(files.iter().cloned().map(Ok)), + CollectSubcompactionConfig { + compact_from_ts: 0, + compact_to_ts: u64::MAX, + subcompaction_size_threshold: 128, + }, + ); + + let mut res = (&mut collector) + .map_ok(|v| (v.size, v.region_id, v.cf, v.ty)) + .try_collect::>() + .await + .unwrap(); + + 
res.sort_by_key(|v| v.0); + + use brpb::FileType::*; + assert_eq!( + res, + [ + (100, 1, "default", Put), + (102, 1, "default", Delete), + (103, 1, "write", Put), + (105, 1, "write", Delete), + (107, 2, "default", Put), + (108, 2, "write", Put) + ] + ); + + let stat = collector.take_statistic(); + assert_eq!(stat.files_in + stat.files_filtered_out, files.len() as u64); + assert_eq!(stat.compactions_out, 6); + assert_eq!(stat.files_filtered_out, 3); + } + + #[tokio::test] + async fn test_region_boundary() { + let r = SubcompactionCollectKey::of_region; + let e = |v, cv| Epoch { + conf_ver: cv, + version: v, + }; + let es = |es: Vec| Arc::from(es.into_boxed_slice()); + let eh = |start: &[u8], end: &[u8], ver: u64| EpochHint { + start_key: start.to_vec().into_boxed_slice().into(), + end_key: end.to_vec().into_boxed_slice().into(), + region_epoch: e(ver, 42), + }; + let rr = |mut l: LogFile, start: &[u8], end: &[u8], ver: u64| { + l.region_start_key = Some(start.to_vec().into_boxed_slice().into()); + l.region_end_key = Some(end.to_vec().into_boxed_slice().into()); + l.region_epoches = Some(es(vec![e(ver, 42)])); + l + }; + + struct Input<'a> { + files: &'a [LogFile], + total_size: u64, + require_epoches: &'a [EpochHint], + } + + async fn run(input: Input<'_>) { + let collector = CollectSubcompaction::new( + stream::iter(input.files.iter().cloned().map(Ok)), + CollectSubcompactionConfig { + compact_from_ts: 0, + compact_to_ts: u64::MAX, + subcompaction_size_threshold: 128, + }, + ); + + let mut res = collector + .map_ok(|v| (v.size, v.region_id, v.epoch_hints)) + .try_collect::>() + .await + .unwrap(); + + assert_eq!(res.len(), 1); + assert_eq!(res[0].0, input.total_size); + assert_eq!(res[0].1, 1); + res[0].2.sort_by_key(|v| v.region_epoch.version); + assert_eq!(res[0].2, input.require_epoches); + } + + let cases = [ + Input { + files: &[ + log_file("z", 0, r(1)), + rr(log_file("a", 1, r(1)), b"001", b"010", 42), + rr(log_file("b", 2, r(1)), b"001", b"005", 43), + ], + total_size: 3, + require_epoches: &[eh(b"001", b"010", 42), eh(b"001", b"005", 43)], + }, + Input { + files: &[ + log_file("z", 0, r(1)), + rr(log_file("a", 1, r(1)), b"001", b"010", 42), + ], + total_size: 1, + require_epoches: &[eh(b"001", b"010", 42)], + }, + Input { + files: &[ + log_file("z", 0, r(1)), + rr(log_file("a", 1, r(1)), b"001", b"010", 42), + rr(log_file("b", 2, r(1)), b"001", b"005", 43), + rr(log_file("c", 3, r(1)), b"001", b"015", 192), + ], + total_size: 6, + require_epoches: &[ + eh(b"001", b"010", 42), + eh(b"001", b"005", 43), + eh(b"001", b"015", 192), + ], + }, + Input { + files: &[log_file("z", 9, r(1))], + total_size: 9, + require_epoches: &[], + }, + ]; + + for c in cases { + run(c).await; + } + } +} diff --git a/components/compact-log-backup/src/compaction/exec.rs b/components/compact-log-backup/src/compaction/exec.rs new file mode 100644 index 00000000000..3fd14e480f2 --- /dev/null +++ b/components/compact-log-backup/src/compaction/exec.rs @@ -0,0 +1,577 @@ +// Copyright 2024 TiKV Project Authors. Licensed under Apache-2.0. 
+use std::{ + path::{Path, PathBuf}, + sync::Arc, +}; + +use engine_rocks::RocksEngine; +use engine_traits::{ + ExternalSstFileInfo, SstCompressionType, SstExt, SstWriter, SstWriterBuilder, + DATA_KEY_PREFIX_LEN, +}; +use external_storage::{ExternalStorage, UnpinReader}; +use file_system::Sha256Reader; +use futures::{future::TryFutureExt, io::AllowStdIo}; +use kvproto::brpb::{self, LogFileSubcompaction}; +use tikv_util::{ + codec::bytes::decode_bytes_in_place, retry_expr, stream::JustRetry, time::Instant, +}; + +use super::{EpochHint, Subcompaction, SubcompactionResult}; +use crate::{ + compaction::SST_OUT_REL, + errors::{OtherErrExt, Result, TraceResultExt}, + source::{Record, Source}, + statistic::{prom::*, LoadStatistic, SubcompactStatistic}, + storage::COMPACTION_OUT_PREFIX, + util::{self, Cooperate, ExecuteAllExt}, +}; + +/// The state of executing a subcompaction. +pub struct SubcompactionExec { + source: Source, + output: Arc, + co: Cooperate, + out_prefix: PathBuf, + + load_stat: LoadStatistic, + compact_stat: SubcompactStatistic, + + db: Option, +} + +/// The extra config of executing a subcompaction. +pub struct SubcompactExt { + /// The number of max concurrency of loading input files. + pub max_load_concurrency: usize, + /// The compression type that will be used for the output file. + pub compression: SstCompressionType, + /// The compression level of the output file. + /// + /// When `None`, will use the default level of the compression algorithm. + pub compression_level: Option, +} + +impl Default for SubcompactExt { + fn default() -> Self { + Self { + max_load_concurrency: Default::default(), + compression: SstCompressionType::Lz4, + compression_level: None, + } + } +} + +// NOTE: maybe we can merge this and `SubcompactionExt`? +/// The information used for constructing a [`SubcompactionExec`]. +pub struct SubcompactionExecArg { + /// The prefix of the output SST. + pub out_prefix: Option, + /// The RocksDB instance used for creating the SST writer. + pub db: Option, + /// The output storage. + pub storage: Arc, +} + +impl From> for SubcompactionExec { + fn from(value: SubcompactionExecArg) -> Self { + Self { + source: Source::new(Arc::clone(&value.storage)), + output: value.storage, + out_prefix: value + .out_prefix + .unwrap_or_else(|| Path::new(COMPACTION_OUT_PREFIX).to_owned()), + db: value.db, + + co: Default::default(), + load_stat: Default::default(), + compact_stat: Default::default(), + } + } +} + +impl SubcompactionExec { + #[cfg(test)] + pub fn default_config(storage: Arc) -> Self { + Self::from(SubcompactionExecArg { + storage, + out_prefix: None, + db: None, + }) + } +} + +/// An SST generated by a subcompaction. +struct WrittenSst { + content: S, + meta: kvproto::brpb::File, + epoch_hints: Vec, + physical_size: u64, +} + +/// Log backup may generate duplicated key-value entries. +/// When compacting them in the same subcompaction, we will dedup them. Hence +/// the checksum of output varies. +/// +/// During compacting, those difference will be recorded here. +#[derive(Default)] +struct ChecksumDiff { + removed_key: u64, + decreaed_size: u64, + crc64xor_diff: u64, +} + +impl SubcompactionExec +where + <::SstWriter as SstWriter>::ExternalSstFileReader: 'static, +{ + fn update_checksum_diff(a: &Record, b: &Record, diff: &mut ChecksumDiff) { + assert_eq!( + a, b, + "The record with same key contains different value: the backup might be corrupted." 
+ ); + + diff.removed_key += 1; + diff.decreaed_size += (a.key.len() + a.value.len()) as u64; + let mut d = crc64fast::Digest::new(); + d.write(&a.key); + d.write(&a.value); + // When we remove even number of the same key-value pair, we don't need to + // update the crc64xor checksum. Trick here: If a key-value pair appears + // even times, that key will be eliminated in this checksum, so we just + // need to `XOR` all duplicated keys up, we will get the right crc64xor diff. + diff.crc64xor_diff ^= d.sum64(); + } + + /// Sort all inputs and dedup them, generating the checksum diff. + #[tracing::instrument(skip_all)] + async fn process_input( + &mut self, + items: impl Iterator>, + ) -> (Vec, ChecksumDiff) { + let mut flatten_items = items + .into_iter() + .flat_map(|v| v.into_iter()) + .collect::>(); + tokio::task::yield_now().await; + flatten_items.sort_unstable_by(Record::cmp_key); + tokio::task::yield_now().await; + let mut diff = ChecksumDiff::default(); + flatten_items.dedup_by(|k1, k2| { + if k1.key == k2.key { + Self::update_checksum_diff(k1, k2, &mut diff); + true + } else { + false + } + }); + (flatten_items, diff) + } + + #[tracing::instrument(skip_all)] + async fn load( + &mut self, + c: &Subcompaction, + ext: &mut SubcompactExt, + ) -> Result>> { + let mut eext = ExecuteAllExt::default(); + eext.max_concurrency = ext.max_load_concurrency; + + let items = super::util::execute_all_ext( + c.inputs + .iter() + .cloned() + .map(|f| { + let source = &self.source; + Box::pin(async move { + let _lt = COMPACT_LOG_BACKUP_LOAD_A_FILE_DURATION.start_coarse_timer(); + let mut out = vec![]; + let mut stat = LoadStatistic::default(); + source + .load(f, Some(&mut stat), |k, v| { + fail::fail_point!("compact_log_backup_omit_key", |_| {}); + out.push(Record { + key: k.to_owned(), + value: v.to_owned(), + }) + }) + .await?; + Result::Ok((out, stat)) + }) + }) + .collect(), + eext, + ) + .await?; + + let mut result = Vec::with_capacity(items.len()); + for (item, stat) in items { + self.load_stat += stat; + result.push(item); + } + + Ok(result.into_iter()) + } + + /// write the `sorted_items` to a in-mem SST. + /// + /// # Panics + /// + /// For now, if the `sorted_items` is empty, it will panic. + /// But it is reasonable to return an error in this scenario if needed. + #[tracing::instrument(skip_all, fields(name=%name))] + async fn write_sst( + &mut self, + name: &str, + c: &Subcompaction, + sorted_items: &[Record], + ext: &mut SubcompactExt, + ) -> Result::ExternalSstFileReader>> { + let cf = c.cf; + let mut wb = ::SstWriterBuilder::new() + .set_cf(cf) + .set_compression_type(Some(ext.compression)) + .set_in_memory(true); + if let Some(db) = self.db.as_ref() { + wb = wb.set_db(db); + } + if let Some(level) = ext.compression_level { + wb = wb.set_compression_level(level); + } + let mut w = wb.build(name)?; + let mut meta = kvproto::brpb::File::default(); + + let mut start_key = sorted_items[0].key.clone(); + // `File::{start,end}_key` should be raw key. + decode_bytes_in_place(&mut start_key, false).adapt_err()?; + let mut end_key = sorted_items.last().unwrap().key.clone(); + decode_bytes_in_place(&mut end_key, false).adapt_err()?; + // `File::end_key` should be exclusive. (!) + // Also we cannot just call next_key, or the table ID of the end key may be + // different, some versions of BR panics in that scenario. 
+ end_key.push(0u8); + + meta.set_start_key(start_key); + meta.set_end_key(end_key); + meta.set_cf(cf.to_owned()); + meta.name = name.to_owned(); + meta.end_version = u64::MAX; + + let mut data_key = keys::DATA_PREFIX_KEY.to_vec(); + for item in sorted_items { + self.co.step().await; + + let mut d = crc64fast::Digest::new(); + d.write(&item.key); + d.write(&item.value); + let ts = item.ts().trace_err()?; + meta.crc64xor ^= d.sum64(); + meta.start_version = meta.start_version.min(ts); + meta.end_version = meta.end_version.max(ts); + + // NOTE: We may need to check whether the key is already a data key here once we + // are going to support compact SSTs. + data_key.truncate(DATA_KEY_PREFIX_LEN); + data_key.extend(&item.key); + w.put(&data_key, &item.value)?; + + self.compact_stat.logical_key_bytes_out += item.key.len() as u64; + self.compact_stat.logical_value_bytes_out += item.value.len() as u64; + meta.total_kvs += 1; + meta.total_bytes += item.key.len() as u64 + item.value.len() as u64; + } + let (info, out) = w.finish_read()?; + meta.set_size(info.file_size()); + self.compact_stat.keys_out += info.num_entries(); + self.compact_stat.physical_bytes_out += info.file_size(); + + let result = WrittenSst { + content: out, + meta, + epoch_hints: c.epoch_hints.clone(), + physical_size: info.file_size(), + }; + + Ok(result) + } + + #[tracing::instrument(skip_all, fields(name=%sst.meta.name))] + async fn upload_compaction_artifact( + &mut self, + c: &Subcompaction, + sst: &mut WrittenSst<::ExternalSstFileReader>, + ) -> Result { + use engine_traits::ExternalSstFileReader; + sst.content.reset()?; + let (rd, hasher) = Sha256Reader::new(&mut sst.content).adapt_err()?; + self.output + .write( + &sst.meta.name, + UnpinReader(Box::new(AllowStdIo::new(rd))), + sst.physical_size, + ) + .await?; + sst.meta.sha256 = hasher.lock().unwrap().finish().adapt_err()?.to_vec(); + let mut meta = brpb::LogFileSubcompaction::new(); + meta.set_meta(c.to_pb_meta()); + meta.set_region_meta_hints(sst.epoch_hints.iter().map(|v| v.to_pb()).collect()); + meta.set_sst_outputs(vec![sst.meta.clone()].into()); + Ok(meta) + } + + #[tracing::instrument(skip_all, fields(c=%c))] + pub async fn run( + mut self, + c: Subcompaction, + mut ext: SubcompactExt, + ) -> Result { + let mut result = SubcompactionResult::of(c); + let c = &result.origin; + for input in &c.inputs { + if input.crc64xor == 0 { + result.expected_crc64 = None; + } + if let Some(v) = result.expected_crc64.as_mut() { + *v ^= input.crc64xor; + } + result.expected_keys += input.num_of_entries; + result.expected_size += input.key_value_size; + } + + let begin = Instant::now(); + let items = self.load(c, &mut ext).await?; + self.compact_stat.load_duration += begin.saturating_elapsed(); + + let begin = Instant::now(); + let (sorted_items, cdiff) = self.process_input(items).await; + self.compact_stat.sort_duration += begin.saturating_elapsed(); + if sorted_items.is_empty() { + self.compact_stat.empty_generation += 1; + return Ok(result); + } + + if let Some(v) = result.expected_crc64.as_mut() { + *v ^= cdiff.crc64xor_diff; + } + result.expected_keys -= cdiff.removed_key; + result.expected_size -= cdiff.decreaed_size; + + let out_name = self + .out_prefix + .join(SST_OUT_REL) + .join(format!( + "{}_{}_{}_{}.sst", + util::aligned_u64(c.input_min_ts), + util::aligned_u64(c.input_max_ts), + c.cf, + c.region_id + )) + .display() + .to_string(); + let begin = Instant::now(); + assert!(!sorted_items.is_empty()); + let mut sst = self + .write_sst(&out_name, c, 
sorted_items.as_slice(), &mut ext) + .await?; + + self.compact_stat.write_sst_duration += begin.saturating_elapsed(); + + let begin = Instant::now(); + result.meta = + retry_expr! { self.upload_compaction_artifact(c, &mut sst).map_err(JustRetry) } + .await + .map_err(|err| err.0)?; + self.compact_stat.save_duration += begin.saturating_elapsed(); + + result.compact_stat = self.compact_stat; + result.load_stat = self.load_stat; + + return Ok(result); + } +} + +#[cfg(test)] +mod test { + use tidb_query_datatype::codec::table::encode_row_key; + use txn_types::Key; + + use crate::{ + compaction::Subcompaction, + storage::{Epoch, MetaFile}, + test_util::{ + gen_step, save_many_log_files, CompactInMem, KvGen, LogFileBuilder, TmpStorage, + }, + }; + + #[tokio::test] + async fn test_compact_one() { + let st = TmpStorage::create(); + + let const_val = |_| vec![42u8]; + let cm = CompactInMem::default(); + + let s1 = KvGen::new(gen_step(1, 0, 2).take(100), const_val); + let i1 = st.build_log_file("a.log", cm.tap_on(s1)).await; + + let s2 = KvGen::new(gen_step(1, 1, 2).take(100), const_val); + let i2 = st.build_log_file("b.log", cm.tap_on(s2)).await; + + let c = Subcompaction::of_many([i1, i2]); + + st.verify_result(st.run_subcompaction(c).await, cm); + } + + #[tokio::test] + async fn test_compact_dup() { + let st = TmpStorage::create(); + let cm = CompactInMem::default(); + + let s1 = KvGen::new(gen_step(1, 0, 3).take(100), |_| b"value".to_vec()); + let i1 = st.build_log_file("three.log", cm.tap_on(s1)).await; + + let s2 = KvGen::new(gen_step(1, 0, 2).take(100), |_| b"value".to_vec()); + let i2 = st.build_log_file("two.log", cm.tap_on(s2)).await; + + let c = Subcompaction::of_many([i1, i2]); + let res = st.run_subcompaction(c).await; + assert_eq!(res.load_stat.keys_in, 200); + assert_eq!(res.compact_stat.keys_out, 166); + st.verify_result(res, cm); + } + + #[tokio::test] + async fn test_compact_from_one_file() { + let st = TmpStorage::create(); + let cm = CompactInMem::default(); + + let s1 = KvGen::new(gen_step(1, 0, 2).take(100), |_| b"value".to_vec()); + let mut i1 = LogFileBuilder::new(|v| v.name = "a.log".to_owned()); + cm.tap_on(s1) + .for_each(|kv| i1.add_encoded(&kv.key, &kv.value)); + + let s2 = KvGen::new(gen_step(1, 1, 2).take(128), |_| b"value".to_vec()); + let mut i2 = LogFileBuilder::new(|v| v.name = "b.log".to_owned()); + cm.tap_on(s2) + .for_each(|kv| i2.add_encoded(&kv.key, &kv.value)); + + let meta = save_many_log_files("data.log", [i1, i2], st.storage().as_ref()) + .await + .unwrap(); + let ml = MetaFile::from(meta); + let c = Subcompaction::of_many(ml.into_logs()); + let res = st.run_subcompaction(c).await; + st.verify_result(res, cm); + } + + #[tokio::test] + async fn test_region_boundaries() { + let st = TmpStorage::create(); + let cm = CompactInMem::default(); + + let s1 = KvGen::new(gen_step(1, 0, 2).take(100), |_| b"value".to_vec()); + let enc = |v| Key::from_raw(v).into_encoded(); + let mut i1 = LogFileBuilder::new(|v| { + v.name = "a.log".to_owned(); + v.region_start_key = Some(enc(b"t")); + v.region_end_key = Some(enc(b"t\xff")); + v.region_epoches.push(Epoch { + version: 42, + conf_ver: 42, + }); + }); + cm.tap_on(s1) + .for_each(|kv| i1.add_encoded(&kv.key, &kv.value)); + let meta = save_many_log_files("data.log", [i1], st.storage().as_ref()) + .await + .unwrap(); + let ml = MetaFile::from(meta); + let c = Subcompaction::of_many(ml.into_logs()); + let res = st.run_subcompaction(c).await; + let sst_out = &res.meta.get_region_meta_hints()[0]; + 
assert_eq!(sst_out.get_start_key(), enc(b"t").as_slice()); + assert_eq!(sst_out.get_end_key(), enc(b"t\xff").as_slice()); + st.verify_result(res, cm); + } + + #[tokio::test] + async fn test_elide_region_boundaries() { + let st = TmpStorage::create(); + let cm = CompactInMem::default(); + + let s1 = KvGen::new(gen_step(1, 0, 2).take(100), |_| b"value".to_vec()); + let enc = |v| Key::from_raw(v).into_encoded(); + let mut i1 = LogFileBuilder::new(|v| { + v.name = "a.log".to_owned(); + v.region_start_key = Some(enc(b"t")); + v.region_end_key = Some(enc(b"t\xff")); + v.region_epoches.push(Epoch { + version: 42, + conf_ver: 42, + }); + }); + cm.tap_on(s1) + .for_each(|kv| i1.add_encoded(&kv.key, &kv.value)); + + let s2 = KvGen::new(gen_step(1, 200, 2).take(100), |_| b"value".to_vec()); + let mut i2 = LogFileBuilder::new(|v| { + v.name = "b.log".to_owned(); + }); + cm.tap_on(s2) + .for_each(|kv| i2.add_encoded(&kv.key, &kv.value)); + + let meta = save_many_log_files("data.log", [i1, i2], st.storage().as_ref()) + .await + .unwrap(); + let ml = MetaFile::from(meta); + let logs = ml.into_logs().collect::>(); + assert_eq!(logs.len(), 2, "{:?}", logs); + + // Case 1: with ranges only. + let c = Subcompaction::of_many([logs[0].clone()]); + let res = st.run_subcompaction(c).await; + let sst_out = &res.meta.get_region_meta_hints()[0]; + let bgn = sst_out.get_start_key(); + let end = sst_out.get_end_key(); + let cbgn = res.meta.get_meta().get_min_key(); + let cend = res.meta.get_meta().get_max_key(); + assert_eq!(bgn, enc(b"t").as_slice()); + assert_eq!(end, enc(b"t\xff").as_slice()); + assert!(cbgn > enc(bgn).as_slice(), "{:?}", cbgn); + assert!(cend < enc(end).as_slice(), "{:?}", cend); + + // Case 2: should elide range. + let c = Subcompaction::of_many(logs); + let res = st.run_subcompaction(c).await; + let sst_out = &res.meta.get_sst_outputs()[0]; + let bgn = sst_out.get_start_key(); + let end = sst_out.get_end_key(); + let cbgn = res.meta.get_meta().get_min_key(); + let cend = res.meta.get_meta().get_max_key(); + let row = |r| encode_row_key(1, r); + assert_eq!(bgn, row(0)); + let mut ek = row(398); + ek.push(0); + assert_eq!(end, ek); + assert!(cbgn > enc(bgn).as_slice(), "{:?}", cbgn); + assert!(cend < enc(end).as_slice(), "{:?}", cend); + st.verify_result(res.clone(), cm); + } + + #[tokio::test] + #[cfg(feature = "failpoints")] + // Note: maybe just modify the log files? + async fn test_failed_checksumming() { + let _fg = fail::FailGuard::new("compact_log_backup_omit_key", "1*return"); + let st = TmpStorage::create(); + + let cm = CompactInMem::default(); + + let s1 = KvGen::new(gen_step(1, 0, 3).take(100), |_| b"value".to_vec()); + let i1 = st.build_log_file("three.log", cm.tap_on(s1)).await; + + let c = Subcompaction::singleton(i1); + let res = st.run_subcompaction(c).await; + res.verify_checksum() + .expect_err("should failed to verify checksum"); + } +} diff --git a/components/compact-log-backup/src/compaction/meta.rs b/components/compact-log-backup/src/compaction/meta.rs new file mode 100644 index 00000000000..75e53f18750 --- /dev/null +++ b/components/compact-log-backup/src/compaction/meta.rs @@ -0,0 +1,443 @@ +// Copyright 2024 TiKV Project Authors. Licensed under Apache-2.0. 
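+
+//! Metadata bookkeeping for subcompactions: checksum verification, the
+//! protobuf encoding of a subcompaction, and [`CompactionRunInfoBuilder`],
+//! which tracks which input log files are fully covered by the compaction
+//! and hence can be deleted.
+//!
+//! A rough sketch of the intended flow (illustrative only; the results and
+//! the storage handle are assumed to come from an executed compaction):
+//!
+//! ```ignore
+//! let mut run_info = CompactionRunInfoBuilder::default();
+//! for result in finished_subcompactions {
+//!     result.verify_checksum()?;
+//!     run_info.add_subcompaction(&result);
+//! }
+//! // Computes the expiring files and persists a migration readable by BR.
+//! run_info.write_migration(storage.as_ref()).await?;
+//! ```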
+use std::{ + collections::{BTreeSet, HashMap}, + sync::Arc, +}; + +use external_storage::ExternalStorage; +use futures::stream::TryStreamExt; +use kvproto::brpb::{self, DeleteSpansOfFile}; + +use super::{ + collector::CollectSubcompactionConfig, EpochHint, Subcompaction, SubcompactionCollectKey, + SubcompactionResult, UnformedSubcompaction, +}; +use crate::{ + errors::Result, + storage::{ + LoadFromExt, LogFile, LogFileId, MetaFile, MigartionStorageWrapper, PhysicalLogFile, + StreamMetaStorage, + }, +}; + +impl SubcompactionResult { + pub fn verify_checksum(&self) -> Result<()> { + let mut output_crc64 = 0; + let mut output_length = 0; + let mut output_count = 0; + + for out in self.meta.get_sst_outputs() { + output_crc64 ^= out.get_crc64xor(); + output_length += out.get_total_bytes(); + output_count += out.get_total_kvs(); + } + + let check_eq = |output, input, hint| { + if output != input { + Err(crate::errors::ErrorKind::Other(format!( + "{} not match: output is {}, but input is {}", + hint, output, input + ))) + } else { + Ok(()) + } + }; + + if let Some(input_crc64) = self.expected_crc64 { + check_eq(output_crc64, input_crc64, "crc64xor")?; + } + check_eq(output_length, self.expected_size, "size")?; + check_eq(output_count, self.expected_keys, "num_of_entries")?; + + Ok(()) + } +} + +impl Subcompaction { + pub fn crc64(&self) -> u64 { + let mut crc64_xor = 0; + for input in &self.inputs { + let mut crc = crc64fast::Digest::new(); + crc.write(input.id.name.as_bytes()); + crc.write(&input.id.offset.to_le_bytes()); + crc.write(&input.id.length.to_le_bytes()); + crc64_xor ^= crc.sum64(); + } + let mut crc = crc64fast::Digest::new(); + crc.write(&self.region_id.to_le_bytes()); + crc.write(self.cf.as_bytes()); + crc.write(&self.size.to_le_bytes()); + crc.write(&self.input_min_ts.to_le_bytes()); + crc.write(&self.input_max_ts.to_le_bytes()); + crc.write(&self.compact_from_ts.to_le_bytes()); + crc.write(&self.compact_to_ts.to_le_bytes()); + crc.write(&protobuf::ProtobufEnum::value(&self.ty).to_le_bytes()); + crc.write(&self.min_key); + crc.write(&self.max_key); + crc64_xor ^= crc.sum64(); + + crc64_xor + } + + pub fn to_pb_meta(&self) -> brpb::LogFileSubcompactionMeta { + let mut out = brpb::LogFileSubcompactionMeta::default(); + out.set_table_id(self.table_id); + out.set_region_id(self.region_id); + out.set_cf(self.cf.to_owned()); + out.set_size(self.size); + out.set_input_min_ts(self.input_min_ts); + out.set_input_max_ts(self.input_max_ts); + out.set_compact_from_ts(self.compact_from_ts); + out.set_compact_until_ts(self.compact_to_ts); + out.set_min_key(self.min_key.to_vec()); + out.set_max_key(self.max_key.to_vec()); + out.set_sources(self.inputs_to_pb().into()); + out + } + + fn inputs_to_pb(&self) -> Vec { + let mut res = HashMap::<&str, brpb::SpansOfFile>::new(); + + for input in &self.inputs { + let spans = res.entry(&input.id.name).or_insert_with(|| { + let mut s = brpb::SpansOfFile::new(); + s.set_path(input.id.name.to_string()); + s + }); + spans.mut_spans().push(input.id.span()); + } + + res.into_values().collect() + } + + pub fn singleton(c: LogFile) -> Self { + Self::of_many([c]) + } + + pub fn of_many(items: impl IntoIterator) -> Self { + let mut it = items.into_iter(); + let initial_file = it.next().expect("of_many: empty iterator"); + let mut c = UnformedSubcompaction::by_file(&initial_file); + let key = SubcompactionCollectKey::by_file(&initial_file); + for item in it { + assert_eq!(key, SubcompactionCollectKey::by_file(&item)); + c.add_file(item); + } + + c.form( + &key, + 
&CollectSubcompactionConfig { + compact_from_ts: 0, + compact_to_ts: u64::MAX, + subcompaction_size_threshold: 0, + }, + ) + } +} + +impl LogFileId { + pub fn span(&self) -> brpb::Span { + let mut span = brpb::Span::new(); + span.set_offset(self.offset); + span.set_length(self.length); + span + } +} + +#[derive(Eq, PartialEq, Debug)] +struct SortByOffset(LogFileId); + +impl PartialOrd for SortByOffset { + fn partial_cmp(&self, other: &Self) -> Option { + self.0.offset.partial_cmp(&other.0.offset) + } +} + +impl Ord for SortByOffset { + fn cmp(&self, other: &Self) -> std::cmp::Ordering { + self.0.offset.cmp(&other.0.offset) + } +} + +/// Collecting metadata of subcomapctions. +/// +/// Finally, it calculates which files can be deleted. +#[derive(Default, Debug)] +pub struct CompactionRunInfoBuilder { + files: HashMap, BTreeSet>, + compaction: brpb::LogFileCompaction, +} + +/// A set of deletable log files from the same metadata. +pub struct ExpiringFilesOfMeta { + meta_path: Arc, + logs: Vec>, + /// Whether the log file is still needed. + /// + /// When we are going to delete every log files recoreded in a log file, the + /// logfile itself can also be removed. + destruct_self: bool, + /// The logical log files that can be removed. + spans_of_file: HashMap, (Vec, /* physical file size */ u64)>, +} + +impl ExpiringFilesOfMeta { + /// Create a list of expliring log files from a meta file. + pub fn of(path: &Arc) -> Self { + Self { + meta_path: Arc::clone(path), + logs: vec![], + destruct_self: false, + spans_of_file: Default::default(), + } + } + + /// Whether we are going to delete nothing. + pub fn is_empty(&self) -> bool { + self.logs.is_empty() && self.spans_of_file.is_empty() && !self.destruct_self + } + + /// Get the list of physical files that can be deleted. 
+ pub fn to_delete(&self) -> impl Iterator + '_ { + self.logs.iter().map(|s| s.as_ref()) + } + + pub fn spans(&self) -> impl Iterator + '_ { + self.spans_of_file.iter().map(|(file, (spans, size))| { + let mut so = DeleteSpansOfFile::new(); + so.set_path(file.to_string()); + so.set_spans(spans.clone().into()); + so.set_whole_file_length(*size); + so + }) + } +} + +impl CompactionRunInfoBuilder { + pub fn is_empty(&self) -> bool { + self.files.is_empty() + } + + pub fn add_subcompaction(&mut self, c: &SubcompactionResult) { + for file in &c.origin.inputs { + if !self.files.contains_key(&file.id.name) { + self.files + .insert(Arc::clone(&file.id.name), Default::default()); + } + self.files + .get_mut(&file.id.name) + .unwrap() + .insert(SortByOffset(file.id.clone())); + } + self.compaction.artifacts_hash ^= c.origin.crc64(); + } + + pub fn mut_meta(&mut self) -> &mut brpb::LogFileCompaction { + &mut self.compaction + } + + pub async fn write_migration(&self, s: &dyn ExternalStorage) -> Result<()> { + let migration = self.migration_of(self.find_expiring_files(s).await?); + let wrapped_storage = MigartionStorageWrapper::new(s); + wrapped_storage.write(migration).await?; + Ok(()) + } + + pub fn migration_of(&self, metas: Vec) -> brpb::Migration { + let mut migration = brpb::Migration::new(); + for files in metas { + let mut medit = brpb::MetaEdit::new(); + medit.set_path(files.meta_path.to_string()); + for file in files.to_delete() { + medit.delete_physical_files.push(file.to_owned()); + } + for span in files.spans() { + medit.delete_logical_files.push(span) + } + medit.destruct_self = files.destruct_self; + migration.edit_meta.push(medit); + } + migration.mut_compactions().push(self.compaction.clone()); + migration + } + + async fn find_expiring_files( + &self, + s: &dyn ExternalStorage, + ) -> Result> { + let ext = LoadFromExt::default(); + let mut storage = StreamMetaStorage::load_from_ext(s, ext); + + let mut result = vec![]; + while let Some(item) = storage.try_next().await? 
{ + let exp = self.expiring(&item); + if !exp.is_empty() { + result.push(exp); + } + } + Ok(result) + } + + fn full_covers(&self, file: &PhysicalLogFile) -> bool { + match self.files.get(&file.name) { + None => false, + Some(spans) => { + let mut cur_offset = 0; + for span in spans { + if span.0.offset != cur_offset { + return false; + } + cur_offset += span.0.length + } + assert!( + cur_offset <= file.size, + "{},{},{:?}", + cur_offset, + file.size, + spans + ); + cur_offset == file.size + } + } + } + + fn expiring(&self, file: &MetaFile) -> ExpiringFilesOfMeta { + let mut result = ExpiringFilesOfMeta::of(&file.name); + let mut all_full_covers = true; + for p in &file.physical_files { + let full_covers = self.full_covers(p); + if full_covers { + result.logs.push(Arc::clone(&p.name)) + } else { + if let Some(vs) = self.files.get(&p.name) { + let segs = result + .spans_of_file + .entry(Arc::clone(&p.name)) + .or_insert_with(|| (vec![], p.size)); + for f in vs { + segs.0.push(f.0.span()); + } + } + all_full_covers = false; + } + } + if all_full_covers { + result.destruct_self = true; + } + result + } +} + +impl EpochHint { + pub fn to_pb(&self) -> brpb::RegionMetaHint { + let mut out = brpb::RegionMetaHint::default(); + out.set_start_key(self.start_key.to_vec()); + out.set_end_key(self.end_key.to_vec()); + out.set_region_epoch(self.region_epoch.into()); + out + } +} + +#[cfg(test)] +mod test { + use external_storage::ExternalStorage; + use kvproto::brpb; + + use super::CompactionRunInfoBuilder; + use crate::{ + compaction::{exec::SubcompactionExec, Subcompaction, SubcompactionResult}, + test_util::{gen_min_max, KvGen, LogFileBuilder, TmpStorage}, + }; + + impl CompactionRunInfoBuilder { + async fn mig(&self, s: &dyn ExternalStorage) -> crate::Result { + Ok(self.migration_of(self.find_expiring_files(s).await?)) + } + } + + #[tokio::test] + async fn test_collect_single() { + let const_val = |_| b"fiolvit".to_vec(); + let g1 = + LogFileBuilder::from_iter(KvGen::new(gen_min_max(1, 1, 2, 10, 20), const_val), |_| {}); + let g2 = + LogFileBuilder::from_iter(KvGen::new(gen_min_max(1, 3, 4, 15, 25), const_val), |_| {}); + let st = TmpStorage::create(); + let m = st + .build_flush("1.log", "v1/backupmeta/1.meta", [g1, g2]) + .await; + + let mut coll = CompactionRunInfoBuilder::default(); + let cr = SubcompactionExec::default_config(st.storage().clone()); + let subc = Subcompaction::singleton(m.physical_files[0].files[0].clone()); + let res = cr.run(subc, Default::default()).await.unwrap(); + coll.add_subcompaction(&res); + let mig = coll.mig(st.storage().as_ref()).await.unwrap(); + assert_eq!(mig.edit_meta.len(), 1); + assert!(!mig.edit_meta[0].destruct_self); + + let mut coll = CompactionRunInfoBuilder::default(); + let subc = Subcompaction::of_many(m.physical_files[0].files.iter().cloned()); + coll.add_subcompaction(&SubcompactionResult::of(subc)); + let mig = coll.mig(st.storage().as_ref()).await.unwrap(); + assert_eq!(mig.edit_meta.len(), 1); + assert!(mig.edit_meta[0].destruct_self); + } + + #[tokio::test] + async fn test_collect_many() { + let const_val = |_| b"fiolvit".to_vec(); + let st = TmpStorage::create(); + let of_region = |region| { + LogFileBuilder::from_iter( + KvGen::new(gen_min_max(region, 1, 2, 10, 20), const_val), + |v| v.region_id = region as u64, + ) + }; + let f1 = st + .build_flush( + "1.log", + "v1/backupmeta/1.meta", + [of_region(1), of_region(2), of_region(3)], + ) + .await; + let f2 = st + .build_flush( + "2.log", + "v1/backupmeta/2.meta", + [of_region(1), of_region(2), 
of_region(3)], + ) + .await; + + let subc1 = Subcompaction::of_many([ + f1.physical_files[0].files[0].clone(), + f2.physical_files[0].files[0].clone(), + ]); + let subc2 = Subcompaction::of_many([ + f1.physical_files[0].files[1].clone(), + f2.physical_files[0].files[1].clone(), + ]); + let subc3 = Subcompaction::singleton(f2.physical_files[0].files[2].clone()); + + let mut coll = CompactionRunInfoBuilder::default(); + coll.add_subcompaction(&SubcompactionResult::of(subc1)); + coll.add_subcompaction(&SubcompactionResult::of(subc2)); + coll.add_subcompaction(&SubcompactionResult::of(subc3)); + let mig = coll.mig(st.storage().as_ref()).await.unwrap(); + assert_eq!(mig.edit_meta.len(), 2); + let check = |me: &brpb::MetaEdit| match me.get_path() { + "v1/backupmeta/1.meta" => { + assert!(!me.destruct_self); + assert_eq!(me.delete_logical_files.len(), 1); + assert_eq!(me.delete_logical_files[0].spans.len(), 2); + } + "v1/backupmeta/2.meta" => { + assert!(me.destruct_self); + assert_eq!(me.delete_physical_files.len(), 1, "{:?}", me); + assert_eq!(me.delete_logical_files.len(), 0, "{:?}", me); + } + _ => unreachable!(), + }; + mig.edit_meta.iter().for_each(check); + } +} diff --git a/components/compact-log-backup/src/compaction/mod.rs b/components/compact-log-backup/src/compaction/mod.rs new file mode 100644 index 00000000000..3365fb0499f --- /dev/null +++ b/components/compact-log-backup/src/compaction/mod.rs @@ -0,0 +1,198 @@ +// Copyright 2024 TiKV Project Authors. Licensed under Apache-2.0. +use std::{collections::HashSet, ops::Deref, sync::Arc}; + +use derive_more::Display; +use kvproto::brpb::{self, FileType}; + +use self::collector::CollectSubcompactionConfig; +use crate::{ + statistic::{LoadStatistic, SubcompactStatistic}, + storage::{Epoch, LogFile, LogFileId}, + util, +}; + +pub const SST_OUT_REL: &str = "outputs"; +pub const META_OUT_REL: &str = "metas"; + +#[derive(Debug, Clone)] +pub struct Input { + pub id: LogFileId, + pub compression: brpb::CompressionType, + pub crc64xor: u64, + pub key_value_size: u64, + pub num_of_entries: u64, +} + +/// The group key of collecting subcompactions. +#[derive(Hash, Debug, PartialEq, Eq, Clone, Copy, Display)] +#[display( + fmt = "key(r={},{},{:?},m?={},t={})", + region_id, + cf, + ty, + is_meta, + table_id +)] +pub struct SubcompactionCollectKey { + pub cf: &'static str, + pub region_id: u64, + pub ty: FileType, + pub is_meta: bool, + pub table_id: i64, +} + +/// A subcompaction. +#[derive(Debug, Display, Clone)] +#[display(fmt = "compaction({},sz={})", subc_key, size)] +pub struct Subcompaction { + pub inputs: Vec, + pub size: u64, + pub subc_key: SubcompactionCollectKey, + + pub input_max_ts: u64, + pub input_min_ts: u64, + pub compact_from_ts: u64, + pub compact_to_ts: u64, + pub min_key: Arc<[u8]>, + pub max_key: Arc<[u8]>, + pub epoch_hints: Vec, +} + +#[derive(Debug, Clone, PartialEq, Eq, Hash)] +pub struct EpochHint { + pub start_key: Arc<[u8]>, + pub end_key: Arc<[u8]>, + pub region_epoch: Epoch, +} + +// "Embed" the subcompaction collect key field here. +impl Deref for Subcompaction { + type Target = SubcompactionCollectKey; + + fn deref(&self) -> &Self::Target { + &self.subc_key + } +} + +#[derive(Debug, Clone)] +pub struct SubcompactionResult { + /// The origin subcompaction. + pub origin: Subcompaction, + /// The serializable metadata of this subcompaction. + pub meta: brpb::LogFileSubcompaction, + + /// The expected crc64 for the generated SSTs. + pub expected_crc64: Option, + /// The expected key count for the generated SSTs. 
+ pub expected_keys: u64, + /// The expected logical data size for the generated SSTs. + pub expected_size: u64, + + pub load_stat: LoadStatistic, + pub compact_stat: SubcompactStatistic, +} + +impl SubcompactionResult { + pub fn of(origin: Subcompaction) -> Self { + Self { + meta: Default::default(), + expected_crc64: Some(0), + expected_keys: Default::default(), + expected_size: Default::default(), + load_stat: Default::default(), + compact_stat: Default::default(), + origin, + } + } +} + +#[derive(Debug)] +struct UnformedSubcompaction { + size: u64, + inputs: Vec, + min_ts: u64, + max_ts: u64, + min_key: Arc<[u8]>, + max_key: Arc<[u8]>, + epoch_hints: HashSet, +} + +impl UnformedSubcompaction { + /// create the initial state by a singleton file. + fn by_file(file: &LogFile) -> Self { + let epoch_hints = file.epoch_hints().collect(); + UnformedSubcompaction { + size: file.file_real_size, + inputs: vec![to_input(file)], + min_ts: file.min_ts, + max_ts: file.max_ts, + min_key: file.min_key.clone(), + max_key: file.max_key.clone(), + epoch_hints, + } + } + + /// Form a [`Subcompaction`] by the current state. + fn form( + self, + key: &SubcompactionCollectKey, + cfg: &CollectSubcompactionConfig, + ) -> Subcompaction { + Subcompaction { + inputs: self.inputs, + size: self.size, + input_min_ts: self.min_ts, + input_max_ts: self.max_ts, + min_key: self.min_key.clone(), + max_key: self.max_key.clone(), + compact_from_ts: cfg.compact_from_ts, + compact_to_ts: cfg.compact_to_ts, + subc_key: *key, + epoch_hints: self.epoch_hints.into_iter().collect(), + } + } + + /// add a new file to the state. + fn add_file(&mut self, file: LogFile) { + self.epoch_hints.extend(file.epoch_hints()); + + self.inputs.push(to_input(&file)); + self.size += file.file_real_size; + self.min_ts = self.min_ts.min(file.min_ts); + self.max_ts = self.max_ts.max(file.max_ts); + if self.max_key < file.max_key { + self.max_key = file.max_key; + } + if self.min_key > file.min_key { + self.min_key = file.min_key; + } + } +} + +impl SubcompactionCollectKey { + /// extract the keys from the meta file. + fn by_file(file: &LogFile) -> Self { + SubcompactionCollectKey { + is_meta: file.is_meta, + region_id: file.region_id, + cf: file.cf, + ty: file.ty, + table_id: file.table_id, + } + } +} + +/// Convert a log file to an input of compaction. +fn to_input(file: &LogFile) -> Input { + Input { + id: file.id.clone(), + compression: file.compression, + crc64xor: file.crc64xor, + key_value_size: file.hacky_key_value_size(), + num_of_entries: file.number_of_entries as u64, + } +} + +pub mod collector; +pub mod exec; +pub mod meta; diff --git a/components/compact-log-backup/src/errors.rs b/components/compact-log-backup/src/errors.rs new file mode 100644 index 00000000000..ec935da185b --- /dev/null +++ b/components/compact-log-backup/src/errors.rs @@ -0,0 +1,119 @@ +// Copyright 2024 TiKV Project Authors. Licensed under Apache-2.0. 
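+
+//! Error handling for log-backup compaction. [`Error`] wraps an [`ErrorKind`]
+//! together with free-form notes and the caller locations collected via
+//! `#[track_caller]`, so a failure can be traced back without a full
+//! backtrace.
+//!
+//! A rough sketch of how the extension traits are meant to be used
+//! (illustrative only; `load_meta` and `parse_name` are hypothetical callers):
+//!
+//! ```ignore
+//! // Record the current frame and attach a human-readable note.
+//! let meta = load_meta(&path).await.annotate("while loading meta file")?;
+//! // Convert a foreign error type into `ErrorKind::Other`.
+//! let name = parse_name(&raw).adapt_err()?;
+//! ```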
+use std::{fmt::Display, panic::Location}; + +use thiserror::Error as ThisError; +use tikv_util::codec; + +pub type Result = std::result::Result; + +#[derive(Debug)] +pub struct Error { + pub kind: ErrorKind, + pub notes: String, + pub attached_frames: Vec>, +} + +impl Display for Error { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + f.write_fmt(format_args!("{}", self.kind))?; + if !self.notes.is_empty() { + f.write_fmt(format_args!(" (note = {})", self.notes))?; + } + if let Some(l) = self.attached_frames.first() { + f.write_fmt(format_args!( + " (top_caller = {}:{}:{})", + l.file(), + l.line(), + l.column() + ))?; + } + + Ok(()) + } +} + +#[derive(ThisError, Debug)] +pub enum ErrorKind { + #[error("I/O {0}")] + Io(#[from] std::io::Error), + #[error("Protobuf {0}")] + Protobuf(#[from] protobuf::error::ProtobufError), + #[error("Engine {0}")] + Engine(#[from] engine_traits::Error), + #[error("Codec {0}")] + Codec(#[from] codec::Error), + #[error("Uncategorised Error {0}")] + Other(String), +} + +impl> From for Error { + #[track_caller] + fn from(value: T) -> Self { + Error { + kind: value.into(), + notes: String::new(), + attached_frames: vec![*Location::caller()], + } + } +} + +pub trait TraceResultExt { + fn trace_err(self) -> Self; + fn annotate(self, message: impl Display) -> Self; +} + +impl TraceResultExt for Result { + #[track_caller] + fn trace_err(self) -> Result { + match self { + Ok(v) => Ok(v), + Err(err) => Err(err.attach_current_frame()), + } + } + + #[track_caller] + fn annotate(self, message: impl Display) -> Result { + match self { + Ok(v) => Ok(v), + Err(mut err) => { + err.notes = message.to_string(); + Err(err.attach_current_frame()) + } + } + } +} + +pub trait OtherErrExt { + fn adapt_err(self) -> Result; +} + +impl OtherErrExt for std::result::Result { + #[track_caller] + fn adapt_err(self) -> Result { + match self { + Ok(t) => Ok(t), + Err(err) => Err(Error { + kind: ErrorKind::Other(err.to_string()), + notes: String::new(), + attached_frames: vec![*Location::caller()], + }), + } + } +} + +impl Error { + #[track_caller] + pub fn attach_current_frame(mut self) -> Self { + self.attached_frames.push(*Location::caller()); + self + } + + pub fn message(mut self, m: impl Display) -> Self { + if self.notes.is_empty() { + self.notes = m.to_string(); + } else { + self.notes = format!("{}: {}", self.notes, m); + } + self + } +} diff --git a/components/compact-log-backup/src/exec_hooks/checkpoint.rs b/components/compact-log-backup/src/exec_hooks/checkpoint.rs new file mode 100644 index 00000000000..6bbfe42422b --- /dev/null +++ b/components/compact-log-backup/src/exec_hooks/checkpoint.rs @@ -0,0 +1,81 @@ +// Copyright 2024 TiKV Project Authors. Licensed under Apache-2.0. + +use std::{collections::HashSet, os::unix::ffi::OsStrExt, path::Path}; + +use external_storage::ExternalStorage; +use futures::stream::TryStreamExt; +use tikv_util::{info, time::Instant, warn}; + +use crate::{ + compaction::META_OUT_REL, + execute::hooking::{BeforeStartCtx, CId, ExecHooks, SubcompactionStartCtx}, + ErrorKind, OtherErrExt, Result, TraceResultExt, +}; + +#[derive(Default)] +pub struct Checkpoint { + loaded: HashSet, +} + +impl Checkpoint { + async fn load(&mut self, storage: &dyn ExternalStorage, dir: &str) -> Result<()> { + let mut stream = storage.iter_prefix(dir); + let begin = Instant::now(); + info!("Checkpoint: start loading."; "current" => %self.loaded.len()); + + while let Some(v) = stream.try_next().await? 
{ + let hash = match Self::hash_of(&v.key) { + Err(err) => { + warn!("Checkpoint: failed to get hash of file, skipping it."; "err" => %err); + continue; + } + Ok(h) => h, + }; + self.loaded.insert(hash); + } + info!("Checkpoint: loaded finished tasks."; "current" => %self.loaded.len(), "take" => ?begin.saturating_elapsed()); + Ok(()) + } + + fn hash_of(key: &str) -> Result { + // The file name is: + // {MIN_TS}_{MAX_TS}_{COMPACTION_HASH}.cmeta + // NOTE: perhaps we need to a safer way to load hash... + + let file_name = Path::new(key).file_name().unwrap_or_default(); + let segs = file_name + .as_bytes() + .strip_suffix(b".cmeta") + .map(|v| v.split(|c| *c == b'_').collect::>()) + .unwrap_or_default(); + if segs.len() < 3 || segs[2].len() != 16 { + let err_msg = + format!("Checkpoint: here is a file we cannot get hash, skipping it. name = {key}"); + return Err(ErrorKind::Other(err_msg).into()); + } + let mut hash_bytes = [0u8; 8]; + hex::decode_to_slice(segs[2], &mut hash_bytes) + .adapt_err() + .annotate(format_args!("trying parse {:?} to hex", segs[2]))?; + Ok(u64::from_be_bytes(hash_bytes)) + } +} + +impl ExecHooks for Checkpoint { + async fn before_execution_started(&mut self, cx: BeforeStartCtx<'_>) -> crate::Result<()> { + self.load( + cx.storage, + &format!("{}/{}", cx.this.out_prefix, META_OUT_REL), + ) + .await + } + + fn before_a_subcompaction_start(&mut self, _cid: CId, cx: SubcompactionStartCtx<'_>) { + let hash = cx.subc.crc64(); + if self.loaded.contains(&hash) { + info!("Checkpoint: skipping a subcompaction because we have found it."; + "subc" => %cx.subc, "hash" => %format_args!("{:16X}", hash)); + cx.skip(); + } + } +} diff --git a/components/compact-log-backup/src/exec_hooks/consistency.rs b/components/compact-log-backup/src/exec_hooks/consistency.rs new file mode 100644 index 00000000000..c22a49c9719 --- /dev/null +++ b/components/compact-log-backup/src/exec_hooks/consistency.rs @@ -0,0 +1,104 @@ +// Copyright 2024 TiKV Project Authors. Licensed under Apache-2.0. + +pub use engine_traits::SstCompressionType; +use external_storage::{locking::RemoteLock, ExternalStorage}; +use futures::{io::AsyncReadExt, stream::TryStreamExt}; +use tikv_util::warn; + +use crate::{ + errors::Result, + execute::hooking::{AbortedCtx, AfterFinishCtx, BeforeStartCtx, ExecHooks}, + storage::LOCK_PREFIX, + util::storage_url, + ErrorKind, TraceResultExt, +}; + +#[derive(Default)] +pub struct StorageConsistencyGuard { + lock: Option, +} + +async fn load_storage_checkpoint(storage: &dyn ExternalStorage) -> Result> { + let path = "v1/global_checkpoint/"; + storage + .iter_prefix(path) + .err_into() + .try_fold(None, |i, v| async move { + if !v.key.ends_with(".ts") { + return Ok(i); + } + let mut ts = vec![]; + storage.read(&v.key).read_to_end(&mut ts).await?; + let ts_bytes = <[u8; 8]>::try_from(ts); + let ts = match ts_bytes { + Ok(bytes) => u64::from_le_bytes(bytes), + Err(_) => { + warn!("Cannot parse ts from file."; "file" => %v.key); + return Ok(i); + } + }; + let res = match i { + None => Some(ts), + Some(ts0) => Some(ts.min(ts0)), + }; + Ok(res) + }) + .await +} + +impl ExecHooks for StorageConsistencyGuard { + async fn before_execution_started(&mut self, cx: BeforeStartCtx<'_>) -> Result<()> { + use external_storage::locking::LockExt; + + let cp = load_storage_checkpoint(cx.storage) + .await + .annotate("failed to load storage checkpoint")?; + match cp { + Some(cp) => { + if cx.this.cfg.until_ts > cp { + let err_msg = format!( + "The `--until`({}) is greater than the checkpoint({}). 
We cannot compact unstable content for now.", + cx.this.cfg.until_ts, cp + ); + + // We use `?` instead of return here to keep the stack frame in the error. + // Or if we use `.into()` the frame attached will be the default implementation + // of `Into`... + Err(ErrorKind::Other(err_msg))?; + } + } + None => { + let url = storage_url(cx.storage); + warn!("No checkpoint loaded, maybe wrong storage used?"; "url" => %url); + Err(ErrorKind::Other(format!( + "Cannot load checkpoint from {}", + url + )))?; + } + } + + let hint = format!( + "This is generated by the compaction {}.", + cx.this.gen_name() + ); + self.lock = Some(cx.storage.lock_for_read(LOCK_PREFIX, hint).await?); + + Ok(()) + } + + async fn after_execution_finished(&mut self, cx: AfterFinishCtx<'_>) -> Result<()> { + if let Some(lock) = self.lock.take() { + lock.unlock(cx.storage).await?; + } + Ok(()) + } + + async fn on_aborted(&mut self, cx: AbortedCtx<'_>) { + if let Some(lock) = self.lock.take() { + warn!("It seems compaction failed. Resolving the lock."; "err" => %cx.err); + if let Err(err) = lock.unlock(cx.storage).await { + warn!("Failed to unlock when failed, you may resolve the lock manually"; "err" => %err, "lock" => ?lock); + } + } + } +} diff --git a/components/compact-log-backup/src/exec_hooks/mod.rs b/components/compact-log-backup/src/exec_hooks/mod.rs new file mode 100644 index 00000000000..134887051a6 --- /dev/null +++ b/components/compact-log-backup/src/exec_hooks/mod.rs @@ -0,0 +1,38 @@ +// Copyright 2024 TiKV Project Authors. Licensed under Apache-2.0. + +pub use engine_traits::SstCompressionType; + +use crate::{ + compaction::SubcompactionResult, + statistic::{ + CollectSubcompactionStatistic, LoadMetaStatistic, LoadStatistic, SubcompactStatistic, + }, +}; + +pub mod checkpoint; +pub mod consistency; +pub mod observability; +pub mod save_meta; + +#[derive(Default)] +pub struct CollectStatistic { + load_stat: LoadStatistic, + compact_stat: SubcompactStatistic, + load_meta_stat: LoadMetaStatistic, + collect_stat: CollectSubcompactionStatistic, +} + +impl CollectStatistic { + fn update_subcompaction(&mut self, res: &SubcompactionResult) { + self.load_stat += res.load_stat.clone(); + self.compact_stat += res.compact_stat.clone(); + } + + fn update_collect_compaction_stat(&mut self, stat: &CollectSubcompactionStatistic) { + self.collect_stat += stat.clone() + } + + fn update_load_meta_stat(&mut self, stat: &LoadMetaStatistic) { + self.load_meta_stat += stat.clone() + } +} diff --git a/components/compact-log-backup/src/exec_hooks/observability.rs b/components/compact-log-backup/src/exec_hooks/observability.rs new file mode 100644 index 00000000000..b6fe6583ef5 --- /dev/null +++ b/components/compact-log-backup/src/exec_hooks/observability.rs @@ -0,0 +1,133 @@ +// Copyright 2024 TiKV Project Authors. Licensed under Apache-2.0. + +pub use engine_traits::SstCompressionType; +use tikv_util::{ + error, info, + logger::{get_log_level, Level}, + warn, +}; +use tokio::{io::AsyncWriteExt, signal::unix::SignalKind}; + +use super::CollectStatistic; +use crate::{ + errors::Result, + execute::hooking::{ + AbortedCtx, AfterFinishCtx, BeforeStartCtx, CId, ExecHooks, SubcompactionFinishCtx, + SubcompactionStartCtx, + }, + statistic::prom, + storage::StreamMetaStorage, + util::storage_url, + ErrorKind, +}; + +/// The hooks that used for an execution from a TTY. Providing the basic +/// observability related to the progress of the comapction. 
+/// +/// This prints the log when events happens, and prints statistics after +/// compaction finished. +/// +/// This also enables async-backtrace, you can send `SIGUSR1` to the executing +/// compaction task and the running async tasks will be dumped to a file. +#[derive(Default)] +pub struct Observability { + stats: CollectStatistic, + meta_len: u64, +} + +impl ExecHooks for Observability { + fn before_a_subcompaction_start(&mut self, cid: CId, cx: SubcompactionStartCtx<'_>) { + let c = cx.subc; + self.stats + .update_collect_compaction_stat(cx.collect_compaction_stat_diff); + self.stats.update_load_meta_stat(cx.load_stat_diff); + + let level = get_log_level(); + if level < Some(Level::Info) { + warn!("Most of compact-log progress logs are only enabled in the `info` level."; "current_level" => ?level); + } + + info!("Spawning compaction."; "cid" => cid.0, + "cf" => c.cf, + "input_min_ts" => c.input_min_ts, + "input_max_ts" => c.input_max_ts, + "source" => c.inputs.len(), + "size" => c.size, + "region_id" => c.region_id); + } + + async fn after_a_subcompaction_end( + &mut self, + cid: CId, + cx: SubcompactionFinishCtx<'_>, + ) -> Result<()> { + let lst = &cx.result.load_stat; + let cst = &cx.result.compact_stat; + let logical_input_size = lst.logical_key_bytes_in + lst.logical_value_bytes_in; + let total_take = + cst.load_duration + cst.sort_duration + cst.save_duration + cst.write_sst_duration; + let speed = logical_input_size as f64 / total_take.as_millis() as f64; + + self.stats.update_subcompaction(cx.result); + + prom::COMPACT_LOG_BACKUP_LOAD_DURATION.observe(cst.load_duration.as_secs_f64()); + prom::COMPACT_LOG_BACKUP_SORT_DURATION.observe(cst.sort_duration.as_secs_f64()); + prom::COMPACT_LOG_BACKUP_SAVE_DURATION.observe(cst.save_duration.as_secs_f64()); + prom::COMPACT_LOG_BACKUP_WRITE_SST_DURATION.observe(cst.write_sst_duration.as_secs_f64()); + + info!("Finishing compaction."; + "meta_completed" => self.stats.load_meta_stat.meta_files_in, + "meta_total" => self.meta_len, + "bytes_to_compact" => self.stats.collect_stat.bytes_in, + "bytes_compacted" => self.stats.collect_stat.bytes_out, + "cid" => cid.0, + "load_stat" => ?lst, + "compact_stat" => ?cst, + "speed(KiB/s)" => speed, + "total_take" => ?total_take, + "global_load_meta_stat" => ?self.stats.load_meta_stat); + Ok(()) + } + + async fn on_aborted(&mut self, cx: AbortedCtx<'_>) { + error!("Compaction aborted."; "err" => %cx.err); + } + + async fn after_execution_finished(&mut self, cx: AfterFinishCtx<'_>) -> Result<()> { + if self.stats.load_meta_stat.meta_files_in == 0 { + let url = storage_url(cx.storage); + warn!("No meta files loaded, maybe wrong storage used?"; "url" => %url); + return Err(ErrorKind::Other(format!("Nothing loaded from {}", url)).into()); + } + info!("All compactions done."); + Ok(()) + } + + async fn before_execution_started(&mut self, cx: BeforeStartCtx<'_>) -> Result<()> { + tracing_active_tree::init(); + + let sigusr1_handler = async { + let mut signal = tokio::signal::unix::signal(SignalKind::user_defined1()).unwrap(); + while signal.recv().await.is_some() { + let file_name = "/tmp/compact-sst.dump".to_owned(); + let res = async { + let mut file = tokio::fs::File::create(&file_name).await?; + file.write_all(&tracing_active_tree::layer::global().fmt_bytes()) + .await + } + .await; + match res { + Ok(_) => warn!("dumped async backtrace."; "to" => file_name), + Err(err) => warn!("failed to dump async backtrace."; "err" => %err), + } + } + }; + + cx.async_rt.spawn(sigusr1_handler); + self.meta_len = 
StreamMetaStorage::count_objects(cx.storage).await?; + + info!("About to start compaction."; &cx.this.cfg, + "url" => cx.storage.url().map(|v| v.to_string()).unwrap_or_else(|err| format!(""))); + Ok(()) + } +} diff --git a/components/compact-log-backup/src/exec_hooks/save_meta.rs b/components/compact-log-backup/src/exec_hooks/save_meta.rs new file mode 100644 index 00000000000..adfc7225ec4 --- /dev/null +++ b/components/compact-log-backup/src/exec_hooks/save_meta.rs @@ -0,0 +1,163 @@ +// Copyright 2024 TiKV Project Authors. Licensed under Apache-2.0. +use chrono::Local; +pub use engine_traits::SstCompressionType; +use external_storage::UnpinReader; +use futures::{future::TryFutureExt, io::Cursor}; +use kvproto::brpb; +use tikv_util::{ + stream::{retry, JustRetry}, + warn, +}; + +use super::CollectStatistic; +use crate::{ + compaction::{meta::CompactionRunInfoBuilder, META_OUT_REL, SST_OUT_REL}, + errors::Result, + execute::hooking::{ + AfterFinishCtx, BeforeStartCtx, CId, ExecHooks, SubcompactionFinishCtx, + SubcompactionStartCtx, + }, + statistic::CompactLogBackupStatistic, + util, +}; + +/// Save the metadata to external storage after every subcompaction. After +/// everything done, it saves the whole compaction to a "migration" that can be +/// read by the BR CLI. +/// +/// This is an essential plugin for real-world compacting, as single SST cannot +/// be restored. +/// +/// "But why not just save the metadata of compaction in +/// [`SubcompactionExec`](crate::compaction::exec::SubcompactionExec)?" +/// +/// First, As the hook system isn't exposed to end user, whether inlining this +/// is transparent to them -- they won't mistakely forget to add this hook and +/// ruin everything. +/// +/// Also this make `SubcompactionExec` standalone, it will be easier to test. +/// +/// The most important is, the hook knows metadata crossing subcompactions, +/// we can then optimize the arrangement of subcompactions (say, batching +/// subcompactoins), and save the final result in a single migration. +/// While [`SubcompactionExec`](crate::compaction::exec::SubcompactionExec) +/// knows only the subcompaction it handles, it is impossible to do such +/// optimizations. 
+pub struct SaveMeta { + collector: CompactionRunInfoBuilder, + stats: CollectStatistic, + begin: chrono::DateTime, +} + +impl Default for SaveMeta { + fn default() -> Self { + Self { + collector: Default::default(), + stats: Default::default(), + begin: Local::now(), + } + } +} + +impl SaveMeta { + fn comments(&self) -> String { + let now = Local::now(); + let stat = CompactLogBackupStatistic { + start_time: self.begin, + end_time: Local::now(), + time_taken: (now - self.begin).to_std().unwrap_or_default(), + exec_by: tikv_util::sys::hostname().unwrap_or_default(), + + load_stat: self.stats.load_stat.clone(), + subcompact_stat: self.stats.compact_stat.clone(), + load_meta_stat: self.stats.load_meta_stat.clone(), + collect_subcompactions_stat: self.stats.collect_stat.clone(), + prometheus: Default::default(), + }; + serde_json::to_string(&stat).unwrap_or_else(|err| format!("ERR DURING MARSHALING: {}", err)) + } +} + +impl ExecHooks for SaveMeta { + async fn before_execution_started(&mut self, cx: BeforeStartCtx<'_>) -> Result<()> { + self.begin = Local::now(); + let run_info = &mut self.collector; + run_info.mut_meta().set_name(cx.this.gen_name()); + run_info + .mut_meta() + .set_compaction_from_ts(cx.this.cfg.from_ts); + run_info + .mut_meta() + .set_compaction_until_ts(cx.this.cfg.until_ts); + run_info + .mut_meta() + .set_artifacts(format!("{}/{}", cx.this.out_prefix, META_OUT_REL)); + run_info + .mut_meta() + .set_generated_files(format!("{}/{}", cx.this.out_prefix, SST_OUT_REL)); + Ok(()) + } + + fn before_a_subcompaction_start(&mut self, _cid: CId, c: SubcompactionStartCtx<'_>) { + self.stats + .update_collect_compaction_stat(c.collect_compaction_stat_diff); + self.stats.update_load_meta_stat(c.load_stat_diff); + } + + async fn after_a_subcompaction_end( + &mut self, + _cid: CId, + cx: SubcompactionFinishCtx<'_>, + ) -> Result<()> { + use protobuf::Message; + + self.collector.add_subcompaction(cx.result); + self.stats.update_subcompaction(cx.result); + + let first_version = cx + .result + .meta + .region_meta_hints + .first() + .map(|h| { + format!( + "_{}_{}", + h.get_region_epoch().get_version(), + h.get_region_epoch().get_conf_ver() + ) + }) + .unwrap_or_default(); + let meta_name = format!( + "{}_{}_{}_{}{}.cmeta", + util::aligned_u64(cx.result.origin.input_min_ts), + util::aligned_u64(cx.result.origin.input_max_ts), + util::aligned_u64(cx.result.origin.crc64()), + cx.result.origin.region_id, + first_version + ); + let meta_name = format!("{}/{}/{}", cx.this.out_prefix, META_OUT_REL, meta_name); + let mut metas = brpb::LogFileSubcompactions::new(); + metas.mut_subcompactions().push(cx.result.meta.clone()); + let meta_bytes = metas.write_to_bytes()?; + retry(|| async { + let reader = UnpinReader(Box::new(Cursor::new(&meta_bytes))); + cx.external_storage + .write(&meta_name, reader, meta_bytes.len() as _) + .map_err(JustRetry) + .await + }) + .await + .map_err(|err| err.0)?; + Result::Ok(()) + } + + async fn after_execution_finished(&mut self, cx: AfterFinishCtx<'_>) -> Result<()> { + if self.collector.is_empty() { + warn!("Nothing to write, skipping saving meta."); + return Ok(()); + } + let comments = self.comments(); + self.collector.mut_meta().set_comments(comments); + self.collector.write_migration(cx.storage).await + } +} diff --git a/components/compact-log-backup/src/execute/hooking.rs b/components/compact-log-backup/src/execute/hooking.rs new file mode 100644 index 00000000000..c0443dc968f --- /dev/null +++ b/components/compact-log-backup/src/execute/hooking.rs @@ -0,0 
+1,215 @@ +// Copyright 2024 TiKV Project Authors. Licensed under Apache-2.0. + +use std::cell::Cell; + +pub use engine_traits::SstCompressionType; +use external_storage::ExternalStorage; +use tokio::runtime::Handle; + +use crate::{ + compaction::{Subcompaction, SubcompactionResult}, + errors::Result, + execute::Execution, + statistic::{CollectSubcompactionStatistic, LoadMetaStatistic}, + Error, +}; + +pub struct NoHooks; + +impl ExecHooks for NoHooks {} + +#[derive(PartialEq, Eq, Debug, Clone, Copy)] +pub struct CId(pub u64); + +impl std::fmt::Display for CId { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "{}", self.0) + } +} + +#[derive(Clone, Copy)] +pub struct BeforeStartCtx<'a> { + /// The asynchronous runtime that we are about to use. + pub async_rt: &'a Handle, + /// Reference to the execution context. + pub this: &'a Execution, + /// The source external storage of this compaction. + pub storage: &'a dyn ExternalStorage, +} + +#[derive(Clone, Copy)] +pub struct AfterFinishCtx<'a> { + /// The asynchronous runtime that we are about to use. + pub async_rt: &'a Handle, + /// The target external storage of this compaction. + /// + /// For now, it is always the same as the source storage. + pub storage: &'a dyn ExternalStorage, +} + +#[derive(Clone, Copy)] +pub struct SubcompactionFinishCtx<'a> { + /// Reference to the execution context. + pub this: &'a Execution, + /// The target external storage of this compaction. + pub external_storage: &'a dyn ExternalStorage, + /// The result of this compaction. + /// + /// If this is an `Err`, the whole procedure may fail soon. + pub result: &'a SubcompactionResult, +} + +#[derive(Clone, Copy)] +pub struct SubcompactionStartCtx<'a> { + /// The subcompaction about to start. + pub subc: &'a Subcompaction, + /// The diff of statistic of loading metadata. + /// + /// The diff is between the last trigger of `before_a_subcompaction_start` + /// and now. Due to we will usually prefetch metadata, this diff may not + /// just contributed by the `subc` only. + pub load_stat_diff: &'a LoadMetaStatistic, + /// The diff of collecting compaction between last trigger of this event. + /// + /// Like `load_stat_diff`, we collect subcompactions for every region + /// concurrently. The statistic diff may not just contributed by the `subc`. + pub collect_compaction_stat_diff: &'a CollectSubcompactionStatistic, + /// Whether to skip this compaction. + pub(super) skip: &'a Cell, +} + +impl<'a> SubcompactionStartCtx<'a> { + pub fn skip(&self) { + self.skip.set(true); + } +} + +#[derive(Clone, Copy)] +pub struct AbortedCtx<'a> { + pub storage: &'a dyn ExternalStorage, + pub err: &'a Error, +} + +/// The hook points of an execution of compaction. +// We don't need the returned future be either `Send` or `Sync`. +#[allow(async_fn_in_trait)] +#[allow(clippy::unused_async)] +pub trait ExecHooks: 'static { + /// This hook will be called when a subcompaction is about to start. + fn before_a_subcompaction_start(&mut self, _cid: CId, _c: SubcompactionStartCtx<'_>) {} + /// This hook will be called when a subcompaction has been finished. + /// You may use the `cid` to match a subcompaction previously known by + /// [`ExecHooks::before_a_subcompaction_start`]. + /// + /// If an error was returned, the whole procedure will fail and be + /// terminated! + async fn after_a_subcompaction_end( + &mut self, + _cid: CId, + _res: SubcompactionFinishCtx<'_>, + ) -> Result<()> { + Ok(()) + } + + /// This hook will be called before all works begin. 
+ /// In this time, the asynchronous runtime and external storage have been + /// created. + /// + /// If an error was returned, the execution will be aborted. + async fn before_execution_started(&mut self, _cx: BeforeStartCtx<'_>) -> Result<()> { + Ok(()) + } + /// This hook will be called after the whole compaction finished. + /// + /// If an error was returned, the execution will be mark as failed. + async fn after_execution_finished(&mut self, _cx: AfterFinishCtx<'_>) -> Result<()> { + Ok(()) + } + + /// This hook will be called once the compaction failed due to some reason. + async fn on_aborted(&mut self, _cx: AbortedCtx<'_>) {} +} + +impl ExecHooks for (T, U) { + fn before_a_subcompaction_start(&mut self, cid: CId, c: SubcompactionStartCtx<'_>) { + self.0.before_a_subcompaction_start(cid, c); + self.1.before_a_subcompaction_start(cid, c); + } + + async fn after_a_subcompaction_end( + &mut self, + cid: CId, + cx: SubcompactionFinishCtx<'_>, + ) -> Result<()> { + futures::future::try_join( + self.0.after_a_subcompaction_end(cid, cx), + self.1.after_a_subcompaction_end(cid, cx), + ) + .await?; + Ok(()) + } + + async fn before_execution_started(&mut self, cx: BeforeStartCtx<'_>) -> Result<()> { + futures::future::try_join( + self.0.before_execution_started(cx), + self.1.before_execution_started(cx), + ) + .await?; + Ok(()) + } + + async fn after_execution_finished(&mut self, cx: AfterFinishCtx<'_>) -> Result<()> { + futures::future::try_join( + self.0.after_execution_finished(cx), + self.1.after_execution_finished(cx), + ) + .await?; + Ok(()) + } + + async fn on_aborted(&mut self, cx: AbortedCtx<'_>) { + futures::future::join(self.0.on_aborted(cx), self.1.on_aborted(cx)).await; + } +} + +impl ExecHooks for Option { + fn before_a_subcompaction_start(&mut self, cid: CId, c: SubcompactionStartCtx<'_>) { + if let Some(h) = self { + h.before_a_subcompaction_start(cid, c); + } + } + + async fn after_a_subcompaction_end( + &mut self, + cid: CId, + cx: SubcompactionFinishCtx<'_>, + ) -> Result<()> { + if let Some(h) = self { + h.after_a_subcompaction_end(cid, cx).await + } else { + Ok(()) + } + } + + async fn before_execution_started(&mut self, cx: BeforeStartCtx<'_>) -> Result<()> { + if let Some(h) = self { + h.before_execution_started(cx).await + } else { + Ok(()) + } + } + + async fn after_execution_finished(&mut self, cx: AfterFinishCtx<'_>) -> Result<()> { + if let Some(h) = self { + h.after_execution_finished(cx).await + } else { + Ok(()) + } + } + + async fn on_aborted(&mut self, cx: AbortedCtx<'_>) { + if let Some(h) = self { + h.on_aborted(cx).await + } + } +} diff --git a/components/compact-log-backup/src/execute/mod.rs b/components/compact-log-backup/src/execute/mod.rs new file mode 100644 index 00000000000..d1eff289d44 --- /dev/null +++ b/components/compact-log-backup/src/execute/mod.rs @@ -0,0 +1,298 @@ +// Copyright 2024 TiKV Project Authors. Licensed under Apache-2.0. 
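+
+//! Execution of a whole compaction: stream metadata from the external
+//! storage, collect subcompactions, run them concurrently, and drive the
+//! registered [`hooking::ExecHooks`] at every stage.
+//!
+//! A minimal usage sketch (assuming the `SaveMeta` hook from the
+//! `exec_hooks` module and an already-built storage backend; the variable
+//! names are only illustrative):
+//!
+//! ```ignore
+//! let cfg = ExecutionConfig {
+//!     from_ts: 0,
+//!     until_ts: u64::MAX,
+//!     compression: SstCompressionType::Lz4,
+//!     compression_level: None,
+//! };
+//! let exec = Execution {
+//!     out_prefix: cfg.recommended_prefix("nightly-compaction"),
+//!     cfg,
+//!     max_concurrent_subcompaction: 4,
+//!     external_storage: backend,
+//!     db: None,
+//! };
+//! exec.run(SaveMeta::default())?;
+//! ```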
+pub mod hooking;
+
+#[cfg(test)]
+mod test;
+
+use std::{borrow::Cow, cell::Cell, path::Path, sync::Arc};
+
+use chrono::Utc;
+use engine_rocks::RocksEngine;
+pub use engine_traits::SstCompressionType;
+use engine_traits::SstExt;
+use external_storage::{BackendConfig, ExternalStorage};
+use futures::stream::{self, StreamExt};
+use hooking::{
+    AfterFinishCtx, BeforeStartCtx, CId, ExecHooks, SubcompactionFinishCtx, SubcompactionStartCtx,
+};
+use kvproto::brpb::StorageBackend;
+use tikv_util::config::ReadableSize;
+use tokio::runtime::Handle;
+use tracing::{trace_span, Instrument};
+use tracing_active_tree::{frame, root};
+use txn_types::TimeStamp;
+
+use self::hooking::AbortedCtx;
+use super::{
+    compaction::{
+        collector::{CollectSubcompaction, CollectSubcompactionConfig},
+        exec::{SubcompactExt, SubcompactionExec},
+    },
+    storage::{LoadFromExt, StreamMetaStorage},
+};
+use crate::{
+    compaction::{exec::SubcompactionExecArg, SubcompactionResult},
+    errors::{Result, TraceResultExt},
+    util, ErrorKind,
+};
+
+/// The config for an execution of a compaction.
+///
+/// This structure fully defines what work the compaction needs to do.
+/// That is, as long as this structure stays unchanged, the compaction should
+/// always generate the same artifacts.
+#[derive(Debug)]
+pub struct ExecutionConfig {
+    /// Filter out files that don't contain any record with a TS greater than
+    /// or equal to this.
+    pub from_ts: u64,
+    /// Filter out files that don't contain any record with a TS less than
+    /// this.
+    pub until_ts: u64,
+    /// The compression algorithm we are going to use for the output.
+    pub compression: SstCompressionType,
+    /// The compression level we are going to use.
+    ///
+    /// If `None`, we will use the default level of the selected algorithm.
+    pub compression_level: Option<i32>,
+}
+
+impl slog::KV for ExecutionConfig {
+    fn serialize(
+        &self,
+        _record: &slog::Record<'_>,
+        serializer: &mut dyn slog::Serializer,
+    ) -> slog::Result {
+        serializer.emit_u64("from_ts", self.from_ts)?;
+        serializer.emit_u64("until_ts", self.until_ts)?;
+        let date = |pts| {
+            let ts = TimeStamp::new(pts).physical();
+            chrono::DateTime::<Utc>::from_utc(
+                chrono::NaiveDateTime::from_timestamp(
+                    ts as i64 / 1000,
+                    (ts % 1000) as u32 * 1_000_000,
+                ),
+                Utc,
+            )
+        };
+        serializer.emit_arguments("from_date", &format_args!("{}", date(self.from_ts)))?;
+        serializer.emit_arguments("until_date", &format_args!("{}", date(self.until_ts)))?;
+        serializer.emit_arguments("compression", &format_args!("{:?}", self.compression))?;
+        if let Some(level) = self.compression_level {
+            serializer.emit_i32("compression.level", level)?;
+        }
+
+        Ok(())
+    }
+}
+
+impl ExecutionConfig {
+    /// Create a suitable (but not mandatory) prefix for the artifacts of the
+    /// compaction.
+    ///
+    /// You may specify a `name`, which will be included in the path, so the
+    /// compaction will be easier to find.
+    pub fn recommended_prefix(&self, name: &str) -> String {
+        let mut hasher = crc64fast::Digest::new();
+        hasher.write(name.as_bytes());
+        hasher.write(&self.from_ts.to_le_bytes());
+        hasher.write(&self.until_ts.to_le_bytes());
+        hasher.write(&util::compression_type_to_u8(self.compression).to_le_bytes());
+        hasher.write(&self.compression_level.unwrap_or(0).to_le_bytes());
+
+        format!("{}_{}", name, util::aligned_u64(hasher.sum64()))
+    }
+}
+
+/// An execution of a compaction.
+pub struct Execution {
+    /// The configuration.
+    pub cfg: ExecutionConfig,
+
+    /// Max number of subcompactions that can be executed concurrently.
+ pub max_concurrent_subcompaction: u64, + /// The external storage for input and output. + pub external_storage: StorageBackend, + /// The RocksDB instance for generating SST. + pub db: Option, + /// The prefix of the artifices. + pub out_prefix: String, +} + +struct ExecuteCtx<'a, H: ExecHooks> { + storage: &'a Arc, + hooks: &'a mut H, +} + +impl Execution { + pub fn gen_name(&self) -> String { + let compaction_name = Path::new(&self.out_prefix) + .file_name() + .map(|v| v.to_string_lossy()) + .unwrap_or(Cow::Borrowed("unknown")); + let pid = tikv_util::sys::thread::thread_id(); + let hostname = tikv_util::sys::hostname(); + format!( + "{}#{}@{}", + compaction_name, + pid, + hostname.as_deref().unwrap_or("unknown") + ) + } + + async fn run_prepared(&self, cx: &mut ExecuteCtx<'_, impl ExecHooks>) -> Result<()> { + let mut ext = LoadFromExt::default(); + let next_compaction = trace_span!("next_compaction"); + ext.max_concurrent_fetch = 128; + ext.loading_content_span = Some(trace_span!( + parent: next_compaction.clone(), + "load_meta_file_names" + )); + + let ExecuteCtx { + ref storage, + ref mut hooks, + .. + } = cx; + + let cx = BeforeStartCtx { + storage: storage.as_ref(), + async_rt: &tokio::runtime::Handle::current(), + this: self, + }; + hooks.before_execution_started(cx).await?; + + let meta = StreamMetaStorage::load_from_ext(storage.as_ref(), ext); + let stream = meta.flat_map(|file| match file { + Ok(file) => stream::iter(file.into_logs()).map(Ok).left_stream(), + Err(err) => stream::once(futures::future::err(err)).right_stream(), + }); + let mut compact_stream = CollectSubcompaction::new( + stream, + CollectSubcompactionConfig { + compact_from_ts: self.cfg.from_ts, + compact_to_ts: self.cfg.until_ts, + subcompaction_size_threshold: ReadableSize::mb(128).0, + }, + ); + let mut pending = Vec::new(); + let mut id = 0; + + while let Some(c) = compact_stream + .next() + .instrument(next_compaction.clone()) + .await + { + let cstat = compact_stream.take_statistic(); + let lstat = compact_stream.get_mut().get_mut().take_statistic(); + + let c = c?; + let cid = CId(id); + let skip = Cell::new(false); + let cx = SubcompactionStartCtx { + subc: &c, + load_stat_diff: &lstat, + collect_compaction_stat_diff: &cstat, + skip: &skip, + }; + hooks.before_a_subcompaction_start(cid, cx); + if skip.get() { + continue; + } + + id += 1; + + let compact_args = SubcompactionExecArg { + out_prefix: Some(Path::new(&self.out_prefix).to_owned()), + db: self.db.clone(), + storage: Arc::clone(storage) as _, + }; + let compact_worker = SubcompactionExec::from(compact_args); + let mut ext = SubcompactExt::default(); + ext.max_load_concurrency = 32; + ext.compression = self.cfg.compression; + ext.compression_level = self.cfg.compression_level; + + let compact_work = async move { + let res = compact_worker.run(c, ext).await.trace_err()?; + res.verify_checksum() + .annotate(format_args!("the compaction is {:?}", res.origin))?; + Result::Ok((res, cid)) + }; + let join_handle = tokio::spawn(root!(compact_work)); + pending.push(join_handle); + + if pending.len() >= self.max_concurrent_subcompaction as _ { + let join = util::select_vec(&mut pending); + let (cres, cid) = frame!("wait_for_compaction"; join).await.unwrap()?; + self.on_compaction_finish(cid, &cres, storage.as_ref(), *hooks) + .await?; + } + } + drop(next_compaction); + + for join in pending { + let (cres, cid) = frame!("final_wait"; join).await.unwrap()?; + self.on_compaction_finish(cid, &cres, storage.as_ref(), *hooks) + .await?; + } + let cx = 
AfterFinishCtx { + async_rt: &Handle::current(), + storage: storage.as_ref(), + }; + hooks.after_execution_finished(cx).await?; + + Result::Ok(()) + } + + pub fn run(self, mut hooks: impl ExecHooks) -> Result<()> { + let storage = + external_storage::create_storage(&self.external_storage, BackendConfig::default())?; + let storage: Arc = Arc::from(storage); + let runtime = tokio::runtime::Builder::new_multi_thread() + .enable_all() + .build() + .unwrap(); + + let mut cx = ExecuteCtx { + storage: &storage, + hooks: &mut hooks, + }; + + let guarded = async { + let all_works = self.run_prepared(&mut cx); + let res = tokio::select! { + res = all_works => res, + _ = tokio::signal::ctrl_c() => Err(ErrorKind::Other("User canceled by Ctrl-C".to_owned()).into()) + }; + + if let Err(ref err) = res { + cx.hooks + .on_aborted(AbortedCtx { + storage: cx.storage.as_ref(), + err, + }) + .await + } + + res + }; + + runtime.block_on(frame!(guarded)) + } + + async fn on_compaction_finish( + &self, + cid: CId, + result: &SubcompactionResult, + external_storage: &dyn ExternalStorage, + hooks: &mut impl ExecHooks, + ) -> Result<()> { + let cx = SubcompactionFinishCtx { + this: self, + external_storage, + result, + }; + hooks.after_a_subcompaction_end(cid, cx).await?; + Result::Ok(()) + } +} diff --git a/components/compact-log-backup/src/execute/test.rs b/components/compact-log-backup/src/execute/test.rs new file mode 100644 index 00000000000..78ab92ec0b6 --- /dev/null +++ b/components/compact-log-backup/src/execute/test.rs @@ -0,0 +1,311 @@ +// Copyright 2024 TiKV Project Authors. Licensed under Apache-2.0. + +use std::{ + collections::HashMap, + future::Future, + sync::{ + atomic::{AtomicU64, Ordering}, + Arc, + }, +}; + +use engine_rocks::RocksEngine; +use external_storage::ExternalStorage; +use futures::{future::FutureExt, stream::TryStreamExt}; +use kvproto::brpb::StorageBackend; +use tokio::sync::mpsc::Sender; + +use super::{Execution, ExecutionConfig}; +use crate::{ + compaction::SubcompactionResult, + errors::OtherErrExt, + exec_hooks::{ + checkpoint::Checkpoint, consistency::StorageConsistencyGuard, save_meta::SaveMeta, + }, + execute::hooking::{CId, ExecHooks, SubcompactionFinishCtx}, + storage::LOCK_PREFIX, + test_util::{gen_step, CompactInMem, KvGen, LogFileBuilder, TmpStorage}, + ErrorKind, +}; + +#[derive(Clone)] +struct CompactionSpy(Sender); + +impl ExecHooks for CompactionSpy { + async fn after_a_subcompaction_end( + &mut self, + _cid: super::hooking::CId, + res: super::hooking::SubcompactionFinishCtx<'_>, + ) -> crate::Result<()> { + self.0 + .send(res.result.clone()) + .map(|res| res.adapt_err()) + .await + } +} + +fn gen_builder(cm: &mut HashMap, batch: i64, num: i64) -> Vec { + let mut result = vec![]; + for (n, i) in (num * batch..num * (batch + 1)).enumerate() { + let it = cm + .entry(n) + .or_default() + .tap_on(KvGen::new(gen_step(1, i, num), move |_| { + vec![42u8; (n + 1) * 12] + })) + .take(200); + let mut b = LogFileBuilder::new(|v| v.region_id = n as u64); + for kv in it { + b.add_encoded(&kv.key, &kv.value) + } + result.push(b); + } + result +} + +pub fn create_compaction(st: StorageBackend) -> Execution { + Execution:: { + cfg: ExecutionConfig { + from_ts: 0, + until_ts: u64::MAX, + compression: engine_traits::SstCompressionType::Lz4, + compression_level: None, + }, + max_concurrent_subcompaction: 3, + external_storage: st, + db: None, + out_prefix: "test-output".to_owned(), + } +} + +#[tokio::test] +async fn test_exec_simple() { + let st = TmpStorage::create(); + let mut cm = 
HashMap::new(); + + for i in 0..3 { + st.build_flush( + &format!("{}.log", i), + &format!("v1/backupmeta/{}.meta", i), + gen_builder(&mut cm, i, 10), + ) + .await; + } + + let exec = create_compaction(st.backend()); + + let (tx, mut rx) = tokio::sync::mpsc::channel(16); + let bg_exec = tokio::task::spawn_blocking(move || { + exec.run((SaveMeta::default(), CompactionSpy(tx))).unwrap() + }); + while let Some(item) = rx.recv().await { + let rid = item.meta.get_meta().get_region_id() as usize; + st.verify_result(item, cm.remove(&rid).unwrap()); + } + bg_exec.await.unwrap(); + + let mut migs = st.load_migrations().await.unwrap(); + assert_eq!(migs.len(), 1); + let (id, mig) = migs.pop().unwrap(); + assert_eq!(id, 1); + assert_eq!(mig.edit_meta.len(), 3); + assert_eq!(mig.compactions.len(), 1); + let subc = st + .load_subcompactions(mig.compactions[0].get_artifacts()) + .await + .unwrap(); + assert_eq!(subc.len(), 10); +} + +#[tokio::test] +async fn test_checkpointing() { + let st = TmpStorage::create(); + let mut cm = HashMap::new(); + + for i in 0..3 { + st.build_flush( + &format!("{}.log", i), + &format!("v1/backupmeta/{}.meta", i), + gen_builder(&mut cm, i, 15), + ) + .await; + } + + #[derive(Clone)] + struct AbortEvery3TimesAndRecordFinishCount(Arc); + + const ERR_MSG: &str = "nameless you. back to where you from"; + + impl ExecHooks for AbortEvery3TimesAndRecordFinishCount { + async fn after_a_subcompaction_end( + &mut self, + cid: CId, + _res: SubcompactionFinishCtx<'_>, + ) -> crate::Result<()> { + if cid.0 == 4 { + Err(crate::ErrorKind::Other(ERR_MSG.to_owned()).into()) + } else { + self.0.fetch_add(1, Ordering::SeqCst); + Ok(()) + } + } + } + + let be = st.backend(); + let (tx, mut rx) = tokio::sync::mpsc::channel(16); + let cnt = Arc::new(AtomicU64::default()); + let cloneable_hooks = ( + AbortEvery3TimesAndRecordFinishCount(cnt.clone()), + CompactionSpy(tx), + ); + let hooks = move || { + ( + (SaveMeta::default(), Checkpoint::default()), + cloneable_hooks.clone(), + ) + }; + let bg_exec = tokio::task::spawn_blocking(move || { + while let Err(err) = create_compaction(be.clone()).run(hooks()) { + if !err.kind.to_string().contains(ERR_MSG) { + return Err(err); + } + } + Ok(()) + }); + + while let Some(item) = rx.recv().await { + let rid = item.meta.get_meta().get_region_id() as usize; + st.verify_result(item, cm.remove(&rid).unwrap()); + } + bg_exec.await.unwrap().unwrap(); + + let mut migs = st.load_migrations().await.unwrap(); + assert_eq!(migs.len(), 1); + let (id, mig) = migs.pop().unwrap(); + assert_eq!(id, 1); + assert_eq!(mig.edit_meta.len(), 3); + assert_eq!(mig.compactions.len(), 1); + let subc = st + .load_subcompactions(mig.compactions[0].get_artifacts()) + .await + .unwrap(); + assert_eq!(subc.len(), 15); + assert_eq!(cnt.load(Ordering::SeqCst), 15); +} + +async fn put_checkpoint(storage: &dyn ExternalStorage, store: u64, cp: u64) { + let pfx = format!("v1/global_checkpoint/{}.ts", store); + let content = futures::io::Cursor::new(cp.to_le_bytes()); + storage.write(&pfx, content.into(), 8).await.unwrap(); +} + +async fn load_locks(storage: &dyn ExternalStorage) -> Vec { + storage + .iter_prefix(LOCK_PREFIX) + .map_ok(|v| v.key) + .try_collect::>() + .await + .unwrap() +} + +#[tokio::test(flavor = "multi_thread", worker_threads = 2)] +async fn test_consistency_guard_() { + let st = TmpStorage::create(); + let strg = st.storage().as_ref(); + put_checkpoint(strg, 1, 42).await; + + let mut exec = create_compaction(st.backend()); + exec.cfg.until_ts = 43; + let c = 
StorageConsistencyGuard::default(); + tokio::task::block_in_place(|| exec.run(c).unwrap_err()); + + let mut exec = create_compaction(st.backend()); + exec.cfg.until_ts = 41; + let c = StorageConsistencyGuard::default(); + tokio::task::block_in_place(|| exec.run(c).unwrap()); + + put_checkpoint(strg, 2, 40).await; + + let mut exec = create_compaction(st.backend()); + exec.cfg.until_ts = 41; + let c = StorageConsistencyGuard::default(); + tokio::task::block_in_place(|| exec.run(c).unwrap_err()); + + let mut exec = create_compaction(st.backend()); + exec.cfg.until_ts = 39; + let c = StorageConsistencyGuard::default(); + tokio::task::block_in_place(|| exec.run(c).unwrap()); +} + +#[tokio::test] +async fn test_locking() { + let st = TmpStorage::create(); + let mut cm = HashMap::new(); + st.build_flush("0.log", "v1/backupmeta/0.meta", gen_builder(&mut cm, 0, 15)) + .await; + put_checkpoint(st.storage().as_ref(), 1, u64::MAX).await; + + struct Blocking(Option); + impl ExecHooks for Blocking { + async fn after_a_subcompaction_end( + &mut self, + _cid: CId, + _res: SubcompactionFinishCtx<'_>, + ) -> crate::Result<()> { + if let Some(fut) = self.0.take() { + fut.await; + } + Ok(()) + } + } + + let (tx, rx) = tokio::sync::oneshot::channel::<()>(); + let exec = create_compaction(st.backend()); + + let (ptx, prx) = tokio::sync::oneshot::channel::<()>(); + let run = Box::pin(async move { + ptx.send(()).unwrap(); + rx.await.unwrap(); + }); + + let hooks = (Blocking(Some(run)), StorageConsistencyGuard::default()); + let hnd = tokio::task::spawn_blocking(move || exec.run(hooks)); + + prx.await.unwrap(); + let l = load_locks(st.storage().as_ref()).await; + assert_eq!(l.len(), 1, "it is {:?}", l); + tx.send(()).unwrap(); + hnd.await.unwrap().unwrap(); + + let l = load_locks(st.storage().as_ref()).await; + assert_eq!(l.len(), 0, "it is {:?}", l); +} + +#[tokio::test] +async fn test_abort_unlocking() { + let st = TmpStorage::create(); + let mut cm = HashMap::new(); + st.build_flush("0.log", "v1/backupmeta/0.meta", gen_builder(&mut cm, 0, 15)) + .await; + + struct Abort; + impl ExecHooks for Abort { + async fn after_a_subcompaction_end( + &mut self, + _cid: CId, + _res: SubcompactionFinishCtx<'_>, + ) -> crate::Result<()> { + Err(ErrorKind::Other("Journey ends here.".to_owned()))? + } + } + + let exec = create_compaction(st.backend()); + let hooks = (Abort, StorageConsistencyGuard::default()); + + tokio::task::spawn_blocking(move || exec.run(hooks)) + .await + .unwrap() + .unwrap_err(); + let l = load_locks(st.storage().as_ref()).await; + assert_eq!(l.len(), 0, "it is {:?}", l); +} diff --git a/components/compact-log-backup/src/lib.rs b/components/compact-log-backup/src/lib.rs new file mode 100644 index 00000000000..30cbe629a23 --- /dev/null +++ b/components/compact-log-backup/src/lib.rs @@ -0,0 +1,19 @@ +// Copyright 2024 TiKV Project Authors. Licensed under Apache-2.0. + +#![feature(test)] +#![feature(custom_test_frameworks)] + +mod compaction; +mod errors; +mod source; +mod statistic; +mod storage; + +pub mod test_util; + +pub mod exec_hooks; +pub mod execute; + +pub use errors::{Error, ErrorKind, OtherErrExt, Result, TraceResultExt}; + +mod util; diff --git a/components/compact-log-backup/src/source.rs b/components/compact-log-backup/src/source.rs new file mode 100644 index 00000000000..44a6c75745b --- /dev/null +++ b/components/compact-log-backup/src/source.rs @@ -0,0 +1,206 @@ +// Copyright 2024 TiKV Project Authors. Licensed under Apache-2.0. 
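+
+//! Fetching and decoding the raw key/value content of log backup data files.
+//!
+//! A minimal sketch of how [`Source`] is typically consumed (assuming an
+//! [`Input`] describing one data-file segment; the counting closure is only
+//! illustrative):
+//!
+//! ```ignore
+//! let source = Source::new(storage);
+//! let mut stat = LoadStatistic::default();
+//! let mut keys = 0;
+//! source
+//!     .load(input, Some(&mut stat), |_key, _value| keys += 1)
+//!     .await?;
+//! ```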
+use std::{ + pin::{pin, Pin}, + sync::Arc, +}; + +use async_compression::futures::write::ZstdDecoder; +use external_storage::ExternalStorage; +use futures::io::{AsyncWriteExt, Cursor}; +use futures_io::AsyncWrite; +use kvproto::brpb; +use prometheus::core::{Atomic, AtomicU64}; +use tikv_util::{ + codec::stream_event::{self, Iterator}, + stream::{retry_all_ext, JustRetry, RetryExt}, +}; +use txn_types::Key; + +use super::{statistic::LoadStatistic, util::Cooperate}; +use crate::{compaction::Input, errors::Result}; + +/// The manager of fetching log files from remote for compacting. +#[derive(Clone)] +pub struct Source { + inner: Arc, +} + +impl Source { + pub fn new(inner: Arc) -> Self { + Self { inner } + } +} + +/// A record from log files. +#[derive(PartialEq, Eq, PartialOrd, Ord, Debug)] +pub struct Record { + pub key: Vec, + pub value: Vec, +} + +impl Record { + #[inline(always)] + pub fn cmp_key(&self, other: &Self) -> std::cmp::Ordering { + self.key.cmp(&other.key) + } + + pub fn ts(&self) -> Result { + let ts = Key::decode_ts_from(&self.key)?.into_inner(); + Ok(ts) + } +} + +impl Source { + /// Load the content of an input. + #[tracing::instrument(skip_all)] + pub async fn load_remote( + &self, + input: Input, + stat: &mut Option<&mut LoadStatistic>, + ) -> Result> { + let error_during_downloading = Arc::new(AtomicU64::new(0)); + let counter = error_during_downloading.clone(); + let ext = RetryExt::default() + .with_fail_hook(move |_: &JustRetry| counter.inc_by(1)); + let fetch = || { + let storage = self.inner.clone(); + let id = input.id.clone(); + let compression = input.compression; + async move { + let mut content = Vec::with_capacity(id.length as _); + let item = pin!(Cursor::new(&mut content)); + let mut decompress = decompress(compression, item)?; + let source = storage.read_part(&id.name, id.offset, id.length); + let n = futures::io::copy(source, &mut decompress).await?; + decompress.flush().await?; + drop(decompress); + std::io::Result::Ok((content, n)) + } + }; + let (content, size) = retry_all_ext(fetch, ext).await?; + if let Some(stat) = stat.as_mut() { + stat.physical_bytes_in += size; + stat.error_during_downloading += error_during_downloading.get(); + } + Ok(content) + } + + /// Load key value pairs from remote. + #[tracing::instrument(skip_all, fields(id=?input.id))] + pub async fn load( + &self, + input: Input, + mut stat: Option<&mut LoadStatistic>, + mut on_key_value: impl FnMut(&[u8], &[u8]), + ) -> Result<()> { + let content = self.load_remote(input, &mut stat).await?; + + let mut co = Cooperate::default(); + let mut iter = stream_event::EventIterator::new(&content); + while let Some((k, v)) = iter.get_next()? 
{ + co.step().await; + on_key_value(k, v); + if let Some(stat) = stat.as_mut() { + stat.keys_in += 1; + stat.logical_key_bytes_in += iter.key().len() as u64; + stat.logical_value_bytes_in += iter.value().len() as u64; + } + } + if let Some(stat) = stat.as_mut() { + stat.files_in += 1; + } + Ok(()) + } +} + +fn decompress( + compression: brpb::CompressionType, + input: Pin<&mut (impl AsyncWrite + Send)>, +) -> std::io::Result { + match compression { + kvproto::brpb::CompressionType::Zstd => Ok(ZstdDecoder::new(input)), + compress => Err(std::io::Error::new( + std::io::ErrorKind::Unsupported, + format!("the compression type ({:?}) isn't supported", compress), + )), + } +} + +#[cfg(test)] +mod tests { + use super::Source; + use crate::{ + compaction::{Input, Subcompaction}, + statistic::LoadStatistic, + storage::{LogFile, MetaFile}, + test_util::{gen_adjacent_with_ts, KvGen, LogFileBuilder, TmpStorage}, + }; + + const NUM_FLUSH: usize = 2; + const NUM_REGION: usize = 5; + const NUM_KV: usize = 10; + + async fn construct_storage(st: &TmpStorage) -> Vec { + let gen_builder = |batch, num_kv, num_region| { + (0..num_region).map(move |v| { + let it = KvGen::new( + gen_adjacent_with_ts(1, v * num_kv, batch).take(num_kv), + move |_| format!("v@{batch}").into_bytes(), + ) + .take(num_kv); + + let mut b = LogFileBuilder::new(|b| b.region_id = v as u64); + for kv in it { + b.add_encoded(&kv.key, &kv.value) + } + b + }) + }; + let mut mfs = vec![]; + for i in 0..NUM_FLUSH { + let mf = st + .build_flush( + &format!("{i}.l"), + &format!("{i}.m"), + gen_builder(i as u64, NUM_KV, NUM_REGION), + ) + .await; + mfs.push(mf); + } + mfs + } + + fn as_input(l: &LogFile) -> Input { + Subcompaction::singleton(l.clone()).inputs.pop().unwrap() + } + + #[tokio::test] + async fn test_loading() { + let st = TmpStorage::create(); + let m = construct_storage(&st).await; + + let so = Source::new(st.storage().clone()); + for epoch in 0..NUM_FLUSH { + for seg in 0..NUM_REGION { + let input = as_input(&m[epoch].physical_files[0].files[seg]); + let mut i = 0; + let mut stat = LoadStatistic::default(); + so.load(input, Some(&mut stat), |k, v| { + assert_eq!( + k, + crate::test_util::sow((1, (seg * NUM_KV + i) as i64, epoch as u64)) + ); + assert_eq!(v, format!("v@{epoch}").as_bytes()); + i += 1; + }) + .await + .unwrap(); + assert_eq!(stat.files_in, 1); + assert_eq!(stat.keys_in, 10); + assert_eq!(stat.logical_key_bytes_in, 350); + assert_eq!(stat.logical_value_bytes_in, 30); + assert_eq!(stat.error_during_downloading, 0); + } + } + } +} diff --git a/components/compact-log-backup/src/statistic.rs b/components/compact-log-backup/src/statistic.rs new file mode 100644 index 00000000000..1c09cc2e6ab --- /dev/null +++ b/components/compact-log-backup/src/statistic.rs @@ -0,0 +1,216 @@ +// Copyright 2024 TiKV Project Authors. Licensed under Apache-2.0. +use std::time::Duration; + +use chrono::{DateTime, Local}; +use derive_more::{Add, AddAssign}; +use serde::Serialize; + +/// The statistic of an [`Execution`]. +#[derive(Clone, Debug, Serialize)] +#[serde(rename_all = "kebab-case")] +pub struct CompactLogBackupStatistic { + /// When we start the execution? + pub start_time: DateTime, + /// When it ends? + pub end_time: DateTime, + /// How many time we spent for the whole execution? + pub time_taken: Duration, + /// From which host we executed this compaction? + pub exec_by: String, + + // Summary of statistics. 
+    pub load_stat: LoadStatistic,
+    pub load_meta_stat: LoadMetaStatistic,
+    pub collect_subcompactions_stat: CollectSubcompactionStatistic,
+    pub subcompact_stat: SubcompactStatistic,
+    pub prometheus: prom::SerAll,
+}
+
+/// The statistic of loading metadata of the compactions' source files.
+#[derive(Default, Debug, Add, AddAssign, Clone, Serialize)]
+#[serde(rename_all = "kebab-case")]
+pub struct LoadMetaStatistic {
+    /// How many meta files were read?
+    pub meta_files_in: u64,
+    /// How many bytes were read from the remote storage, physically?
+    pub physical_bytes_loaded: u64,
+    /// How many physical data files' metadata have we processed?
+    pub physical_data_files_in: u64,
+    /// How many logical data files' (segments') metadata have we processed?
+    pub logical_data_files_in: u64,
+    /// How much time was spent loading remote files?
+    pub load_file_duration: Duration,
+    /// How many prefetch tasks were spawned?
+    pub prefetch_task_emitted: u64,
+    /// How many spawned prefetch tasks have finished?
+    pub prefetch_task_finished: u64,
+    /// How many errors happened while fetching from the remote storage?
+    pub error_during_downloading: u64,
+}
+
+/// The statistic of loading data files for a subcompaction.
+#[derive(Default, Debug, Add, AddAssign, Clone, Serialize)]
+#[serde(rename_all = "kebab-case")]
+pub struct LoadStatistic {
+    /// How many logical "files" have we loaded?
+    pub files_in: u64,
+    /// How many keys have we loaded?
+    pub keys_in: u64,
+    /// How many bytes have we fetched from the network, physically?
+    pub physical_bytes_in: u64,
+    /// How many bytes do the keys we loaded occupy, in their original form?
+    pub logical_key_bytes_in: u64,
+    /// How many bytes do the values we loaded occupy, without compression?
+    pub logical_value_bytes_in: u64,
+    /// How many errors happened while fetching from the remote storage?
+    pub error_during_downloading: u64,
+}
+
+/// The statistic of executing a subcompaction.
+#[derive(Default, Debug, Add, AddAssign, Clone, Serialize)]
+#[serde(rename_all = "kebab-case")]
+pub struct SubcompactStatistic {
+    /// How many keys have we yielded?
+    pub keys_out: u64,
+    /// How many bytes have we yielded, physically?
+    pub physical_bytes_out: u64,
+    /// How many bytes do all the keys we have yielded use?
+    pub logical_key_bytes_out: u64,
+    /// How many bytes do all the values we have yielded use?
+    pub logical_value_bytes_out: u64,
+
+    /// How much time did we spend writing the SST output?
+    pub write_sst_duration: Duration,
+    /// How much time did we spend reading the source of this subcompaction?
+    pub load_duration: Duration,
+    /// How much time did we spend sorting the inputs?
+    pub sort_duration: Duration,
+    /// How much time did we spend putting the artifacts to the external
+    /// storage?
+    pub save_duration: Duration,
+
+    /// How many subcompactions generated nothing?
+    pub empty_generation: u64,
+}
+
+/// The statistic of collecting subcompactions.
+#[derive(Default, Debug, Add, AddAssign, Clone, Serialize)]
+#[serde(rename_all = "kebab-case")]
+pub struct CollectSubcompactionStatistic {
+    /// How many files have we processed?
+    pub files_in: u64,
+    /// How many bytes do the files we have processed contain?
+    pub bytes_in: u64,
+    /// How many bytes do the compactions we emitted need to handle?
+    pub bytes_out: u64,
+    /// How many compactions have we emitted?
+    pub compactions_out: u64,
+
+    /// How many files have we filtered out due to the TS range?
+ pub files_filtered_out: u64, +} + +pub mod prom { + use prometheus::*; + use serde::{ser::SerializeMap, Serialize}; + + struct ShowPromHist<'a>(&'a Histogram); + + impl<'a> Serialize for ShowPromHist<'a> { + fn serialize(&self, serializer: S) -> std::result::Result + where + S: serde::Serializer, + { + use ::prometheus::core::Metric; + let proto = self.0.metric(); + let hist = proto.get_histogram(); + let mut m = serializer.serialize_map(Some(hist.get_bucket().len() + 2))?; + m.serialize_entry("count", &hist.get_sample_count())?; + m.serialize_entry("sum", &hist.get_sample_sum())?; + for bucket in hist.get_bucket() { + m.serialize_entry( + &format!("le_{}", bucket.get_upper_bound()), + &bucket.get_cumulative_count(), + )?; + } + m.end() + } + } + + /// SerAll is a placeholder type that when being serialized, it reads + /// metrics registered to prometheus from the module and then serialize them + /// to the result. + #[derive(Clone, Copy, Debug, Default)] + pub struct SerAll; + + impl Serialize for SerAll { + fn serialize(&self, serializer: S) -> std::result::Result + where + S: serde::Serializer, + { + let histograms = [ + ( + "read_meta_duration", + &*COMPACT_LOG_BACKUP_READ_META_DURATION, + ), + ( + "load_a_file_duration", + &*COMPACT_LOG_BACKUP_LOAD_A_FILE_DURATION, + ), + ("load_duration", &*COMPACT_LOG_BACKUP_LOAD_DURATION), + ("sort_duration", &*COMPACT_LOG_BACKUP_SORT_DURATION), + ("save_duration", &*COMPACT_LOG_BACKUP_SAVE_DURATION), + ( + "write_sst_duration", + &*COMPACT_LOG_BACKUP_WRITE_SST_DURATION, + ), + ]; + + let mut m = serializer.serialize_map(Some(histograms.len()))?; + for (name, histogram) in histograms.iter() { + m.serialize_entry(name, &ShowPromHist(histogram))?; + } + m.end() + } + } + + lazy_static::lazy_static! { + // ==== The following metrics will be collected directly in the call site. + pub static ref COMPACT_LOG_BACKUP_READ_META_DURATION: Histogram = register_histogram!( + "compact_log_backup_read_meta_duration", + "The duration of reading meta files.", + exponential_buckets(0.001, 2.0, 13).unwrap() + ).unwrap(); + + pub static ref COMPACT_LOG_BACKUP_LOAD_A_FILE_DURATION: Histogram = register_histogram!( + "compact_log_backup_load_a_file_duration", + "The duration of loading a log file.", + exponential_buckets(0.001, 2.0, 13).unwrap() + ).unwrap(); + + // ==== The following metrics will be collected in the hooks. 
+ pub static ref COMPACT_LOG_BACKUP_LOAD_DURATION: Histogram = register_histogram!( + "compact_log_backup_load_duration", + "The duration of loading log all log files for a compaction.", + exponential_buckets(0.1, 1.5, 13).unwrap() + ).unwrap(); + + pub static ref COMPACT_LOG_BACKUP_SORT_DURATION: Histogram = register_histogram!( + "compact_log_backup_sort_duration", + "The duration of sorting contents.", + exponential_buckets(0.1, 1.5, 13).unwrap() + ).unwrap(); + + pub static ref COMPACT_LOG_BACKUP_SAVE_DURATION: Histogram = register_histogram!( + "compact_log_backup_save_duration", + "The duration of saving log files.", + exponential_buckets(0.01, 2.0, 13).unwrap() + ).unwrap(); + + pub static ref COMPACT_LOG_BACKUP_WRITE_SST_DURATION: Histogram = register_histogram!( + "compact_log_backup_write_sst_duration", + "The duration of writing SST files.", + exponential_buckets(0.01, 2.0, 13).unwrap() + ).unwrap(); + } +} diff --git a/components/compact-log-backup/src/storage.rs b/components/compact-log-backup/src/storage.rs new file mode 100644 index 00000000000..24311d69b45 --- /dev/null +++ b/components/compact-log-backup/src/storage.rs @@ -0,0 +1,719 @@ +// Copyright 2024 TiKV Project Authors. Licensed under Apache-2.0. +use std::{ + collections::VecDeque, + future::Future, + ops::Not, + path::Path, + pin::Pin, + sync::Arc, + task::{ready, Context, Poll}, +}; + +use derive_more::Display; +use external_storage::{BlobObject, ExternalStorage, UnpinReader}; +use futures::{ + future::{FusedFuture, FutureExt, TryFutureExt}, + io::{AsyncReadExt, Cursor}, + stream::{Fuse, FusedStream, StreamExt, TryStreamExt}, +}; +use kvproto::{ + brpb::{self, FileType, Migration}, + metapb::RegionEpoch, +}; +use prometheus::core::{Atomic, AtomicU64}; +use tikv_util::{ + retry_expr, + stream::{JustRetry, RetryExt}, + time::Instant, +}; +use tokio_stream::Stream; +use tracing::{span::Entered, Span}; +use tracing_active_tree::frame; + +use super::{ + errors::{Error, Result}, + statistic::LoadMetaStatistic, +}; +use crate::{compaction::EpochHint, errors::ErrorKind, util}; + +pub const METADATA_PREFIX: &str = "v1/backupmeta"; +pub const COMPACTION_OUT_PREFIX: &str = "compaction_out"; +pub const MIGRATION_PREFIX: &str = "v1/migrations"; +pub const LOCK_PREFIX: &str = "v1/LOCK"; + +/// The in-memory presentation of the message [`brpb::Metadata`]. +#[derive(Debug, PartialEq, Eq)] +pub struct MetaFile { + pub name: Arc, + pub physical_files: Vec, + pub min_ts: u64, + pub max_ts: u64, +} + +impl From for MetaFile { + fn from(value: brpb::Metadata) -> Self { + Self::from_file(Arc::from(":memory:"), value) + } +} + +impl MetaFile { + pub fn from_file(name: Arc, mut meta_file: brpb::Metadata) -> Self { + let mut log_files = vec![]; + let min_ts = meta_file.min_ts; + let max_ts = meta_file.max_ts; + + // NOTE: perhaps we also need consider non-grouped backup meta here? 
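+        // Each `DataFileGroup` in the meta becomes one `PhysicalLogFile`;
+        // every logical file (segment) inside it keeps an `Arc` to the shared
+        // physical file name, so the name is stored only once.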
+ for mut group in meta_file.take_file_groups().into_iter() { + let name = Arc::from(group.path.clone().into_boxed_str()); + let mut g = PhysicalLogFile { + size: group.length, + name: Arc::clone(&name), + files: vec![], + }; + for log_file in group.take_data_files_info().into_iter() { + g.files.push(LogFile::from_pb(Arc::clone(&name), log_file)) + } + log_files.push(g); + } + + Self { + name, + physical_files: log_files, + min_ts, + max_ts, + } + } +} + +impl MetaFile { + pub fn into_logs(self) -> impl Iterator { + self.physical_files + .into_iter() + .flat_map(|g| g.files.into_iter()) + } +} + +/// The in-memory presentation of the message [`brpb::DataFileGroup`]. +#[derive(Debug, PartialEq, Eq)] +pub struct PhysicalLogFile { + pub size: u64, + pub name: Arc, + pub files: Vec, +} + +/// An [`RegionEpoch`] without protocol buffer fields and comparable. +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] +pub struct Epoch { + pub version: u64, + pub conf_ver: u64, +} + +impl From for Epoch { + fn from(value: RegionEpoch) -> Self { + Self { + version: value.version, + conf_ver: value.conf_ver, + } + } +} + +impl From for RegionEpoch { + fn from(value: Epoch) -> Self { + let mut v = Self::new(); + v.version = value.version; + v.conf_ver = value.conf_ver; + v + } +} + +/// The in-memory presentation of the message [`brpb::DataFileInfo`]. +/// The difference is that all `Vec` are replaced with `Arc<[u8]>` to save +/// memory. +#[derive(Debug, Clone, PartialEq, Eq)] + +pub struct LogFile { + pub id: LogFileId, + pub file_real_size: u64, + pub number_of_entries: i64, + pub crc64xor: u64, + pub region_id: u64, + pub cf: &'static str, + pub min_ts: u64, + pub max_ts: u64, + pub min_start_ts: u64, + pub min_key: Arc<[u8]>, + pub max_key: Arc<[u8]>, + pub region_start_key: Option>, + pub region_end_key: Option>, + pub region_epoches: Option>, + pub is_meta: bool, + pub ty: FileType, + pub compression: brpb::CompressionType, + pub table_id: i64, + pub resolved_ts: u64, + pub sha256: Arc<[u8]>, +} + +impl LogFile { + pub fn hacky_key_value_size(&self) -> u64 { + const HEADER_SIZE_PER_ENTRY: u64 = std::mem::size_of::() as u64 * 2; + self.file_real_size - HEADER_SIZE_PER_ENTRY * self.number_of_entries as u64 + } + + pub fn epoch_hints(&self) -> impl Iterator + '_ { + self.region_epoches.iter().flat_map(|epoches| { + self.region_start_key.iter().flat_map(|sk| { + self.region_end_key.iter().flat_map(|ek| { + epoches.iter().map(|v| EpochHint { + region_epoch: *v, + start_key: Arc::clone(sk), + end_key: Arc::clone(ek), + }) + }) + }) + }) + } +} + +/// The identity of a log file. +/// A log file can be located in the storage with this. +#[derive(Clone, Display, Eq, PartialEq)] +#[display(fmt = "{}@{}+{}", name, offset, length)] +pub struct LogFileId { + pub name: Arc, + pub offset: u64, + pub length: u64, +} + +impl std::fmt::Debug for LogFileId { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + f.debug_tuple("Id") + .field(&self.name) + .field(&format_args!("@{}+{}", self.offset, self.length)) + .finish() + } +} + +/// Extra config for loading metadata. +pub struct LoadFromExt<'a> { + /// Max number of concurrent fetching from remote tasks. + pub max_concurrent_fetch: usize, + /// The [`tracing::Span`] of loading remote tasks. + /// This span will be entered when fetching the remote tasks. + /// This span will be closed when all metadata loaded. + pub loading_content_span: Option, + /// The prefix of metadata in the external storage. + /// By default it is `v1/backupmeta`. 
+ pub meta_prefix: &'a str, +} + +impl<'a> LoadFromExt<'a> { + fn enter_load_span(&self) -> Option> { + self.loading_content_span.as_ref().map(|span| span.enter()) + } +} + +impl<'a> Default for LoadFromExt<'a> { + fn default() -> Self { + Self { + max_concurrent_fetch: 16, + loading_content_span: None, + meta_prefix: METADATA_PREFIX, + } + } +} + +/// The storage of log backup. +/// +/// For now, it supports load all metadata only, by consuming the stream. +/// [`StreamyMetaStorage`] is a [`Stream`] that yields `Result`. +pub struct StreamMetaStorage<'a> { + // NOTE: we want to keep the order of incoming meta files, so calls with the same argument can + // generate the same compactions. + prefetch: VecDeque< + Prefetch> + 'a>>>, + >, + ext_storage: &'a dyn ExternalStorage, + ext: LoadFromExt<'a>, + stat: LoadMetaStatistic, + + files: Fuse> + 'a>>>, +} + +/// A future that stores its result for future use when completed. +/// +/// This wraps a [`Future`](std::future::Future) yields `T` to a future yields +/// nothing. Once the future is terminaled (resolved), the content can then be +/// fetch by `must_fetch`. +#[pin_project::pin_project(project = ProjPrefetch)] +enum Prefetch { + Polling(#[pin] F), + Ready(::Output), +} + +impl Prefetch { + fn must_fetch(self) -> ::Output { + match self { + Prefetch::Ready(v) => v, + _ => panic!("must_cached call but the future not ready"), + } + } + + fn new(f: F) -> Self { + Self::Polling(f) + } +} + +impl Future for Prefetch { + type Output = (); + + fn poll( + mut self: Pin<&mut Self>, + cx: &mut std::task::Context<'_>, + ) -> std::task::Poll { + match self.as_mut().project() { + ProjPrefetch::Polling(fut) => { + let resolved = ready!(fut.poll(cx)); + unsafe { + // SAFETY: we won't poll it anymore. + *self.get_unchecked_mut() = Prefetch::Ready(resolved); + } + ().into() + } + ProjPrefetch::Ready(_) => std::task::Poll::Pending, + } + } +} + +impl FusedFuture for Prefetch { + fn is_terminated(&self) -> bool { + match self { + Prefetch::Polling(_) => false, + Prefetch::Ready(_) => true, + } + } +} + +impl<'a> Stream for StreamMetaStorage<'a> { + type Item = Result; + + fn poll_next( + mut self: Pin<&mut Self>, + cx: &mut std::task::Context<'_>, + ) -> std::task::Poll> { + if self.prefetch.is_empty() { + return self.poll_fetch_or_finish(cx); + } + + let first_result = self.poll_first_prefetch(cx); + match first_result { + Poll::Ready(item) => Some(item).into(), + Poll::Pending => self.poll_fetch_or_finish(cx), + } + } +} + +impl<'a> StreamMetaStorage<'a> { + /// Poll the next event. + fn poll_fetch_or_finish(&mut self, cx: &mut Context<'_>) -> Poll>> { + loop { + // No more space for prefetching. + if self.prefetch.len() >= self.ext.max_concurrent_fetch { + return Poll::Pending; + } + if self.files.is_terminated() { + self.ext.loading_content_span.take(); + if self.prefetch.is_empty() { + return None.into(); + } else { + return Poll::Pending; + } + } + let res = { + let _enter = self.ext.enter_load_span(); + self.files.next().poll_unpin(cx) + }; + match res { + Poll::Ready(Some(load)) => { + let mut fut = + Prefetch::new(MetaFile::load_from(self.ext_storage, load?).boxed_local()); + // start the execution of this future. + let poll = fut.poll_unpin(cx); + if poll.is_ready() { + // We need to check this in next run. 
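+                    // The prefetch future resolved on its very first poll, so
+                    // nothing else will wake this task on its behalf; wake
+                    // ourselves so `poll_first_prefetch` can pick the ready
+                    // value up on the next poll.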
+ cx.waker().wake_by_ref(); + } + self.stat.prefetch_task_emitted += 1; + self.prefetch.push_back(fut); + } + Poll::Ready(None) => continue, + Poll::Pending => return Poll::Pending, + } + } + } + + fn poll_first_prefetch(&mut self, cx: &mut Context<'_>) -> Poll> { + for fut in &mut self.prefetch { + if !fut.is_terminated() { + let _ = fut.poll_unpin(cx); + } + } + if self.prefetch[0].is_terminated() { + let file = self.prefetch.pop_front().unwrap().must_fetch(); + match file { + Ok((file, stat)) => { + self.stat += stat; + self.stat.meta_files_in += 1; + self.stat.prefetch_task_finished += 1; + Ok(file).into() + } + Err(err) => Err(err.attach_current_frame()).into(), + } + } else { + Poll::Pending + } + } + + pub fn take_statistic(&mut self) -> LoadMetaStatistic { + std::mem::take(&mut self.stat) + } + + /// Streaming metadata from an external storage. + /// Defaultly this will fetch metadata from `v1/backupmeta`, you may + /// override this in `ext`. + pub fn load_from_ext(s: &'a dyn ExternalStorage, ext: LoadFromExt<'a>) -> Self { + let files = s.iter_prefix(ext.meta_prefix).fuse(); + Self { + prefetch: VecDeque::new(), + files, + ext_storage: s, + ext, + stat: LoadMetaStatistic::default(), + } + } + + /// Count the number of the metadata prefix. + pub async fn count_objects(s: &'a dyn ExternalStorage) -> std::io::Result { + let mut n = 0; + // NOTE: should we allow user to specify the prefix? + let mut items = s.iter_prefix(METADATA_PREFIX); + while items.try_next().await?.is_some() { + n += 1 + } + Ok(n) + } +} + +impl MetaFile { + #[tracing::instrument(skip_all, fields(blob=%blob))] + async fn load_from( + s: &dyn ExternalStorage, + blob: BlobObject, + ) -> Result<(Self, LoadMetaStatistic)> { + use protobuf::Message; + + let _t = crate::statistic::prom::COMPACT_LOG_BACKUP_READ_META_DURATION.start_coarse_timer(); + + let mut stat = LoadMetaStatistic::default(); + let begin = Instant::now(); + + let error_cnt = Arc::new(AtomicU64::new(0)); + let error_cnt2 = Arc::clone(&error_cnt); + let ext = RetryExt::default().with_fail_hook(move |_err| { + error_cnt.inc_by(1); + }); + + let loading_file = tikv_util::stream::retry_all_ext( + || async { + let mut content = vec![]; + let n = s.read(&blob.key).read_to_end(&mut content).await?; + std::io::Result::Ok((n, content)) + }, + ext, + ); + let (n, content) = frame!(loading_file) + .await + .map_err(|err| Error::from(err).message(format_args!("reading {}", blob.key)))?; + stat.physical_bytes_loaded += n as u64; + stat.error_during_downloading += error_cnt2.get(); + + let mut meta_file = kvproto::brpb::Metadata::new(); + meta_file.merge_from_bytes(&content)?; + let name = Arc::from(blob.key.into_boxed_str()); + let result = Self::from_file(name, meta_file); + + stat.physical_data_files_in += result.physical_files.len() as u64; + stat.logical_data_files_in += result + .physical_files + .iter() + .map(|v| v.files.len() as u64) + .sum::(); + stat.load_file_duration += begin.saturating_elapsed(); + + Ok((result, stat)) + } +} + +impl LogFile { + fn from_pb(host_file: Arc, mut pb_info: brpb::DataFileInfo) -> Self { + let region_epoches = pb_info.region_epoch.is_empty().not().then(|| { + pb_info + .region_epoch + .iter() + .cloned() + .map(From::from) + .collect() + }); + Self { + id: LogFileId { + name: host_file, + offset: pb_info.range_offset, + length: pb_info.range_length, + }, + file_real_size: pb_info.length, + region_id: pb_info.region_id as _, + cf: util::cf_name(&pb_info.cf), + max_ts: pb_info.max_ts, + min_ts: pb_info.min_ts, + max_key: 
Arc::from(pb_info.take_end_key().into_boxed_slice()), + min_key: Arc::from(pb_info.take_start_key().into_boxed_slice()), + region_start_key: pb_info + .region_epoch + .is_empty() + .not() + .then(|| Arc::from(pb_info.take_region_start_key().into_boxed_slice())), + region_end_key: pb_info + .region_epoch + .is_empty() + .not() + .then(|| Arc::from(pb_info.take_region_end_key().into_boxed_slice())), + is_meta: pb_info.is_meta, + min_start_ts: pb_info.min_begin_ts_in_default_cf, + ty: pb_info.r_type, + crc64xor: pb_info.crc64xor, + number_of_entries: pb_info.number_of_entries, + sha256: Arc::from(pb_info.take_sha256().into_boxed_slice()), + resolved_ts: pb_info.resolved_ts, + table_id: pb_info.table_id, + compression: pb_info.compression_type, + region_epoches, + } + } + + pub fn into_pb(self) -> brpb::DataFileInfo { + let mut pb = brpb::DataFileInfo::new(); + pb.range_offset = self.id.offset; + pb.range_length = self.id.length; + pb.length = self.file_real_size; + pb.region_id = self.region_id as _; + pb.cf = self.cf.to_owned(); + pb.max_ts = self.max_ts; + pb.min_ts = self.min_ts; + pb.set_end_key(self.max_key.to_vec()); + pb.set_start_key(self.min_key.to_vec()); + pb.is_meta = self.is_meta; + pb.min_begin_ts_in_default_cf = self.min_start_ts; + pb.r_type = self.ty; + pb.crc64xor = self.crc64xor; + pb.number_of_entries = self.number_of_entries; + pb.set_sha256(self.sha256.to_vec()); + pb.resolved_ts = self.resolved_ts; + pb.table_id = self.table_id; + pb.compression_type = self.compression; + pb.set_region_start_key( + self.region_start_key + .map(|v| v.to_vec()) + .unwrap_or_default(), + ); + pb.set_region_end_key(self.region_end_key.map(|v| v.to_vec()).unwrap_or_default()); + pb.set_region_epoch( + self.region_epoches + .map(|v| v.iter().cloned().map(From::from).collect()) + .unwrap_or_default(), + ); + pb + } +} + +pub struct MigartionStorageWrapper<'a> { + storage: &'a dyn ExternalStorage, + migartions_prefix: &'a str, +} + +impl<'a> MigartionStorageWrapper<'a> { + pub fn new(storage: &'a dyn ExternalStorage) -> Self { + Self { + storage, + migartions_prefix: MIGRATION_PREFIX, + } + } + + pub async fn write(&self, migration: Migration) -> Result<()> { + use protobuf::Message; + let id = self.largest_id().await?; + // Note: perhaps we need to verify that there isn't concurrency writing in the + // future. 
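+        // The new migration takes the next sequence number. `name_of_migration`
+        // renders it as `{seq:08}_{hash:016X}.mgrt`, so listing the migration
+        // prefix yields the files in creation order.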
+ let name = name_of_migration(id + 1, &migration); + let bytes = migration.write_to_bytes()?; + retry_expr!( + self.storage + .write( + &format!("{}/{}", self.migartions_prefix, name), + UnpinReader(Box::new(Cursor::new(&bytes))), + bytes.len() as u64 + ) + .map_err(|err| JustRetry(err)) + ) + .await + .map_err(|err| err.0)?; + Ok(()) + } + + pub async fn largest_id(&self) -> Result { + self.storage + .iter_prefix(self.migartions_prefix) + .err_into::() + .map(|v| { + v.and_then(|v| match id_of_migration(&v.key) { + Some(v) => Ok(v), + None => Err(Error::from(ErrorKind::Other(format!( + "the file {} cannot be parsed as a migration", + v + )))), + }) + }) + .try_fold(u64::MIN, |val, new| futures::future::ok(val.max(new))) + .await + } +} + +pub fn name_of_migration(id: u64, m: &Migration) -> String { + format!("{:08}_{:016X}.mgrt", id, hash_migration(m)) +} + +pub fn id_of_migration(name: &str) -> Option { + let file_name = Path::new(name).file_name()?.to_string_lossy(); + if file_name == "BASE" { + return Some(0); + } + if file_name.len() < 8 { + return None; + } + file_name[..8].parse::().ok() +} + +pub fn hash_migration(m: &Migration) -> u64 { + let mut crc64 = 0; + for compaction in m.compactions.iter() { + crc64 ^= compaction.artifacts_hash; + } + for meta_edit in m.edit_meta.iter() { + crc64 ^= hash_meta_edit(meta_edit); + } + crc64 ^ m.truncated_to +} + +pub fn hash_meta_edit(meta_edit: &brpb::MetaEdit) -> u64 { + let mut crc64 = 0; + for df in meta_edit.delete_physical_files.iter() { + let mut digest = crc64fast::Digest::new(); + digest.write(df.as_bytes()); + crc64 ^= digest.sum64(); + } + for spans in meta_edit.delete_logical_files.iter() { + let mut crc = crc64fast::Digest::new(); + crc.write(spans.get_path().as_bytes()); + for span in spans.get_spans() { + let mut crc = crc.clone(); + crc.write(&span.offset.to_le_bytes()); + crc.write(&span.length.to_le_bytes()); + crc64 ^= crc.sum64(); + } + } + let mut crc = crc64fast::Digest::new(); + crc.write(&[meta_edit.destruct_self as u8]); + crc64 ^ crc.sum64() +} + +#[cfg(test)] +mod test { + use futures::stream::TryStreamExt; + + use super::{LoadFromExt, MetaFile, StreamMetaStorage}; + use crate::test_util::{gen_step, KvGen, LogFileBuilder, TmpStorage}; + + async fn construct_storage( + st: &TmpStorage, + meta_path: impl Fn(i64) -> String, + log_path: impl Fn(i64) -> String, + ) -> Vec { + let gen_builder = |batch, num| { + (num * batch..num * (batch + 1)) + .map(move |v| { + KvGen::new(gen_step(1, v + batch * num, num), |_| b"val".to_vec()).take(2) + }) + .enumerate() + .map(|(n, it)| { + let mut b = LogFileBuilder::new(|v| v.region_id = n as u64); + for kv in it { + b.add_encoded(&kv.key, &kv.value) + } + b + }) + }; + let mut mfs = vec![]; + for i in 0..5 { + let mf = st + .build_flush(&log_path(i), &meta_path(i), gen_builder(i, 10)) + .await; + mfs.push(mf); + } + mfs + } + + #[tokio::test] + async fn test_load_from_storage() { + let st = TmpStorage::create(); + let mfs = construct_storage( + &st, + |i| format!("v1/backupmeta/the-meta-{}.bin", i), + |i| format!("out/the-log-{}.bin", i), + ) + .await; + + tracing_active_tree::init(); + + let mfs = &mfs; + let st = &st; + let test_for_concurrency = |n| async move { + let mut ext = LoadFromExt::default(); + ext.max_concurrent_fetch = n; + let sst = StreamMetaStorage::load_from_ext(st.storage().as_ref(), ext); + let mut result = sst.try_collect::>().await.unwrap(); + result.sort_by(|a, b| a.name.cmp(&b.name)); + assert_eq!(&result, mfs); + }; + + test_for_concurrency(1).await; + 
        test_for_concurrency(2).await;
+        test_for_concurrency(16).await;
+    }
+
+    #[tokio::test]
+    async fn test_different_prefix() {
+        let st = TmpStorage::create();
+        let mfs = construct_storage(
+            &st,
+            |i| format!("my-fantastic-meta-dir/{}.meta", i),
+            |i| format!("{}.log", i),
+        )
+        .await;
+
+        let mut ext = LoadFromExt::default();
+        ext.meta_prefix = "my-fantastic-meta-dir";
+        let sst = StreamMetaStorage::load_from_ext(st.storage().as_ref(), ext);
+        let mut result = sst.try_collect::<Vec<_>>().await.unwrap();
+        result.sort_by(|a, b| a.name.cmp(&b.name));
+        assert_eq!(result, mfs);
+    }
+}
diff --git a/components/compact-log-backup/src/test_util.rs b/components/compact-log-backup/src/test_util.rs
new file mode 100644
index 00000000000..7f3763ceed8
--- /dev/null
+++ b/components/compact-log-backup/src/test_util.rs
@@ -0,0 +1,540 @@
+#![cfg(test)]
+
+use std::{
+    collections::BTreeMap,
+    io::{Cursor, Write},
+    ops::Not,
+    path::{Path, PathBuf},
+    sync::{Arc, Mutex},
+};
+
+use engine_rocks::RocksEngine;
+use engine_traits::{IterOptions, Iterator as _, RefIterable, SstExt};
+use external_storage::ExternalStorage;
+use file_system::sha256;
+use futures::{
+    io::{AsyncReadExt, Cursor as ACursor},
+    stream::StreamExt,
+};
+use keys::origin_key;
+use kvproto::brpb::{self, Metadata};
+use protobuf::{parse_from_bytes, Message};
+use tempdir::TempDir;
+use tidb_query_datatype::codec::table::encode_row_key;
+use tikv_util::codec::stream_event::EventEncoder;
+use txn_types::Key;
+
+use crate::{
+    compaction::{
+        exec::{SubcompactExt, SubcompactionExec},
+        Subcompaction, SubcompactionResult,
+    },
+    errors::{OtherErrExt, Result},
+    storage::{id_of_migration, Epoch, LogFile, LogFileId, MetaFile},
+};
+
+#[derive(Debug, PartialEq, Eq)]
+pub struct Kv {
+    pub key: Vec<u8>,
+    pub value: Vec<u8>,
+}
+
+/// A builder for a fake [`LogFile`].
+pub struct LogFileBuilder {
+    pub name: String,
+    pub region_id: u64,
+    pub cf: &'static str,
+    pub ty: brpb::FileType,
+    pub is_meta: bool,
+    pub region_start_key: Option<Vec<u8>>,
+    pub region_end_key: Option<Vec<u8>>,
+    pub region_epoches: Vec<Epoch>,
+
+    content: zstd::Encoder<'static, Cursor<Vec<u8>>>,
+    min_ts: u64,
+    max_ts: u64,
+    min_key: Vec<u8>,
+    max_key: Vec<u8>,
+    number_of_entries: u64,
+    crc64xor: u64,
+    compression: brpb::CompressionType,
+    file_real_size: u64,
+}
+
+/// A structure that "compacts" logs by simply sorting and deduplicating its
+/// input.
+#[derive(Default, Clone)]
+pub struct CompactInMem {
+    collect: Arc<Mutex<BTreeMap<Vec<u8>, Vec<u8>>>>,
+}
+
+impl CompactInMem {
+    /// Wrap an iterator and add its content to the compact buffer.
+    pub fn tap_on<'it>(
+        &self,
+        it: impl Iterator<Item = Kv> + 'it,
+    ) -> impl Iterator<Item = Kv> + 'it {
+        RecordSorted {
+            target: self.clone(),
+            inner: it,
+        }
+    }
+
+    /// Wrap an iterator and add its content to the compact buffer.
+    ///
+    /// Unlike [`Self::tap_on`], this consumes `self` instead of bumping the
+    /// reference counter.
+    pub fn tap_on_owned<'it>(
+        self,
+        it: impl Iterator<Item = Kv> + 'it,
+    ) -> impl Iterator<Item = Kv> + 'it {
+        RecordSorted {
+            target: self,
+            inner: it,
+        }
+    }
+
+    /// Get the compacted content from the compact buffer.
+    ///
+    /// # Panic
+    ///
+    /// Will panic if there are concurrent writers.
+    #[track_caller]
+    pub fn must_iter(&mut self) -> impl Iterator<Item = Kv> + '_ {
+        self.collect
+            .try_lock()
+            .unwrap()
+            .iter()
+            .map(|(k, v)| Kv {
+                key: k.clone(),
+                value: v.clone(),
+            })
+            .collect::<Vec<_>>()
+            .into_iter()
+    }
+}
+
+/// Verify that the content of an SST is the same as the input iterator.
+///
+/// Note: `input` should yield keys without the 'z' prefix.
+#[track_caller] +pub fn verify_the_same( + sst: impl AsRef, + mut input: impl Iterator, +) -> Result<()> { + use engine_traits::SstReader; + + let rd = DB::SstReader::open( + sst.as_ref().to_str().ok_or("non utf-8 path").adapt_err()?, + None, + )?; + + let mut it = rd.iter(IterOptions::default())?; + + it.seek_to_first()?; + let mut n = 0; + while it.valid()? { + n += 1; + let key = it.key(); + let value = it.value(); + let kv = Kv { + key: origin_key(key).to_vec(), + value: value.to_vec(), + }; + match input.next() { + None => return Err("the input iterator has been exhausted").adapt_err(), + Some(ikv) => { + if kv != ikv { + return Err(format!( + "the #{} key isn't equal: input is {:?} while compaction result is {:?}", + n, ikv, kv + )) + .adapt_err(); + } + } + } + it.next()?; + } + if let Some(v) = input.next() { + return Err(format!( + "The input iterator not exhausted, there is one: {:?}", + v + )) + .adapt_err(); + } + Ok(()) +} + +pub struct RecordSorted { + target: CompactInMem, + inner: S, +} + +impl> Iterator for RecordSorted { + type Item = Kv; + + fn next(&mut self) -> Option { + let item = self.inner.next()?; + self.target + .collect + .lock() + .unwrap() + .insert(item.key.clone(), item.value.clone()); + Some(item) + } +} + +pub type KeySeed = ( + i64, // table_id + i64, // handle_id + u64, // ts +); + +pub struct KvGen { + value: Box Vec>, + sources: S, +} + +/// Sow a seed, and return the fruit it grow. +pub fn sow((table_id, handle_id, ts): KeySeed) -> Vec { + Key::from_raw(&encode_row_key(table_id, handle_id)) + .append_ts(ts.into()) + .into_encoded() +} + +impl KvGen { + pub fn new(s: S, value: impl FnMut(KeySeed) -> Vec + 'static) -> Self { + Self { + value: Box::new(value), + sources: s, + } + } +} + +impl> Iterator for KvGen { + type Item = Kv; + + fn next(&mut self) -> Option { + self.sources.next().map(|seed| { + let key = sow(seed); + let value = (self.value)(seed); + Kv { key, value } + }) + } +} + +pub fn gen_step(table_id: i64, start: i64, step: i64) -> impl Iterator { + (0..).map(move |v| (table_id, v * step + start, 42)) +} + +pub fn gen_adjacent_with_ts( + table_id: i64, + offset: usize, + ts: u64, +) -> impl Iterator { + (offset..).map(move |v| (table_id, v as i64, ts)) +} + +pub fn gen_min_max( + table_id: i64, + min_hnd: i64, + max_hnd: i64, + min_ts: u64, + max_ts: u64, +) -> impl Iterator { + [(table_id, min_hnd, min_ts), (table_id, max_hnd, max_ts)].into_iter() +} + +impl LogFileBuilder { + pub fn new(configure: impl FnOnce(&mut Self)) -> Self { + let mut res = Self { + name: "unamed.log".to_owned(), + region_id: 0, + cf: "default", + ty: brpb::FileType::Put, + is_meta: false, + + content: zstd::Encoder::new(Cursor::new(vec![]), 3).unwrap(), + min_ts: 0, + max_ts: 0, + min_key: vec![], + max_key: vec![], + number_of_entries: 0, + crc64xor: 0, + compression: brpb::CompressionType::Zstd, + file_real_size: 0, + region_start_key: None, + region_end_key: None, + region_epoches: vec![], + }; + configure(&mut res); + res + } + + pub fn from_iter(it: impl IntoIterator, configure: impl FnOnce(&mut Self)) -> Self { + let mut res = Self::new(configure); + for kv in it { + res.add_encoded(&kv.key, &kv.value); + } + res + } + + pub fn add_encoded(&mut self, key: &[u8], value: &[u8]) { + let ts = txn_types::Key::decode_ts_from(key) + .expect("key without ts") + .into_inner(); + for part in EventEncoder::encode_event(key, value) { + self.file_real_size += part.as_ref().len() as u64; + self.content.write_all(part.as_ref()).unwrap(); + } + // Update metadata. 
+ self.number_of_entries += 1; + self.min_ts = self.min_ts.min(ts); + self.max_ts = self.max_ts.max(ts); + if self.min_key.is_empty() || key < self.min_key.as_slice() { + self.min_key = key.to_owned(); + } + if self.max_key.is_empty() || key > self.max_key.as_slice() { + self.max_key = key.to_owned(); + } + let mut d = crc64fast::Digest::new(); + d.write(key); + d.write(value); + self.crc64xor ^= d.sum64(); + } + + pub async fn must_save(self, st: &dyn ExternalStorage) -> LogFile { + let (info, content) = self.build(); + let cl = content.len() as u64; + st.write(&info.id.name, ACursor::new(content).into(), cl) + .await + .unwrap(); + + info + } + + pub fn build(self) -> (LogFile, Vec) { + let cnt = self + .content + .finish() + .expect("failed to do zstd compression"); + let file = LogFile { + region_id: self.region_id, + cf: self.cf, + ty: self.ty, + is_meta: self.is_meta, + + min_ts: self.min_ts, + max_ts: self.max_ts, + min_key: Arc::from(self.min_key.into_boxed_slice()), + max_key: Arc::from(self.max_key.into_boxed_slice()), + number_of_entries: self.number_of_entries as i64, + crc64xor: self.crc64xor, + compression: self.compression, + file_real_size: self.file_real_size, + + id: LogFileId { + name: Arc::from(self.name.into_boxed_str()), + offset: 0, + length: cnt.get_ref().len() as u64, + }, + min_start_ts: 0, + table_id: 0, + resolved_ts: 0, + sha256: Arc::from( + sha256(cnt.get_ref()) + .expect("cannot calculate sha256 for file") + .into_boxed_slice(), + ), + region_start_key: self.region_start_key.map(|v| v.into_boxed_slice().into()), + region_end_key: self.region_end_key.map(|v| v.into_boxed_slice().into()), + region_epoches: self + .region_epoches + .is_empty() + .not() + .then(|| self.region_epoches.into_boxed_slice().into()), + }; + (file, cnt.into_inner()) + } +} + +/// Simulating a flush: save all log files and generate a metadata by them. +/// Unlike [`save_many_log_files`], this returns the generated metadata. +pub fn build_many_log_files( + log_files: impl IntoIterator, + mut w: impl Write, +) -> std::io::Result { + let mut md = brpb::Metadata::new(); + md.mut_file_groups().push_default(); + md.set_meta_version(brpb::MetaVersion::V2); + let mut offset = 0; + for log in log_files { + let (mut log_info, content) = log.build(); + w.write_all(&content)?; + log_info.id.offset = offset; + log_info.id.length = content.len() as _; + md.min_ts = md.min_ts.min(log_info.min_ts); + md.max_ts = md.max_ts.max(log_info.max_ts); + + let pb = log_info.into_pb(); + md.mut_file_groups()[0].data_files_info.push(pb); + + offset += content.len() as u64; + } + md.mut_file_groups()[0].set_length(offset); + + Ok(md) +} + +/// Simulating a flush: save all log files and generate a metadata by them. +/// Then save the generated metadata. 
+pub async fn save_many_log_files( + name: &str, + log_files: impl IntoIterator, + st: &dyn ExternalStorage, +) -> std::io::Result { + let mut w = vec![]; + let mut md = build_many_log_files(log_files, &mut w)?; + let cl = w.len() as u64; + let v = &mut md.file_groups[0]; + v.set_path(name.to_string()); + st.write(name, ACursor::new(w).into(), cl).await?; + Ok(md) +} + +pub struct TmpStorage { + path: Option, + storage: Arc, +} + +impl Drop for TmpStorage { + fn drop(&mut self) { + if std::thread::panicking() { + let path = self.leak(); + eprintln!( + "It seems we are in a failed test case, the temprory storage will be kept at {}", + path.display() + ); + } + } +} + +impl TmpStorage { + pub fn create() -> TmpStorage { + let path = TempDir::new("test").unwrap(); + let storage = external_storage::LocalStorage::new(path.path()).unwrap(); + TmpStorage { + path: Some(path), + storage: Arc::new(storage), + } + } + + /// leak the current external storage. + /// this should only be called once. + pub fn leak(&mut self) -> PathBuf { + self.path.take().unwrap().into_path() + } + + pub fn path(&self) -> &Path { + self.path.as_ref().unwrap().path() + } + + pub fn storage(&self) -> &Arc { + &self.storage + } + + pub fn backend(&self) -> brpb::StorageBackend { + let mut bknd = brpb::StorageBackend::default(); + bknd.set_local({ + let mut loc = brpb::Local::default(); + loc.set_path(self.path().to_string_lossy().into_owned()); + loc + }); + bknd + } +} + +impl TmpStorage { + pub async fn run_subcompaction(&self, c: Subcompaction) -> SubcompactionResult { + self.try_run_subcompaction(c).await.unwrap() + } + + pub async fn try_run_subcompaction(&self, c: Subcompaction) -> Result { + let cw = SubcompactionExec::::default_config(self.storage.clone()); + let ext = SubcompactExt::default(); + cw.run(c, ext).await + } + + #[track_caller] + pub fn verify_result(&self, res: SubcompactionResult, mut cm: CompactInMem) { + let sst_path = self.path().join(&res.meta.sst_outputs[0].name); + res.verify_checksum().unwrap(); + verify_the_same::(sst_path, cm.must_iter()).unwrap(); + } + + pub async fn build_log_file(&self, name: &str, kvs: impl Iterator) -> LogFile { + let mut b = LogFileBuilder::new(|v| v.name = name.to_owned()); + for kv in kvs { + b.add_encoded(&kv.key, &kv.value); + } + b.must_save(self.storage.as_ref()).await + } + + pub async fn build_flush( + &self, + log_path: &str, + meta_path: &str, + builders: impl IntoIterator, + ) -> MetaFile { + let result = save_many_log_files(log_path, builders, self.storage.as_ref()) + .await + .unwrap(); + let content = result.write_to_bytes().unwrap(); + self.storage + .write( + meta_path, + ACursor::new(&content).into(), + content.len() as u64, + ) + .await + .unwrap(); + MetaFile::from_file(Arc::from(meta_path), result) + } + + pub async fn load_migrations(&self) -> crate::Result> { + let pfx = "v1/migrations"; + let mut stream = self.storage.iter_prefix(pfx); + let mut output = vec![]; + while let Some(file) = stream.next().await { + let file = file?; + let mut content = vec![]; + self.storage + .read(&file.key) + .read_to_end(&mut content) + .await?; + let mig = parse_from_bytes::(&content)?; + let id = id_of_migration(&file.key).unwrap_or(0); + output.push((id, mig)); + } + Ok(output) + } + + pub async fn load_subcompactions( + &self, + pfx: &str, + ) -> crate::Result> { + let mut stream = self.storage.iter_prefix(pfx); + let mut output = vec![]; + while let Some(file) = stream.next().await { + let file = file?; + let mut content = vec![]; + self.storage + 
.read(&file.key) + .read_to_end(&mut content) + .await?; + let mig = parse_from_bytes::(&content)?; + for c in mig.subcompactions.into_iter() { + output.push(c) + } + } + Ok(output) + } +} diff --git a/components/compact-log-backup/src/util.rs b/components/compact-log-backup/src/util.rs new file mode 100644 index 00000000000..4bfd31055ff --- /dev/null +++ b/components/compact-log-backup/src/util.rs @@ -0,0 +1,197 @@ +// Copyright 2024 TiKV Project Authors. Licensed under Apache-2.0. + +use std::{fmt::Display, future::Future, task::Poll}; + +use engine_traits::{CfName, SstCompressionType, CF_DEFAULT, CF_LOCK, CF_RAFT, CF_WRITE}; +use external_storage::ExternalStorage; + +/// A helper for cooperative concurrency. +/// +/// When doing a tight loop (say, traversing a huge slice) that may use many +/// CPU time, you may inject [`Cooperate::step`] to each run. This will try +/// to yield the current task periodically so other coroutines can be +/// executed. +pub struct Cooperate { + work_count: usize, + yield_every: usize, +} + +pub struct Step(bool); + +impl Future for Step { + type Output = (); + + fn poll( + mut self: std::pin::Pin<&mut Self>, + cx: &mut std::task::Context<'_>, + ) -> std::task::Poll { + if self.0 { + cx.waker().wake_by_ref(); + self.0 = false; + Poll::Pending + } else { + Poll::Ready(()) + } + } +} + +impl Default for Cooperate { + fn default() -> Self { + /// The default operations can be done before yielding the current + /// thread to another coroutine. The value `4096` is randomly + /// chosen. + /// + /// NOTE: Perhaps we can obtain better performance by tuning this or + /// pinning some critial tasks to a CPU. + const DEFAULT_YIELD_INTERVAL: usize = 4096; + Self::new(DEFAULT_YIELD_INTERVAL) + } +} + +impl Cooperate { + /// Create a new [`Cooperate`] that yields when a fixed number of works + /// done. + pub fn new(yield_every: usize) -> Self { + Self { + work_count: 0, + yield_every, + } + } + + /// Finishing one tiny task. This will yield the current carrier thread + /// when needed. + pub fn step(&mut self) -> Step { + self.work_count += 1; + if self.work_count > self.yield_every { + self.work_count = 0; + Step(true) + } else { + Step(false) + } + } +} + +/// Select any future completes from a vector. +/// The resolved future will be removed from the vector by `swap_remove`, +/// hence the order of execution may vary. Prefer using this for joining +/// unordered background tasks. +pub fn select_vec<'a, T, F>(v: &'a mut Vec) -> impl Future + 'a +where + // Note: this `Unpin` might be removed, as the returned future have + // a mutable reference to the vector, the vector itself cannot be moved. + F: Future + Unpin + 'a, +{ + use futures::FutureExt; + + futures::future::poll_fn(|cx| { + for (idx, fut) in v.iter_mut().enumerate() { + match fut.poll_unpin(cx) { + std::task::Poll::Ready(item) => { + let _ = v.swap_remove(idx); + return item.into(); + } + std::task::Poll::Pending => continue, + } + } + std::task::Poll::Pending + }) +} + +/// The extra config for [`execute_all_ext`]. +pub struct ExecuteAllExt { + /// The max number of concurrent tasks. + pub max_concurrency: usize, +} + +impl Default for ExecuteAllExt { + fn default() -> Self { + Self { + max_concurrency: 16, + } + } +} + +/// Execute a set of tasks concurrently. +/// +/// You may restrict the max concurrency by the `ext`. 
+#[tracing::instrument(skip_all, fields(size = futs.len()))] +pub async fn execute_all_ext(futs: Vec, ext: ExecuteAllExt) -> Result, E> +where + F: Future> + Unpin, +{ + let mut pending_futures = vec![]; + let mut result = Vec::with_capacity(futs.len()); + for fut in futs { + pending_futures.push(fut); + if pending_futures.len() >= ext.max_concurrency { + result.push(select_vec(&mut pending_futures).await?); + } + } + result.append(&mut futures::future::try_join_all(pending_futures.into_iter()).await?); + Ok(result) +} + +/// Transform a str to a [`engine_traits::CfName`]\(`&'static str`). +/// If the argument isn't one of `""`, `"DEFAULT"`, `"default"`, `"WRITE"`, +/// `"write"`, `"LOCK"`, `"lock"`... returns "ERR_CF". (Which would be +/// ignored then.) +pub fn cf_name(s: &str) -> CfName { + match s { + "" | "DEFAULT" | "default" => CF_DEFAULT, + "WRITE" | "write" => CF_WRITE, + "LOCK" | "lock" => CF_LOCK, + "RAFT" | "raft" => CF_RAFT, + _ => "ERR_CF", + } +} + +/// A wrapper that make a `u64` always be displayed as {:016X}. +#[derive(Debug)] +struct HexU64(u64); + +impl Display for HexU64 { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "{:016X}", self.0) + } +} + +pub fn aligned_u64(v: u64) -> impl Display { + HexU64(v) +} + +pub fn compression_type_to_u8(c: SstCompressionType) -> u8 { + match c { + SstCompressionType::Lz4 => 0, + SstCompressionType::Snappy => 1, + SstCompressionType::Zstd => 2, + } +} + +#[derive(Eq, PartialEq)] +pub struct EndKey<'a>(pub &'a [u8]); + +impl<'a> PartialOrd for EndKey<'a> { + fn partial_cmp(&self, other: &Self) -> Option { + use std::cmp::Ordering::*; + match (self, other) { + (EndKey(b""), EndKey(b"")) => Some(Equal), + (EndKey(b""), _) => Some(Greater), + (_, EndKey(b"")) => Some(Less), + (EndKey(a), EndKey(b)) => Some(a.cmp(b)), + } + } +} + +impl<'a> Ord for EndKey<'a> { + fn cmp(&self, other: &Self) -> std::cmp::Ordering { + self.partial_cmp(other).unwrap() + } +} + +/// Get the storage URL string. +/// Once an error occurred, return the error message. 
+pub fn storage_url(s: &dyn ExternalStorage) -> String { + s.url() + .map(|v| v.to_string()) + .unwrap_or_else(|err| format!("")) +} diff --git a/components/engine_traits/src/sst.rs b/components/engine_traits/src/sst.rs index 991c925d1c4..c0cdf998d16 100644 --- a/components/engine_traits/src/sst.rs +++ b/components/engine_traits/src/sst.rs @@ -55,7 +55,7 @@ pub trait ExternalSstFileReader: std::io::Read + Send { } // compression type used for write sst file -#[derive(Copy, Clone)] +#[derive(Copy, Clone, Debug)] pub enum SstCompressionType { Lz4, Snappy, diff --git a/components/tikv_util/src/codec/stream_event.rs b/components/tikv_util/src/codec/stream_event.rs index 3c1a04f77e3..7cc365e5124 100644 --- a/components/tikv_util/src/codec/stream_event.rs +++ b/components/tikv_util/src/codec/stream_event.rs @@ -57,6 +57,16 @@ impl EventIterator<'_> { } } + pub fn get_next(&mut self) -> Result> { + let it = self; + if !it.valid() { + return Ok(None); + } + it.next()?; + + Ok(Some((it.key(), it.value()))) + } + fn get_size(&mut self) -> u32 { let result = byteorder::LE::read_u32(&self.buf[self.offset..]); self.offset += 4; From b6252e9b66657f6767301ca183dbf7af9265a849 Mon Sep 17 00:00:00 2001 From: Bisheng Huang Date: Tue, 19 Nov 2024 16:41:26 +0800 Subject: [PATCH 24/86] raftstore: add metrics for snapshots that are dropped (#17837) close tikv/tikv#17836 This commit adds metrics to track Raft snapshots that are dropped during sending or receiving due to concurrency limits. These metrics help identify bottlenecks during scaling. Signed-off-by: Bisheng Huang Co-authored-by: ti-chi-bot[bot] <108142056+ti-chi-bot[bot]@users.noreply.github.com> --- src/server/metrics.rs | 2 ++ src/server/snap.rs | 2 ++ 2 files changed, 4 insertions(+) diff --git a/src/server/metrics.rs b/src/server/metrics.rs index ae06155f4a1..bee26f28237 100644 --- a/src/server/metrics.rs +++ b/src/server/metrics.rs @@ -80,7 +80,9 @@ make_auto_flush_static_metric! { pub label_enum SnapTask { send, + send_dropped, recv, + recv_dropped, recv_v2, } diff --git a/src/server/snap.rs b/src/server/snap.rs index 0238bd2ef75..b8b1f5b97d7 100644 --- a/src/server/snap.rs +++ b/src/server/snap.rs @@ -516,6 +516,7 @@ impl Runnable for Runner { match task { Task::Recv { stream, sink } => { if let Some(status) = self.receiving_busy() { + SNAP_TASK_COUNTER_STATIC.recv_dropped.inc(); self.pool.spawn(sink.fail(status)); return; } @@ -583,6 +584,7 @@ impl Runnable for Runner { let region_id = msg.get_region_id(); if self.sending_count.load(Ordering::SeqCst) >= self.cfg.concurrent_send_snap_limit { + SNAP_TASK_COUNTER_STATIC.send_dropped.inc(); warn!( "too many sending snapshot tasks, drop Send Snap[to: {}, snap: {:?}]", addr, msg From 361a8ebfc67f92666384292a78c5832d073c4c2b Mon Sep 17 00:00:00 2001 From: lucasliang Date: Tue, 19 Nov 2024 18:03:32 +0800 Subject: [PATCH 25/86] raftstore: `campaign` newly created regions in time after `Split` (#17625) close tikv/tikv#12410 This pr make the `campaign` of the newly splitted regions triggered in time, when the leadership of the parent region is stable after `on_role_changed`. 
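As a rough illustration of the intended flow (a minimal, self-contained sketch; `CampaignType` matches the enum added in this patch, while the other structs, the router, and the function names below are simplified stand-ins for the real raftstore types):

    // Follower records the regions it created while applying a split; once it
    // becomes leader of the parent region, it asks each of them to campaign.
    #[derive(Debug, Clone, Copy)]
    enum CampaignType {
        ForceLeader,
        UnsafeSplitCampaign,
    }

    struct Router;

    impl Router {
        fn send_campaign(&self, region_id: u64, ty: CampaignType) {
            // In TiKV this is router.send(region_id,
            //     PeerMsg::CasualMessage(Box::new(CasualMessage::Campaign(ty)))).
            println!("ask region {} to campaign: {:?}", region_id, ty);
        }
    }

    #[derive(Default)]
    struct Peer {
        // New regions produced by a split while this peer was still a follower.
        uncampaigned_new_regions: Option<Vec<u64>>,
    }

    impl Peer {
        // Called when a follower applies a split and cannot campaign itself.
        fn record_split_child(&mut self, new_region_id: u64) {
            self.uncampaigned_new_regions
                .get_or_insert_with(Vec::new)
                .push(new_region_id);
        }

        // Called from on_role_changed once this peer is the parent's leader:
        // only then is it safe to trigger the children's elections.
        fn on_became_leader(&mut self, router: &Router) {
            if let Some(regions) = self.uncampaigned_new_regions.take() {
                for region_id in regions {
                    router.send_campaign(region_id, CampaignType::UnsafeSplitCampaign);
                }
            }
        }
    }

    fn main() {
        let mut peer = Peer::default();
        peer.record_split_child(1000);
        peer.on_became_leader(&Router);
    }

The recorded list is also cleared as soon as the parent region observes a valid leader elsewhere, so a follower that never wins the election does not keep stale records.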
Signed-off-by: lucasliang Co-authored-by: ti-chi-bot[bot] <108142056+ti-chi-bot[bot]@users.noreply.github.com> --- components/raftstore/src/store/fsm/peer.rs | 54 ++++++- components/raftstore/src/store/msg.rs | 18 ++- components/raftstore/src/store/peer.rs | 45 +++++- components/snap_recovery/src/leader_keeper.rs | 8 +- tests/failpoints/cases/test_split_region.rs | 136 ++++++++++++++++++ .../raftstore/test_split_region.rs | 40 ++++++ 6 files changed, 288 insertions(+), 13 deletions(-) diff --git a/components/raftstore/src/store/fsm/peer.rs b/components/raftstore/src/store/fsm/peer.rs index 922818edac3..bbb3a2d2a70 100644 --- a/components/raftstore/src/store/fsm/peer.rs +++ b/components/raftstore/src/store/fsm/peer.rs @@ -88,7 +88,7 @@ use crate::{ local_metrics::{RaftMetrics, TimeTracker}, memory::*, metrics::*, - msg::{Callback, ExtCallback, InspectedRaftMessage}, + msg::{Callback, CampaignType, ExtCallback, InspectedRaftMessage}, peer::{ ConsistencyState, Peer, PersistSnapshotResult, StaleState, TRANSFER_LEADER_COMMAND_REPLY_CTX, @@ -1301,8 +1301,18 @@ where self.maybe_destroy(); } } - CasualMessage::Campaign => { - let _ = self.fsm.peer.raft_group.campaign(); + CasualMessage::Campaign(campaign_type) => { + match campaign_type { + CampaignType::ForceLeader => { + // Forcely campaign to be the leader of the region. + let _ = self.fsm.peer.raft_group.campaign(); + } + CampaignType::UnsafeSplitCampaign => { + // If the message is sent by the parent, it means that the parent is already + // the leader of the parent region. + let _ = self.fsm.peer.maybe_campaign(true); + } + } self.fsm.has_ready = true; } CasualMessage::InMemoryEngineLoadRegion { @@ -1899,7 +1909,9 @@ where // follower state let _ = self.ctx.router.send( self.region_id(), - PeerMsg::CasualMessage(Box::new(CasualMessage::Campaign)), + PeerMsg::CasualMessage(Box::new(CasualMessage::Campaign( + CampaignType::ForceLeader, + ))), ); } self.fsm.has_ready = true; @@ -2019,6 +2031,24 @@ where } } + #[inline] + /// Check whether the peer has any uncleared records in the + /// uncampaigned_new_regions list. + fn check_uncampaigned_regions(&mut self) { + fail_point!("on_skip_check_uncampaigned_regions", |_| {}); + let has_uncompaigned_regions = !self + .fsm + .peer + .uncampaigned_new_regions + .as_ref() + .map_or(false, |r| r.is_empty()); + // If the peer has any uncleared records in the uncampaigned_new_regions list, + // and there has valid leader in the region, it's safely to clear the records. + if has_uncompaigned_regions && self.fsm.peer.has_valid_leader() { + self.fsm.peer.uncampaigned_new_regions = None; + } + } + fn on_raft_log_fetched(&mut self, context: GetEntriesContext, res: Box) { let low = res.low; // If the peer is not the leader anymore and it's not in entry cache warmup @@ -2854,6 +2884,8 @@ where result?; + self.check_uncampaigned_regions(); + if self.fsm.peer.any_new_peer_catch_up(from_peer_id) { self.fsm.peer.heartbeat_pd(self.ctx); self.fsm.peer.should_wake_up = true; @@ -4455,7 +4487,7 @@ where ); self.fsm.peer.post_split(); - let is_leader = self.fsm.peer.is_leader(); + let (is_leader, is_follower) = (self.fsm.peer.is_leader(), self.fsm.peer.is_follower()); if is_leader { if share_source_region_size { self.fsm.peer.set_approximate_size(share_size); @@ -4621,6 +4653,18 @@ where .unwrap(); if !campaigned { + // The new peer has not campaigned yet, record it for later campaign. 
+ if is_follower && self.fsm.peer.region().get_peers().len() > 1 { + if self.fsm.peer.uncampaigned_new_regions.is_none() { + self.fsm.peer.uncampaigned_new_regions = Some(vec![]); + } + self.fsm + .peer + .uncampaigned_new_regions + .as_mut() + .unwrap() + .push(new_region_id); + } if let Some(msg) = meta .pending_msgs .swap_remove_front(|m| m.get_to_peer() == &meta_peer) diff --git a/components/raftstore/src/store/msg.rs b/components/raftstore/src/store/msg.rs index ba3a30f02a8..4a2229949f2 100644 --- a/components/raftstore/src/store/msg.rs +++ b/components/raftstore/src/store/msg.rs @@ -554,6 +554,17 @@ where CheckPendingAdmin(UnboundedSender), } +/// Campaign type for triggering a Raft campaign. +#[derive(Debug, Clone, Copy)] +pub enum CampaignType { + /// Forcely campaign to be the leader. + ForceLeader, + /// Campaign triggered by the leader of a parent region. It's used to make + /// the new splitted peer campaign to get votes. + /// Only if the parent region has valid leader, will it be safe to do that. + UnsafeSplitCampaign, +} + /// Message that will be sent to a peer. /// /// These messages are not significant and can be dropped occasionally. @@ -648,7 +659,8 @@ pub enum CasualMessage { }, // Trigger raft to campaign which is used after exiting force leader - Campaign, + // or make new splitted peers campaign to get votes. + Campaign(CampaignType), // Trigger loading pending region for in_memory_engine, InMemoryEngineLoadRegion { region_id: u64, @@ -724,7 +736,9 @@ impl fmt::Debug for CasualMessage { "SnapshotApplied, peer_id={}, tombstone={}", peer_id, tombstone ), - CasualMessage::Campaign => write!(fmt, "Campaign"), + CasualMessage::Campaign(_) => { + write!(fmt, "Campaign") + } CasualMessage::InMemoryEngineLoadRegion { region_id, .. } => write!( fmt, "[region={}] try load in memory region cache", diff --git a/components/raftstore/src/store/peer.rs b/components/raftstore/src/store/peer.rs index 208a5587b9d..f066c41d6c6 100644 --- a/components/raftstore/src/store/peer.rs +++ b/components/raftstore/src/store/peer.rs @@ -74,7 +74,10 @@ use super::{ cmd_resp, local_metrics::{IoType, RaftMetrics}, metrics::*, - peer_storage::{write_peer_state, CheckApplyingSnapStatus, HandleReadyResult, PeerStorage}, + peer_storage::{ + write_peer_state, CheckApplyingSnapStatus, HandleReadyResult, PeerStorage, + RAFT_INIT_LOG_TERM, + }, read_queue::{ReadIndexQueue, ReadIndexRequest}, transport::Transport, util::{ @@ -100,7 +103,7 @@ use crate::{ }, hibernate_state::GroupState, memory::{needs_evict_entry_cache, MEMTRACE_RAFT_ENTRIES}, - msg::{CasualMessage, ErrorCallback, RaftCommand}, + msg::{CampaignType, CasualMessage, ErrorCallback, RaftCommand}, peer_storage::HandleSnapshotResult, snapshot_backup::{AbortReason, SnapshotBrState}, txn_ext::LocksStatus, @@ -110,7 +113,7 @@ use crate::{ CleanupTask, CompactTask, HeartbeatTask, RaftlogGcTask, ReadDelegate, ReadExecutor, ReadProgress, RegionTask, SplitCheckTask, }, - Callback, Config, GlobalReplicationState, PdTask, ReadCallback, ReadIndexContext, + Callback, Config, GlobalReplicationState, PdTask, PeerMsg, ReadCallback, ReadIndexContext, ReadResponse, TxnExt, WriteCallback, RAFT_INIT_LOG_INDEX, }, Error, Result, @@ -925,6 +928,12 @@ where /// this peer has raft log gaps and whether should be marked busy on /// apply. pub last_leader_committed_idx: Option, + + /// Used to record uncampaigned regions, which are the new regions + /// created when a follower applies a split. 
If the follower becomes a + /// leader, a campaign is triggered for those regions. + /// Once the parent region has valid leader, this list will be cleared. + pub uncampaigned_new_regions: Option>, } impl Peer @@ -1076,6 +1085,7 @@ where snapshot_recovery_state: None, busy_on_apply: Some(false), last_leader_committed_idx: None, + uncampaigned_new_regions: None, }; // If this region has only one peer and I am the one, campaign directly. @@ -1680,6 +1690,11 @@ where self.raft_group.raft.state == StateRole::Leader } + #[inline] + pub fn is_follower(&self) -> bool { + self.raft_group.raft.state == StateRole::Follower && self.peer.role != PeerRole::Learner + } + #[inline] pub fn is_witness(&self) -> bool { self.peer.is_witness @@ -2322,6 +2337,22 @@ where "region_id" => self.region_id, ); } + // After the leadership changed, send `CasualMessage::Campaign + // {notify_by_parent: true}` to the target peer to campaign + // leader if there exists uncampaigned regions. It's used to + // ensure that a leader is elected promptly for the newly + // created Raft group, minimizing availability impact (e.g. + // #12410 and #17602.). + if let Some(new_regions) = self.uncampaigned_new_regions.take() { + for new_region in new_regions { + let _ = ctx.router.send( + new_region, + PeerMsg::CasualMessage(Box::new(CasualMessage::Campaign( + CampaignType::UnsafeSplitCampaign, + ))), + ); + } + } } StateRole::Follower => { self.leader_lease.expire(); @@ -2332,6 +2363,8 @@ where let _ = self.get_store().clear_data(); self.delay_clean_data = false; } + // Clear the uncampaigned list. + self.uncampaigned_new_regions = None; } _ => {} } @@ -3762,6 +3795,12 @@ where return false; } + // And only if the split region does not enter election state, will it be + // safe to campaign. + if self.term() > RAFT_INIT_LOG_TERM { + return false; + } + // If last peer is the leader of the region before split, it's intuitional for // it to become the leader of new split region. let _ = self.raft_group.campaign(); diff --git a/components/snap_recovery/src/leader_keeper.rs b/components/snap_recovery/src/leader_keeper.rs index 0115e8657c3..3c046f6d7dd 100644 --- a/components/snap_recovery/src/leader_keeper.rs +++ b/components/snap_recovery/src/leader_keeper.rs @@ -11,7 +11,9 @@ use engine_traits::KvEngine; use futures::compat::Future01CompatExt; use raftstore::{ errors::{Error, Result}, - store::{Callback, CasualMessage, CasualRouter, SignificantMsg, SignificantRouter}, + store::{ + msg::CampaignType, Callback, CasualMessage, CasualRouter, SignificantMsg, SignificantRouter, + }, }; use tikv_util::{future::paired_future_callback, timer::GLOBAL_TIMER_HANDLE}; @@ -125,7 +127,7 @@ where } fn force_leader(&self, region_id: u64) -> Result<()> { - let msg = CasualMessage::Campaign; + let msg = CasualMessage::Campaign(CampaignType::ForceLeader); self.router.send(region_id, msg)?; // We have nothing to do... 
Ok(()) @@ -192,7 +194,7 @@ mod test { msg: raftstore::store::CasualMessage, ) -> raftstore::errors::Result<()> { match msg { - raftstore::store::CasualMessage::Campaign => { + raftstore::store::CasualMessage::Campaign(_) => { if !self.regions.contains(®ion_id) { return Err(raftstore::Error::RegionNotFound(region_id)); } diff --git a/tests/failpoints/cases/test_split_region.rs b/tests/failpoints/cases/test_split_region.rs index 435f7c057b5..e9b23df27f1 100644 --- a/tests/failpoints/cases/test_split_region.rs +++ b/tests/failpoints/cases/test_split_region.rs @@ -1784,3 +1784,139 @@ fn test_turn_off_manual_compaction_caused_by_no_valid_split_key() { rx.recv_timeout(Duration::from_secs(1)).unwrap(); rx.try_recv().unwrap_err(); } + +/// Test that if the original leader of the parent region is tranfered to +/// another peer, the new leader of the parent region will notify the new split +/// region to campaign. +#[test_case(test_raftstore::new_node_cluster)] +fn test_region_split_after_parent_leader_transfer() { + let mut cluster = new_cluster(0, 3); + cluster.cfg.raft_store.raft_base_tick_interval = ReadableDuration::millis(50); + cluster.cfg.raft_store.raft_election_timeout_ticks = 10; + + let pd_client = cluster.pd_client.clone(); + pd_client.disable_default_operator(); + + cluster.run(); + cluster.must_put(b"k1", b"v1"); + cluster.must_put(b"k2", b"v2"); + cluster.must_put(b"k3", b"v3"); + // Transfer leader to peer 3. + let region = pd_client.get_region(b"k2").unwrap(); + cluster.must_transfer_leader(region.get_id(), new_peer(3, 3)); + + // Setting: only peers on store 2 can become leader. + for id in 1..=3 { + if id == 2 { + continue; + } + cluster.add_send_filter(CloneFilterFactory( + RegionPacketFilter::new(region.get_id(), id) + .msg_type(MessageType::MsgRequestPreVote) + .direction(Direction::Send), + )); + cluster.add_send_filter(CloneFilterFactory( + RegionPacketFilter::new(1000, id) + .msg_type(MessageType::MsgRequestPreVote) + .direction(Direction::Send), + )); + } + + // Split region to peer 1 & 2, not allow peer 3 (leader) to split. + let no_split_on_store_3 = "on_split"; + fail::cfg(no_split_on_store_3, "pause").unwrap(); + cluster.split_region( + ®ion, + b"k2", + Callback::write(Box::new(move |_write_resp: WriteResponse| {})), + ); + // Wait the old lease of the leader timeout and peer 2 gets votes + // to become the new leader. + thread::sleep( + cluster.cfg.raft_store.raft_base_tick_interval.0 + * cluster.cfg.raft_store.raft_election_timeout_ticks as u32 + * 2, + ); + // As the split is paused, the leader of the parent region should + // be peer 2, not peer 3. And peer 2 will notify the new split region + // `campaign` to become leader. + cluster.reset_leader_of_region(region.get_id()); + assert_eq!( + cluster.leader_of_region(region.get_id()).unwrap(), + new_peer(2, 2) + ); + // The leader of the new split region should be peer 1002. + let new_region = pd_client.get_region(b"k1").unwrap(); + assert_eq!( + cluster.leader_of_region(new_region.get_id()).unwrap(), + new_peer(2, 1002) + ); + fail::remove(no_split_on_store_3); +} + +/// Test that the leader of the new split region will not be changed after +/// the leader of the parent region is transferred. 
+#[test_case(test_raftstore::new_node_cluster)] +fn test_region_split_after_new_leader_elected() { + let mut cluster = new_cluster(0, 3); + cluster.cfg.raft_store.raft_base_tick_interval = ReadableDuration::millis(50); + cluster.cfg.raft_store.raft_election_timeout_ticks = 10; + + let pd_client = cluster.pd_client.clone(); + pd_client.disable_default_operator(); + + cluster.run(); + cluster.must_put(b"k1", b"v1"); + cluster.must_put(b"k2", b"v2"); + cluster.must_put(b"k3", b"v3"); + // Transfer leader to peer 3. + let region = pd_client.get_region(b"k2").unwrap(); + cluster.must_transfer_leader(region.get_id(), new_peer(3, 3)); + + // Setting: only peers on store 2 can become leader. + for id in 1..=3 { + if id == 2 { + continue; + } + cluster.add_send_filter(CloneFilterFactory( + RegionPacketFilter::new(region.get_id(), id) + .msg_type(MessageType::MsgRequestPreVote) + .direction(Direction::Send), + )); + } + + // Split region to peer 1 & 2, not allow peer 3 (leader) to split. + let skip_clear_uncampaign = "on_skip_check_uncampaigned_regions"; + fail::cfg(skip_clear_uncampaign, "return").unwrap(); + let no_split_on_store_3 = "on_split"; + fail::cfg(no_split_on_store_3, "pause").unwrap(); + cluster.split_region( + ®ion, + b"k2", + Callback::write(Box::new(move |_write_resp: WriteResponse| {})), + ); + // Wait the leader of the new split region has been elected. + thread::sleep( + cluster.cfg.raft_store.raft_base_tick_interval.0 + * cluster.cfg.raft_store.raft_election_timeout_ticks as u32 + * 2, + ); + cluster.reset_leader_of_region(region.get_id()); + assert_eq!( + cluster.leader_of_region(region.get_id()).unwrap(), + new_peer(2, 2) + ); + // The leader of the new split region should be elected. + let new_region = pd_client.get_region(b"k1").unwrap(); + let new_region_leader = cluster.leader_of_region(new_region.get_id()).unwrap(); + // The new leader will notify the new split region `campaign` to become + // leader, but the leader of the new split region is already elected. + fail::remove(no_split_on_store_3); + // The leader of the new split region should not changed. + cluster.reset_leader_of_region(new_region.get_id()); + assert_eq!( + cluster.leader_of_region(new_region.get_id()).unwrap(), + new_region_leader + ); + fail::remove(skip_clear_uncampaign); +} diff --git a/tests/integrations/raftstore/test_split_region.rs b/tests/integrations/raftstore/test_split_region.rs index 45c7fd1aab4..cd2436512df 100644 --- a/tests/integrations/raftstore/test_split_region.rs +++ b/tests/integrations/raftstore/test_split_region.rs @@ -1501,3 +1501,43 @@ fn test_node_split_during_read_index() { } } } + +#[test_case(test_raftstore::new_node_cluster)] +fn test_clear_uncampaigned_regions_after_split() { + let mut cluster = new_cluster(0, 3); + cluster.cfg.raft_store.raft_base_tick_interval = ReadableDuration::millis(50); + cluster.cfg.raft_store.raft_election_timeout_ticks = 10; + + let pd_client = cluster.pd_client.clone(); + pd_client.disable_default_operator(); + + cluster.run(); + cluster.must_put(b"k1", b"v1"); + cluster.must_put(b"k2", b"v2"); + cluster.must_put(b"k3", b"v3"); + // Transfer leader to peer 3. + let region = pd_client.get_region(b"k2").unwrap(); + cluster.must_transfer_leader(region.get_id(), new_peer(3, 3)); + + // New split regions will be recorded into uncampaigned region list of + // followers (in peer 1 and peer 2). 
+ cluster.split_region( + ®ion, + b"k2", + Callback::write(Box::new(move |_write_resp: WriteResponse| {})), + ); + // Wait the old lease of the leader timeout and followers clear its + // uncampaigned region list. + thread::sleep( + cluster.cfg.raft_store.raft_base_tick_interval.0 + * cluster.cfg.raft_store.raft_election_timeout_ticks as u32 + * 3, + ); + // The leader of the parent region should still be peer 3 as no + // other peers can become leader. + cluster.reset_leader_of_region(region.get_id()); + assert_eq!( + cluster.leader_of_region(region.get_id()).unwrap(), + new_peer(3, 3) + ); +} From 1174d1fc5c6dee0824321c3e589b7f0bdbe3cb92 Mon Sep 17 00:00:00 2001 From: glorv Date: Tue, 19 Nov 2024 18:37:20 +0800 Subject: [PATCH 26/86] raftstore: skip handle remaining messages if peer is destroyed (#17841) close tikv/tikv#17840 Skip handling remain raft messages after peer fsm is stopped. This can avoid potential panic if the raft message need to read raft log from raft engine. Signed-off-by: glorv Co-authored-by: ti-chi-bot[bot] <108142056+ti-chi-bot[bot]@users.noreply.github.com> --- components/raftstore/src/store/fsm/peer.rs | 10 ++++- tests/failpoints/cases/test_merge.rs | 48 +++++++++++++++++++++- 2 files changed, 56 insertions(+), 2 deletions(-) diff --git a/components/raftstore/src/store/fsm/peer.rs b/components/raftstore/src/store/fsm/peer.rs index bbb3a2d2a70..4d5a9a36b2d 100644 --- a/components/raftstore/src/store/fsm/peer.rs +++ b/components/raftstore/src/store/fsm/peer.rs @@ -647,6 +647,12 @@ where }; for m in msgs.drain(..) { + // skip handling remain messages if fsm is destroyed. This can aviod handling + // arbitary messages(e.g. CasualMessage::ForceCompactRaftLogs) that may need + // to read raft logs which maybe lead to panic. + if self.fsm.stopped { + break; + } distribution[m.discriminant()] += 1; match m { PeerMsg::RaftMessage(msg, sent_time) => { @@ -3972,6 +3978,7 @@ where ) .flush() .when_done(move || { + fail_point!("destroy_region_before_gc_flush"); if let Err(e) = mb.force_send(PeerMsg::SignificantMsg(Box::new( SignificantMsg::RaftLogGcFlushed, ))) { @@ -3983,6 +3990,7 @@ where region_id, peer_id, e ); } + fail_point!("destroy_region_after_gc_flush"); }); if let Err(e) = self.ctx.raftlog_gc_scheduler.schedule(task) { if tikv_util::thread_group::is_shutdown(!cfg!(test)) { @@ -6015,7 +6023,7 @@ where } fail_point!("on_raft_log_gc_tick_1", self.fsm.peer_id() == 1, |_| {}); fail_point!("on_raft_gc_log_tick", |_| {}); - debug_assert!(!self.fsm.stopped); + assert!(!self.fsm.stopped); // As leader, we would not keep caches for the peers that didn't response // heartbeat in the last few seconds. That happens probably because diff --git a/tests/failpoints/cases/test_merge.rs b/tests/failpoints/cases/test_merge.rs index a9da6b1a233..502fce4d727 100644 --- a/tests/failpoints/cases/test_merge.rs +++ b/tests/failpoints/cases/test_merge.rs @@ -19,7 +19,7 @@ use kvproto::{ }; use pd_client::PdClient; use raft::eraftpb::MessageType; -use raftstore::store::*; +use raftstore::{router::RaftStoreRouter, store::*}; use raftstore_v2::router::{PeerMsg, PeerTick}; use test_raftstore::*; use test_raftstore_macro::test_case; @@ -2269,3 +2269,49 @@ fn test_destroy_race_during_atomic_snapshot_after_merge() { cluster.must_transfer_leader(right.get_id(), new_peer(3, new_peer_id)); cluster.must_put(b"k4", b"v4"); } + +// `test_raft_log_gc_after_merge` tests when a region is destoryed, e.g. due to +// region merge, PeerFsm can still handle pending raft messages correctly. 
+#[test] +fn test_raft_log_gc_after_merge() { + let mut cluster = new_node_cluster(0, 1); + configure_for_merge(&mut cluster.cfg); + cluster.cfg.raft_store.store_batch_system.pool_size = 2; + cluster.run(); + let pd_client = Arc::clone(&cluster.pd_client); + pd_client.disable_default_operator(); + + cluster.must_put(b"k1", b"v1"); + cluster.must_put(b"k3", b"v3"); + + let region = cluster.get_region(b"k1"); + cluster.must_split(®ion, b"k2"); + let left = cluster.get_region(b"k1"); + let right = cluster.get_region(b"k3"); + + fail::cfg_callback("destroy_region_before_gc_flush", move || { + fail::cfg("pause_on_peer_collect_message", "pause").unwrap(); + }) + .unwrap(); + + let (tx, rx) = channel(); + fail::cfg_callback("destroy_region_after_gc_flush", move || { + tx.send(()).unwrap(); + }) + .unwrap(); + + // the right peer's id is 1. + pd_client.must_merge(right.get_id(), left.get_id()); + rx.recv_timeout(Duration::from_secs(1)).unwrap(); + + let raft_router = cluster.get_router(1).unwrap(); + raft_router + .send_casual_msg(1, CasualMessage::ForceCompactRaftLogs) + .unwrap(); + + fail::remove("pause_on_peer_collect_message"); + + // wait some time for merge finish. + std::thread::sleep(Duration::from_secs(1)); + must_get_equal(&cluster.get_engine(1), b"k3", b"v3"); +} From c2389be8c11efa0410a0c8fcfc420b950ee74940 Mon Sep 17 00:00:00 2001 From: Liqi Geng Date: Tue, 19 Nov 2024 19:50:26 +0800 Subject: [PATCH 27/86] expr: fix panic when using radians and degree (#17853) close tikv/tikv#17852 expr: fix panic when using radians and degree Signed-off-by: gengliqi --- components/tidb_query_expr/src/impl_math.rs | 38 +++++++++++++++------ 1 file changed, 28 insertions(+), 10 deletions(-) diff --git a/components/tidb_query_expr/src/impl_math.rs b/components/tidb_query_expr/src/impl_math.rs index bdd75be7f23..41d8a625816 100644 --- a/components/tidb_query_expr/src/impl_math.rs +++ b/components/tidb_query_expr/src/impl_math.rs @@ -280,7 +280,7 @@ fn sqrt(arg: &Real) -> Result> { #[inline] #[rpn_fn] fn radians(arg: &Real) -> Result> { - Ok(Real::new(**arg * std::f64::consts::PI / 180_f64).ok()) + Ok(Real::new(**arg * (std::f64::consts::PI / 180_f64)).ok()) } #[inline] @@ -353,7 +353,12 @@ fn rand_with_seed_first_gen(seed: Option<&i64>) -> Result> { #[inline] #[rpn_fn] fn degrees(arg: &Real) -> Result> { - Ok(Real::new(arg.to_degrees()).ok()) + let ret = arg.to_degrees(); + if ret.is_infinite() { + Err(Error::overflow("DOUBLE", format!("degrees({})", arg)).into()) + } else { + Ok(Real::new(ret).ok()) + } } #[inline] @@ -1182,6 +1187,10 @@ mod tests { ), (Some(f64::NAN), None), (Some(f64::INFINITY), Some(Real::new(f64::INFINITY).unwrap())), + ( + Some(1.0E308), + Some(Real::new(1.0E308 * (std::f64::consts::PI / 180_f64)).unwrap()), + ), ]; for (input, expect) in test_cases { let output = RpnFnScalarEvaluator::new() @@ -1221,25 +1230,34 @@ mod tests { #[test] fn test_degrees() { let tests_cases = vec![ - (None, None), - (Some(f64::NAN), None), - (Some(0f64), Some(Real::new(0f64).unwrap())), - (Some(1f64), Some(Real::new(57.29577951308232_f64).unwrap())), + (None, None, false), + (Some(f64::NAN), None, false), + (Some(0f64), Some(Real::new(0f64).unwrap()), false), + ( + Some(1f64), + Some(Real::new(57.29577951308232_f64).unwrap()), + false, + ), ( Some(std::f64::consts::PI), Some(Real::new(180.0_f64).unwrap()), + false, ), ( Some(-std::f64::consts::PI / 2.0_f64), Some(Real::new(-90.0_f64).unwrap()), + false, ), + (Some(1.0E307), None, true), ]; - for (input, expect) in tests_cases { + for (input, expect, 
is_err) in tests_cases { let output = RpnFnScalarEvaluator::new() .push_param(input) - .evaluate(ScalarFuncSig::Degrees) - .unwrap(); - assert_eq!(expect, output, "{:?}", input); + .evaluate(ScalarFuncSig::Degrees); + assert_eq!(is_err, output.is_err()); + if let Ok(out) = output { + assert_eq!(expect, out, "{:?}", input); + } } } From 4776689cbddb9b470d9ad9d3d75bbbc6f3f4d087 Mon Sep 17 00:00:00 2001 From: Zack Zhao <57036248+joccau@users.noreply.github.com> Date: Tue, 19 Nov 2024 21:54:50 +0800 Subject: [PATCH 28/86] import: call sink.fail() when failed to send message by grpc (#17834) close tikv/tikv#17830 Signed-off-by: joccau Co-authored-by: ti-chi-bot[bot] <108142056+ti-chi-bot[bot]@users.noreply.github.com> --- Cargo.lock | 2 +- src/import/ingest.rs | 5 ++ src/import/sst_service.rs | 13 ++- tests/failpoints/cases/test_import_service.rs | 90 ++++++++++++++++++- 4 files changed, 105 insertions(+), 5 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index ba111ddaf6a..cd7be165f66 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1502,7 +1502,7 @@ dependencies = [ "file_system", "futures 0.3.15", "futures-io", - "hex 0.4.2", + "hex 0.4.3", "keys", "kvproto", "lazy_static", diff --git a/src/import/ingest.rs b/src/import/ingest.rs index b46ff25fb4b..7ce8d6ca0d1 100644 --- a/src/import/ingest.rs +++ b/src/import/ingest.rs @@ -162,6 +162,11 @@ pub(super) fn async_snapshot( ..Default::default() }); async move { + fail::fail_point!("failed_to_async_snapshot", |_| { + let mut e = errorpb::Error::default(); + e.set_message("faild to get snapshot".to_string()); + Err(e) + }); res.await.map_err(|e| { let err: storage::Error = e.into(); if let Some(e) = extract_region_error_from_error(&err) { diff --git a/src/import/sst_service.rs b/src/import/sst_service.rs index 73935013897..20fbb09294e 100644 --- a/src/import/sst_service.rs +++ b/src/import/sst_service.rs @@ -11,7 +11,8 @@ use engine_traits::{CompactExt, CF_DEFAULT, CF_WRITE}; use file_system::{set_io_type, IoType}; use futures::{sink::SinkExt, stream::TryStreamExt, FutureExt, TryFutureExt}; use grpcio::{ - ClientStreamingSink, RequestStream, RpcContext, ServerStreamingSink, UnarySink, WriteFlags, + ClientStreamingSink, RequestStream, RpcContext, RpcStatus, RpcStatusCode, ServerStreamingSink, + UnarySink, WriteFlags, }; use kvproto::{ encryptionpb::EncryptionMethod, @@ -1145,15 +1146,18 @@ impl ImportSst for ImportSstService { IMPORT_RPC_DURATION .with_label_values(&[label, "ok"]) .observe(timer.saturating_elapsed_secs()); + let _ = sink.close().await; } Err(e) => { warn!( "connection send message fail"; "err" => %e ); + let status = + RpcStatus::with_message(RpcStatusCode::UNKNOWN, format!("{:?}", e)); + let _ = sink.fail(status).await; } } - let _ = sink.close().await; return; } }; @@ -1169,7 +1173,10 @@ impl ImportSst for ImportSstService { "connection send message fail"; "err" => %e ); - break; + let status = + RpcStatus::with_message(RpcStatusCode::UNKNOWN, format!("{:?}", e)); + let _ = sink.fail(status).await; + return; } } let _ = sink.close().await; diff --git a/tests/failpoints/cases/test_import_service.rs b/tests/failpoints/cases/test_import_service.rs index 9eaa876b09f..068b0eafcef 100644 --- a/tests/failpoints/cases/test_import_service.rs +++ b/tests/failpoints/cases/test_import_service.rs @@ -5,7 +5,7 @@ use std::{ time::Duration, }; -use futures::executor::block_on; +use futures::{executor::block_on, stream::StreamExt}; use grpcio::{ChannelBuilder, Environment}; use kvproto::{disk_usage::DiskUsage, import_sstpb::*, 
tikvpb_grpc::TikvClient}; use tempfile::{Builder, TempDir}; @@ -483,3 +483,91 @@ fn test_flushed_applied_index_after_ingset() { fail::remove("on_apply_ingest"); fail::remove("on_flush_completed"); } + +#[test] +fn test_duplicate_detect_with_client_stop() { + let (_cluster, ctx, _, import) = new_cluster_and_tikv_import_client(); + let mut req = SwitchModeRequest::default(); + req.set_mode(SwitchMode::Import); + import.switch_mode(&req).unwrap(); + + let data_count: u64 = 4096; + for commit_ts in 0..4 { + let mut meta = new_sst_meta(0, 0); + meta.set_region_id(ctx.get_region_id()); + meta.set_region_epoch(ctx.get_region_epoch().clone()); + + let mut keys = vec![]; + let mut values = vec![]; + for i in 1000..data_count { + let key = i.to_string(); + keys.push(key.as_bytes().to_vec()); + values.push(key.as_bytes().to_vec()); + } + let resp = send_write_sst(&import, &meta, keys, values, commit_ts).unwrap(); + for m in resp.metas.into_iter() { + must_ingest_sst(&import, ctx.clone(), m.clone()); + } + } + + let mut duplicate = DuplicateDetectRequest::default(); + duplicate.set_context(ctx); + duplicate.set_start_key((0_u64).to_string().as_bytes().to_vec()); + + // failed to get snapshot. and stream is normal, it will get response with err. + fail::cfg("failed_to_async_snapshot", "return()").unwrap(); + let mut stream = import.duplicate_detect(&duplicate).unwrap(); + let resp = block_on(async move { + let resp: DuplicateDetectResponse = stream.next().await.unwrap().unwrap(); + resp + }); + assert_eq!( + resp.get_region_error().get_message(), + "faild to get snapshot" + ); + + // failed to get snapshot, and stream stops. + // A stopeed remote don't cause panic in server. + let stream = import.duplicate_detect(&duplicate).unwrap(); + drop(stream); + + // drop stream after received part of response. + // A stopped remote must not cause panic at server. + fail::remove("failed_to_async_snapshot"); + let mut stream = import.duplicate_detect(&duplicate).unwrap(); + let ret: Vec = block_on(async move { + let mut resp: DuplicateDetectResponse = stream.next().await.unwrap().unwrap(); + let pairs = resp.take_pairs(); + // drop stream, Do not cause panic at server. + drop(stream); + pairs.into() + }); + + assert_eq!(ret.len(), 4096); + + // call duplicate_detect() successfully. + let mut stream = import.duplicate_detect(&duplicate).unwrap(); + let ret = block_on(async move { + let mut ret: Vec = vec![]; + while let Some(resp) = stream.next().await { + match resp { + Ok(mut resp) => { + if resp.has_key_error() || resp.has_region_error() { + break; + } + let pairs = resp.take_pairs(); + ret.append(&mut pairs.into()); + } + Err(e) => { + println!("receive error: {:?}", e); + break; + } + } + } + + ret + }); + assert_eq!(ret.len(), (data_count - 1000) as usize * 4); + req.set_mode(SwitchMode::Normal); + import.switch_mode(&req).unwrap(); +} From 78ddd95648935f3d9af39faeacdc30a3834204c1 Mon Sep 17 00:00:00 2001 From: hhwyt Date: Wed, 20 Nov 2024 11:08:47 +0800 Subject: [PATCH 29/86] raftstore: Increase the default raft_client_queue_size and raft_msg_max_batch_size. (#17821) close tikv/tikv#17101 Increase the default raft_client_queue_size and raft_msg_max_batch_size. This PR addresses an issue where too many Raft messages can delay sending, increasing the commit log duration and the heartbeat latency. The delayed heartbeats can lead to leader drops, especially during PD restarts that trigger a surge of hibernated regions. About this scenario, see more details at: https://github.com/tikv/tikv/issues/17101. 
We increased the raft_client_queue_size to prevent Raft messages from being dropped when the RaftClient queue becomes full under too many message workloads. Additionally, we increased the raft_msg_max_batch_size to improve the efficiency of Raft message sending. Signed-off-by: hhwyt Co-authored-by: ti-chi-bot[bot] <108142056+ti-chi-bot[bot]@users.noreply.github.com> --- src/server/config.rs | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/src/server/config.rs b/src/server/config.rs index 0e54626f63d..feba21a09f2 100644 --- a/src/server/config.rs +++ b/src/server/config.rs @@ -256,10 +256,13 @@ impl Default for Config { status_thread_pool_size: 1, max_grpc_send_msg_len: DEFAULT_MAX_GRPC_SEND_MSG_LEN, raft_client_grpc_send_msg_buffer: 512 * 1024, - raft_client_queue_size: 8192, + // As of https://github.com/tikv/tikv/pull/17821, the raft_client_queue_size has been + // increased from 8192 to 16384 to reduce the message delays under too many messages + // load. Additionally, the raft_msg_max_batch_size has also been increased. + raft_client_queue_size: 16384, raft_client_max_backoff: ReadableDuration::secs(5), raft_client_initial_reconnect_backoff: ReadableDuration::secs(1), - raft_msg_max_batch_size: 128, + raft_msg_max_batch_size: 256, grpc_compression_type: GrpcCompressionType::None, grpc_gzip_compression_level: DEFAULT_GRPC_GZIP_COMPRESSION_LEVEL, grpc_min_message_size_to_compress: DEFAULT_GRPC_MIN_MESSAGE_SIZE_TO_COMPRESS, From 3bd8c24d1a1eb277877557b35d3b175330e7010e Mon Sep 17 00:00:00 2001 From: MyonKeminta <9948422+MyonKeminta@users.noreply.github.com> Date: Wed, 20 Nov 2024 17:54:14 +0800 Subject: [PATCH 30/86] lock_manager: Skip updating lock wait info for non-fair-locking requests (#17500) (#17870) close tikv/tikv#17394 lock_manager: Skip updating lock wait info for non-fair-locking requests This is a simpler and lower-risky fix of the OOM issue #17394 for released branches, as an alternative solution to #17451 . In this way, for acquire_pessimistic_lock requests without enabling fair locking, the behavior of update_wait_for will be a noop. So that if fair locking is globally disabled, the behavior will be equivalent to versions before 7.0. 
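To make the gating concrete, a minimal sketch (the `allow_lock_with_conflict` field follows the diff below; the detector and the surrounding plumbing are heavily simplified stand-ins):

    struct KeyLockWaitInfo {
        lock_ts: u64,
        allow_lock_with_conflict: bool,
    }

    struct DetectorScheduler;

    impl DetectorScheduler {
        fn clean_up_wait_for(&self, start_ts: u64, wait_info: &KeyLockWaitInfo) {
            println!("txn {} no longer waits for lock at ts {}", start_ts, wait_info.lock_ts);
        }
    }

    // update_wait_for becomes a no-op for requests made without fair locking,
    // which is the pre-7.0 behavior this patch restores for that path.
    fn handle_update_wait_for(
        detector: &DetectorScheduler,
        start_ts: u64,
        previous_wait_info: Option<KeyLockWaitInfo>,
    ) {
        if let Some(info) = previous_wait_info {
            if info.allow_lock_with_conflict {
                detector.clean_up_wait_for(start_ts, &info);
                // ...then register the new wait-for edge with the detector...
            }
        }
    }

    fn main() {
        let detector = DetectorScheduler;
        // Skipped: fair locking disabled for this request.
        handle_update_wait_for(&detector, 10, Some(KeyLockWaitInfo { lock_ts: 5, allow_lock_with_conflict: false }));
        // Processed: fair locking enabled.
        handle_update_wait_for(&detector, 11, Some(KeyLockWaitInfo { lock_ts: 6, allow_lock_with_conflict: true }));
    }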
Signed-off-by: MyonKeminta --- src/server/lock_manager/waiter_manager.rs | 7 ++++++- src/storage/lock_manager/lock_waiting_queue.rs | 1 + src/storage/lock_manager/mod.rs | 1 + src/storage/txn/scheduler.rs | 2 ++ 4 files changed, 10 insertions(+), 1 deletion(-) diff --git a/src/server/lock_manager/waiter_manager.rs b/src/server/lock_manager/waiter_manager.rs index c0e97e25e3a..f1b3c9b3dbe 100644 --- a/src/server/lock_manager/waiter_manager.rs +++ b/src/server/lock_manager/waiter_manager.rs @@ -554,7 +554,9 @@ impl WaiterManager { continue; } - if let Some((previous_wait_info, diag_ctx)) = previous_wait_info { + if let Some((previous_wait_info, diag_ctx)) = previous_wait_info + && previous_wait_info.allow_lock_with_conflict + { self.detector_scheduler .clean_up_wait_for(event.start_ts, previous_wait_info); self.detector_scheduler @@ -678,6 +680,7 @@ pub mod tests { key: Key::from_raw(b""), lock_digest: LockDigest { ts: lock_ts, hash }, lock_info: Default::default(), + allow_lock_with_conflict: false, }, cancel_callback: Box::new(|_| ()), diag_ctx: DiagnosticContext::default(), @@ -798,6 +801,7 @@ pub mod tests { key: Key::from_raw(&raw_key), lock_digest: lock, lock_info: info.clone(), + allow_lock_with_conflict: false, }, cb, Instant::now() + Duration::from_millis(3000), @@ -1202,6 +1206,7 @@ pub mod tests { key: key.to_raw().unwrap(), ..Default::default() }, + allow_lock_with_conflict: false, }, }; scheduler.update_wait_for(vec![event]); diff --git a/src/storage/lock_manager/lock_waiting_queue.rs b/src/storage/lock_manager/lock_waiting_queue.rs index a81248fe9e2..dbe71f6267b 100644 --- a/src/storage/lock_manager/lock_waiting_queue.rs +++ b/src/storage/lock_manager/lock_waiting_queue.rs @@ -618,6 +618,7 @@ impl LockWaitQueues { hash: entry.lock_hash, }, lock_info: key_state.current_lock.clone(), + allow_lock_with_conflict: entry.parameters.allow_lock_with_conflict, }, }; update_wait_for_events.push(event); diff --git a/src/storage/lock_manager/mod.rs b/src/storage/lock_manager/mod.rs index 5c103f40f82..4c2b4b0ccca 100644 --- a/src/storage/lock_manager/mod.rs +++ b/src/storage/lock_manager/mod.rs @@ -97,6 +97,7 @@ pub struct KeyLockWaitInfo { pub key: Key, pub lock_digest: LockDigest, pub lock_info: LockInfo, + pub allow_lock_with_conflict: bool, } /// Uniquely identifies a lock-waiting request in a `LockManager`. 
diff --git a/src/storage/txn/scheduler.rs b/src/storage/txn/scheduler.rs index 3cea323ed08..955e8395965 100644 --- a/src/storage/txn/scheduler.rs +++ b/src/storage/txn/scheduler.rs @@ -989,6 +989,7 @@ impl TxnScheduler { let start_ts = lock_info.parameters.start_ts; let is_first_lock = lock_info.parameters.is_first_lock; let wait_timeout = lock_info.parameters.wait_timeout; + let allow_lock_with_conflict = lock_info.parameters.allow_lock_with_conflict; let diag_ctx = DiagnosticContext { key: lock_info.key.to_raw().unwrap(), @@ -1016,6 +1017,7 @@ impl TxnScheduler { key, lock_digest, lock_info: lock_info_pb, + allow_lock_with_conflict, }; self.inner.lock_mgr.wait_for( wait_token, From ffd7e34ee74af240348d9f05d5c03158ff2a48f3 Mon Sep 17 00:00:00 2001 From: hhwyt Date: Fri, 22 Nov 2024 12:09:53 +0800 Subject: [PATCH 31/86] Add metrics for raft message send wait & receive delay duration (#17735) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit close tikv/tikv#17683 Add metrics for raft message send wait & receive delay duration ## Send wait duration This phase begins when the RaftStore thread sends the RaftMessage to the RaftClient and ends when the RaftMessage leaves the BatchRaftMessage buffer, just before being flushed to the gRPC client. Since this phase occurs entirely within the same node, we measure it using monotonic time. ## Receive delay duration Receive delay duration can also be called send duration. The name 'Receive delay duration' is used because this duration is reported by the receiver, making it more clearer. This phase begins after the send wait ends and continues as the message is sent over the network, ends when the target peer receives it. Since this phase spans across nodes, we measure it using physical time. To facilitate this, we’ve introduced a last_observed_time field in the BatchRaftMessage to record the physical times (please also review the related PR: https://github.com/pingcap/kvproto/pull/1276). Although physical clock drift between nodes is a possibility, its impact on our measurement is limited. Our primary goal is to observe trends in duration changes rather than relying on absolute precision. NOTE: Metrics are only added for the batch_raft RPC, as the raft RPC is deprecated and no longer tracked. 
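A self-contained sketch of the two measurements described above (the field name `last_observed_time_ns`, the helper names, and the println! reporting are illustrative stand-ins; the real code stores the physical timestamp in kvproto's BatchRaftMessage and feeds Prometheus histograms):

    use std::time::{Duration, Instant, SystemTime, UNIX_EPOCH};

    // Each queued message remembers when it entered the RaftClient buffer, so
    // the send-wait phase can be measured with a monotonic clock on the sender.
    struct QueuedRaftMessage {
        enqueued_at: Instant,
        // payload elided
    }

    // Stand-in for the flushed batch; the real BatchRaftMessage carries the
    // sender's physical timestamp so the receiver can compute the delay.
    struct BatchRaftMessage {
        last_observed_time_ns: u64,
    }

    fn physical_now_ns() -> u64 {
        SystemTime::now()
            .duration_since(UNIX_EPOCH)
            .unwrap_or(Duration::ZERO)
            .as_nanos() as u64
    }

    // Sender: observe how long each message waited, then stamp the batch with
    // the current physical time right before handing it to the gRPC client.
    fn flush(buffer: Vec<QueuedRaftMessage>) -> BatchRaftMessage {
        for msg in &buffer {
            let send_wait = msg.enqueued_at.elapsed();
            println!("send_wait = {:?}", send_wait);
        }
        BatchRaftMessage {
            last_observed_time_ns: physical_now_ns(),
        }
    }

    // Receiver: the delay is physical "now" minus the sender's stamp. Clock
    // drift between nodes makes it approximate; it is meant for watching
    // trends rather than absolute precision.
    fn on_batch_received(batch: &BatchRaftMessage) {
        let delay_ns = physical_now_ns().saturating_sub(batch.last_observed_time_ns);
        println!("receive_delay = {:.6}s", delay_ns as f64 / 1e9);
    }

    fn main() {
        let batch = flush(vec![QueuedRaftMessage { enqueued_at: Instant::now() }]);
        on_batch_received(&batch);
    }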
Signed-off-by: hhwyt --- Cargo.lock | 2 +- components/tikv_util/src/time.rs | 50 +- metrics/grafana/tikv_details.dashboard.py | 24 + metrics/grafana/tikv_details.json | 3176 ++++++++++++--------- metrics/grafana/tikv_details.json.sha256 | 2 +- src/server/metrics.rs | 27 + src/server/raft_client.rs | 92 +- src/server/service/kv.rs | 12 +- 8 files changed, 2042 insertions(+), 1343 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index cd7be165f66..49f4541c393 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3485,7 +3485,7 @@ dependencies = [ [[package]] name = "kvproto" version = "0.0.2" -source = "git+https://github.com/pingcap/kvproto.git#4a3e17f5e62dc3999e2c0f63293fdeffced80626" +source = "git+https://github.com/pingcap/kvproto.git#17bdaaa37b9fbc3231cf90a3dec9ecbfa3f27e4e" dependencies = [ "futures 0.3.15", "grpcio", diff --git a/components/tikv_util/src/time.rs b/components/tikv_util/src/time.rs index efab331e9f1..af541d385d6 100644 --- a/components/tikv_util/src/time.rs +++ b/components/tikv_util/src/time.rs @@ -17,19 +17,34 @@ use std::{ use async_speed_limit::clock::{BlockingClock, Clock, StandardClock}; use time::{Duration as TimeDuration, Timespec}; +/// Returns the monotonic raw time since some unspecified starting point. +pub use self::inner::monotonic_raw_now; +pub use self::inner::{monotonic_coarse_now, monotonic_now}; +use crate::sys::thread::StdThreadBuildWrapper; + +const NANOSECONDS_PER_SECOND: u64 = 1_000_000_000; +const MILLISECONDS_PER_SECOND: u64 = 1_000; +const MICROSECONDS_PER_SECOND: u64 = 1_000_000; +const NANOSECONDS_PER_MILLISECOND: u64 = 1_000_000; +const NANOSECONDS_PER_MICROSECOND: u64 = 1_000; + /// Converts Duration to milliseconds. #[inline] pub fn duration_to_ms(d: Duration) -> u64 { let nanos = u64::from(d.subsec_nanos()); // If Duration is too large, the result may be overflow. - d.as_secs() * 1_000 + (nanos / 1_000_000) + d.as_secs() * MILLISECONDS_PER_SECOND + (nanos / NANOSECONDS_PER_MILLISECOND) } /// Converts Duration to seconds. #[inline] pub fn duration_to_sec(d: Duration) -> f64 { let nanos = f64::from(d.subsec_nanos()); - d.as_secs() as f64 + (nanos / 1_000_000_000.0) + d.as_secs() as f64 + (nanos / NANOSECONDS_PER_SECOND as f64) +} + +pub fn nanos_to_secs(nanos: u64) -> f64 { + nanos as f64 / NANOSECONDS_PER_SECOND as f64 } /// Converts Duration to microseconds. @@ -37,7 +52,7 @@ pub fn duration_to_sec(d: Duration) -> f64 { pub fn duration_to_us(d: Duration) -> u64 { let nanos = u64::from(d.subsec_nanos()); // If Duration is too large, the result may be overflow. - d.as_secs() * 1_000_000 + (nanos / 1_000) + d.as_secs() * MICROSECONDS_PER_SECOND + (nanos / NANOSECONDS_PER_MICROSECOND) } /// Converts TimeSpec to nanoseconds @@ -51,7 +66,7 @@ pub fn timespec_to_ns(t: Timespec) -> u64 { pub fn duration_to_ns(d: Duration) -> u64 { let nanos = u64::from(d.subsec_nanos()); // If Duration is too large, the result may be overflow. - d.as_secs() * 1_000_000_000 + nanos + d.as_secs() * NANOSECONDS_PER_SECOND + nanos } pub trait InstantExt { @@ -203,15 +218,6 @@ impl Drop for Monitor { } } -/// Returns the monotonic raw time since some unspecified starting point. 
-pub use self::inner::monotonic_raw_now; -pub use self::inner::{monotonic_coarse_now, monotonic_now}; -use crate::sys::thread::StdThreadBuildWrapper; - -const NANOSECONDS_PER_SECOND: u64 = 1_000_000_000; -const MILLISECOND_PER_SECOND: i64 = 1_000; -const NANOSECONDS_PER_MILLISECOND: i64 = 1_000_000; - #[cfg(not(target_os = "linux"))] mod inner { use time::{self, Timespec}; @@ -390,10 +396,10 @@ impl Instant { later: Timespec, earlier: Timespec, ) -> Duration { - let later_ms = later.sec * MILLISECOND_PER_SECOND - + i64::from(later.nsec) / NANOSECONDS_PER_MILLISECOND; - let earlier_ms = earlier.sec * MILLISECOND_PER_SECOND - + i64::from(earlier.nsec) / NANOSECONDS_PER_MILLISECOND; + let later_ms = later.sec * MILLISECONDS_PER_SECOND as i64 + + i64::from(later.nsec) / NANOSECONDS_PER_MILLISECOND as i64; + let earlier_ms = earlier.sec * MILLISECONDS_PER_SECOND as i64 + + i64::from(earlier.nsec) / NANOSECONDS_PER_MILLISECOND as i64; let dur = later_ms - earlier_ms; if dur >= 0 { Duration::from_millis(dur as u64) @@ -638,6 +644,16 @@ mod tests { } } + #[test] + fn test_nanos_to_secs() { + assert_eq!(nanos_to_secs(0), 0.0); + assert_eq!(nanos_to_secs(1), 1e-9); + assert_eq!(nanos_to_secs(NANOSECONDS_PER_SECOND), 1.0); + assert_eq!(nanos_to_secs(1_500_000_000), 1.5); + // Test with a large number of nanoseconds (e.g., 10 billion ns = 10 seconds) + assert_eq!(nanos_to_secs(10 * NANOSECONDS_PER_SECOND), 10.0); + } + #[test] fn test_now() { let pairs = vec![ diff --git a/metrics/grafana/tikv_details.dashboard.py b/metrics/grafana/tikv_details.dashboard.py index ba90aed4a53..0a9b619cd4b 100644 --- a/metrics/grafana/tikv_details.dashboard.py +++ b/metrics/grafana/tikv_details.dashboard.py @@ -2547,6 +2547,30 @@ def RaftMessage() -> RowPanel: ), ] ) + layout.row( + heatmap_panel_graph_panel_histogram_quantile_pairs( + heatmap_title="Raft Message Send Wait duration", + heatmap_description="The time consumed waiting to send Raft Messages", + graph_title="99% Raft Message Send Wait Duration", + graph_description="The time consumed waiting to send Raft Messages per TiKV instance", + graph_by_labels=["instance"], + yaxis_format=UNITS.SECONDS, + metric="tikv_server_raft_message_duration_seconds", + label_selectors=['type="send_wait"'], + ) + ) + layout.row( + heatmap_panel_graph_panel_histogram_quantile_pairs( + heatmap_title="Raft Message Receive Delay duration", + heatmap_description="The time consumed to transmit Raft Messages over the network, reported by the receiver", + graph_title="99% Raft Message Receive Delay Duration", + graph_description="The time consumed to transmit Raft Messages over the network per TiKV instance, reported by the receiver", + graph_by_labels=["instance"], + yaxis_format=UNITS.SECONDS, + metric="tikv_server_raft_message_duration_seconds", + label_selectors=['type="receive_delay"'], + ) + ) return layout.row_panel diff --git a/metrics/grafana/tikv_details.json b/metrics/grafana/tikv_details.json index f120345c391..4d310b46d86 100644 --- a/metrics/grafana/tikv_details.json +++ b/metrics/grafana/tikv_details.json @@ -23778,6 +23778,618 @@ "align": false, "alignLevel": 0 } + }, + { + "cacheTimeout": null, + "cards": { + "cardPadding": null, + "cardRound": null + }, + "color": { + "cardColor": "#b4ff00", + "colorScale": "sqrt", + "colorScheme": "interpolateSpectral", + "exponent": 0.5, + "max": null, + "min": null, + "mode": "spectrum" + }, + "dataFormat": "tsbuckets", + "datasource": "${DS_TEST-CLUSTER}", + "description": "The time consumed waiting to send Raft Messages", + 
"editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, + "gridPos": { + "h": 7, + "w": 12, + "x": 0, + "y": 21 + }, + "heatmap": {}, + "height": null, + "hideTimeOverride": false, + "hideZeroBuckets": true, + "highlightCards": true, + "id": 173, + "interval": null, + "legend": { + "show": false + }, + "links": [], + "maxDataPoints": 512, + "maxPerRow": null, + "minSpan": null, + "options": {}, + "repeat": null, + "repeatDirection": null, + "reverseYBuckets": false, + "span": null, + "targets": [ + { + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(increase(\n tikv_server_raft_message_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"send_wait\"}\n [$__rate_interval]\n)) by (le) ", + "format": "heatmap", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "{{le}}", + "metric": "", + "query": "sum(increase(\n tikv_server_raft_message_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"send_wait\"}\n [$__rate_interval]\n)) by (le) ", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Raft Message Send Wait duration", + "tooltip": { + "msResolution": true, + "shared": true, + "show": true, + "showHistogram": true, + "sort": 0, + "value_type": "individual" + }, + "transformations": [], + "transparent": false, + "type": "heatmap", + "xAxis": { + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "xBucketNumber": null, + "xBucketSize": null, + "yAxis": { + "decimals": 1, + "format": "s", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + "yBucketBound": "upper", + "yBucketNumber": null, + "yBucketSize": null + }, + { + "aliasColors": {}, + "bars": false, + "cacheTimeout": null, + "datasource": "${DS_TEST-CLUSTER}", + "description": "The time consumed waiting to send Raft Messages per TiKV instance", + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, + "fill": 1, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, + "gridPos": { + "h": 7, + "w": 12, + "x": 12, + "y": 21 + }, + "height": null, + "hideTimeOverride": false, + "id": 174, + "interval": null, + "isNew": true, + "legend": { + "alignAsTable": true, + "avg": false, + "current": true, + "hideEmpty": true, + "hideZero": true, + "max": true, + "min": false, + "rightSide": true, + "show": true, + "sideWidth": null, + "sort": "max", + "sortDesc": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", + "options": { + "alertThreshold": true, + "dataLinks": [] + }, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "repeatDirection": null, + "seriesOverrides": [ + { + "alias": "count", + "bars": false, + "dashLength": 1, + "dashes": true, + "fill": 2, + "fillBelowTo": null, + "lines": true, + "spaceLength": 1, + "transform": "negative-Y", + "yaxis": 2, + "zindex": -3 + }, + { + "alias": "avg", + "bars": false, + "fill": 7, + "fillBelowTo": null, + "lines": true, + "yaxis": 1, + 
"zindex": 0 + } + ], + "span": null, + "stack": false, + "steppedLine": false, + "targets": [ + { + "datasource": "${DS_TEST-CLUSTER}", + "expr": "histogram_quantile(0.9999,(\n sum(rate(\n tikv_server_raft_message_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"send_wait\"}\n [$__rate_interval]\n)) by (instance, le, $additional_groupby) \n \n \n)) ", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "99.99%-{{instance}} {{$additional_groupby}}", + "metric": "", + "query": "histogram_quantile(0.9999,(\n sum(rate(\n tikv_server_raft_message_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"send_wait\"}\n [$__rate_interval]\n)) by (instance, le, $additional_groupby) \n \n \n)) ", + "refId": "", + "step": 10, + "target": "" + }, + { + "datasource": "${DS_TEST-CLUSTER}", + "expr": "histogram_quantile(0.99,(\n sum(rate(\n tikv_server_raft_message_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"send_wait\"}\n [$__rate_interval]\n)) by (instance, le, $additional_groupby) \n \n \n)) ", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "99%-{{instance}} {{$additional_groupby}}", + "metric": "", + "query": "histogram_quantile(0.99,(\n sum(rate(\n tikv_server_raft_message_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"send_wait\"}\n [$__rate_interval]\n)) by (instance, le, $additional_groupby) \n \n \n)) ", + "refId": "", + "step": 10, + "target": "" + }, + { + "datasource": "${DS_TEST-CLUSTER}", + "expr": "(sum(rate(\n tikv_server_raft_message_duration_seconds_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"send_wait\"}\n [$__rate_interval]\n)) by (instance, $additional_groupby) / sum(rate(\n tikv_server_raft_message_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"send_wait\"}\n [$__rate_interval]\n)) by (instance, $additional_groupby) )", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "avg-{{instance}} {{$additional_groupby}}", + "metric": "", + "query": "(sum(rate(\n tikv_server_raft_message_duration_seconds_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"send_wait\"}\n [$__rate_interval]\n)) by (instance, $additional_groupby) / sum(rate(\n tikv_server_raft_message_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"send_wait\"}\n [$__rate_interval]\n)) by (instance, $additional_groupby) )", + "refId": "", + "step": 10, + "target": "" + }, + { + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_server_raft_message_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"send_wait\"}\n [$__rate_interval]\n)) by (instance, $additional_groupby) ", + "format": "time_series", + "hide": true, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "count-{{instance}} {{$additional_groupby}}", + "metric": "", + "query": "sum(rate(\n tikv_server_raft_message_duration_seconds_count\n 
{k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"send_wait\"}\n [$__rate_interval]\n)) by (instance, $additional_groupby) ", + "refId": "", + "step": 10, + "target": "" + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "99% Raft Message Send Wait Duration", + "tooltip": { + "msResolution": true, + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "transformations": [], + "transparent": false, + "type": "graph", + "xaxis": { + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "decimals": null, + "format": "s", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "decimals": null, + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": 0 + } + }, + { + "cacheTimeout": null, + "cards": { + "cardPadding": null, + "cardRound": null + }, + "color": { + "cardColor": "#b4ff00", + "colorScale": "sqrt", + "colorScheme": "interpolateSpectral", + "exponent": 0.5, + "max": null, + "min": null, + "mode": "spectrum" + }, + "dataFormat": "tsbuckets", + "datasource": "${DS_TEST-CLUSTER}", + "description": "The time consumed to transmit Raft Messages over the network, reported by the receiver", + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, + "gridPos": { + "h": 7, + "w": 12, + "x": 0, + "y": 28 + }, + "heatmap": {}, + "height": null, + "hideTimeOverride": false, + "hideZeroBuckets": true, + "highlightCards": true, + "id": 175, + "interval": null, + "legend": { + "show": false + }, + "links": [], + "maxDataPoints": 512, + "maxPerRow": null, + "minSpan": null, + "options": {}, + "repeat": null, + "repeatDirection": null, + "reverseYBuckets": false, + "span": null, + "targets": [ + { + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(increase(\n tikv_server_raft_message_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"receive_delay\"}\n [$__rate_interval]\n)) by (le) ", + "format": "heatmap", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "{{le}}", + "metric": "", + "query": "sum(increase(\n tikv_server_raft_message_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"receive_delay\"}\n [$__rate_interval]\n)) by (le) ", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Raft Message Receive Delay duration", + "tooltip": { + "msResolution": true, + "shared": true, + "show": true, + "showHistogram": true, + "sort": 0, + "value_type": "individual" + }, + "transformations": [], + "transparent": false, + "type": "heatmap", + "xAxis": { + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "xBucketNumber": null, + "xBucketSize": null, + "yAxis": { + "decimals": 1, + "format": "s", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + "yBucketBound": "upper", + "yBucketNumber": null, + "yBucketSize": null + }, + { + "aliasColors": {}, + "bars": false, + "cacheTimeout": null, + "datasource": "${DS_TEST-CLUSTER}", + "description": "The time consumed to transmit Raft Messages over the network per TiKV instance, reported by the receiver", + "editable": true, + "error": false, + 
"fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, + "fill": 1, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, + "gridPos": { + "h": 7, + "w": 12, + "x": 12, + "y": 28 + }, + "height": null, + "hideTimeOverride": false, + "id": 176, + "interval": null, + "isNew": true, + "legend": { + "alignAsTable": true, + "avg": false, + "current": true, + "hideEmpty": true, + "hideZero": true, + "max": true, + "min": false, + "rightSide": true, + "show": true, + "sideWidth": null, + "sort": "max", + "sortDesc": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", + "options": { + "alertThreshold": true, + "dataLinks": [] + }, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "repeatDirection": null, + "seriesOverrides": [ + { + "alias": "count", + "bars": false, + "dashLength": 1, + "dashes": true, + "fill": 2, + "fillBelowTo": null, + "lines": true, + "spaceLength": 1, + "transform": "negative-Y", + "yaxis": 2, + "zindex": -3 + }, + { + "alias": "avg", + "bars": false, + "fill": 7, + "fillBelowTo": null, + "lines": true, + "yaxis": 1, + "zindex": 0 + } + ], + "span": null, + "stack": false, + "steppedLine": false, + "targets": [ + { + "datasource": "${DS_TEST-CLUSTER}", + "expr": "histogram_quantile(0.9999,(\n sum(rate(\n tikv_server_raft_message_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"receive_delay\"}\n [$__rate_interval]\n)) by (instance, le, $additional_groupby) \n \n \n)) ", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "99.99%-{{instance}} {{$additional_groupby}}", + "metric": "", + "query": "histogram_quantile(0.9999,(\n sum(rate(\n tikv_server_raft_message_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"receive_delay\"}\n [$__rate_interval]\n)) by (instance, le, $additional_groupby) \n \n \n)) ", + "refId": "", + "step": 10, + "target": "" + }, + { + "datasource": "${DS_TEST-CLUSTER}", + "expr": "histogram_quantile(0.99,(\n sum(rate(\n tikv_server_raft_message_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"receive_delay\"}\n [$__rate_interval]\n)) by (instance, le, $additional_groupby) \n \n \n)) ", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "99%-{{instance}} {{$additional_groupby}}", + "metric": "", + "query": "histogram_quantile(0.99,(\n sum(rate(\n tikv_server_raft_message_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"receive_delay\"}\n [$__rate_interval]\n)) by (instance, le, $additional_groupby) \n \n \n)) ", + "refId": "", + "step": 10, + "target": "" + }, + { + "datasource": "${DS_TEST-CLUSTER}", + "expr": "(sum(rate(\n tikv_server_raft_message_duration_seconds_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"receive_delay\"}\n [$__rate_interval]\n)) by (instance, $additional_groupby) / sum(rate(\n 
tikv_server_raft_message_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"receive_delay\"}\n [$__rate_interval]\n)) by (instance, $additional_groupby) )", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "avg-{{instance}} {{$additional_groupby}}", + "metric": "", + "query": "(sum(rate(\n tikv_server_raft_message_duration_seconds_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"receive_delay\"}\n [$__rate_interval]\n)) by (instance, $additional_groupby) / sum(rate(\n tikv_server_raft_message_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"receive_delay\"}\n [$__rate_interval]\n)) by (instance, $additional_groupby) )", + "refId": "", + "step": 10, + "target": "" + }, + { + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_server_raft_message_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"receive_delay\"}\n [$__rate_interval]\n)) by (instance, $additional_groupby) ", + "format": "time_series", + "hide": true, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "count-{{instance}} {{$additional_groupby}}", + "metric": "", + "query": "sum(rate(\n tikv_server_raft_message_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"receive_delay\"}\n [$__rate_interval]\n)) by (instance, $additional_groupby) ", + "refId": "", + "step": 10, + "target": "" + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "99% Raft Message Receive Delay Duration", + "tooltip": { + "msResolution": true, + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "transformations": [], + "transparent": false, + "type": "graph", + "xaxis": { + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "decimals": null, + "format": "s", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "decimals": null, + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": 0 + } } ], "repeat": null, @@ -23814,7 +24426,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 173, + "id": 177, "interval": null, "links": [], "maxDataPoints": 100, @@ -23853,7 +24465,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 174, + "id": 178, "interval": null, "isNew": true, "legend": { @@ -23986,7 +24598,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 175, + "id": 179, "interval": null, "isNew": true, "legend": { @@ -24119,7 +24731,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 176, + "id": 180, "interval": null, "isNew": true, "legend": { @@ -24252,7 +24864,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 177, + "id": 181, "interval": null, "isNew": true, "legend": { @@ -24385,7 +24997,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 178, + "id": 182, "interval": null, "isNew": true, "legend": { @@ -24518,7 +25130,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 179, + "id": 183, "interval": null, "isNew": true, "legend": { @@ -24681,7 +25293,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 180, + "id": 184, "interval": null, "isNew": true, "legend": { 
@@ -24817,7 +25429,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 181, + "id": 185, "interval": null, "links": [], "maxDataPoints": 100, @@ -24856,7 +25468,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 182, + "id": 186, "interval": null, "isNew": true, "legend": { @@ -25004,7 +25616,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 183, + "id": 187, "interval": null, "isNew": true, "legend": { @@ -25152,7 +25764,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 184, + "id": 188, "interval": null, "isNew": true, "legend": { @@ -25285,7 +25897,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 185, + "id": 189, "interval": null, "isNew": true, "legend": { @@ -25418,7 +26030,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 186, + "id": 190, "interval": null, "isNew": true, "legend": { @@ -25551,7 +26163,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 187, + "id": 191, "interval": null, "isNew": true, "legend": { @@ -25684,7 +26296,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 188, + "id": 192, "interval": null, "isNew": true, "legend": { @@ -25817,7 +26429,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 189, + "id": 193, "interval": null, "isNew": true, "legend": { @@ -25950,7 +26562,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 190, + "id": 194, "interval": null, "isNew": true, "legend": { @@ -26127,7 +26739,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 191, + "id": 195, "interval": null, "links": [], "maxDataPoints": 100, @@ -26166,7 +26778,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 192, + "id": 196, "interval": null, "isNew": true, "legend": { @@ -26329,7 +26941,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 193, + "id": 197, "interval": null, "isNew": true, "legend": { @@ -26530,7 +27142,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 194, + "id": 198, "interval": null, "isNew": true, "legend": { @@ -26678,7 +27290,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 195, + "id": 199, "interval": null, "isNew": true, "legend": { @@ -26841,7 +27453,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 196, + "id": 200, "interval": null, "isNew": true, "legend": { @@ -27042,7 +27654,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 197, + "id": 201, "interval": null, "isNew": true, "legend": { @@ -27220,7 +27832,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 198, + "id": 202, "interval": null, "isNew": true, "legend": { @@ -27383,7 +27995,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 199, + "id": 203, "interval": null, "isNew": true, "legend": { @@ -27546,7 +28158,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 200, + "id": 204, "interval": null, "isNew": true, "legend": { @@ -27679,7 +28291,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 201, + "id": 205, "interval": null, "isNew": true, "legend": { @@ -27883,7 +28495,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 202, + "id": 206, "interval": null, "links": [], "maxDataPoints": 100, @@ -27922,7 +28534,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 203, + "id": 207, "interval": null, "isNew": true, "legend": { @@ -28115,7 +28727,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 204, + "id": 208, "interval": null, "isNew": true, "legend": { @@ -28293,7 +28905,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 205, + "id": 209, "interval": null, "isNew": true, "legend": { @@ 
-28501,7 +29113,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 206, + "id": 210, "interval": null, "isNew": true, "legend": { @@ -28679,7 +29291,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 207, + "id": 211, "interval": null, "isNew": true, "legend": { @@ -28842,7 +29454,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 208, + "id": 212, "interval": null, "isNew": true, "legend": { @@ -29020,7 +29632,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 209, + "id": 213, "interval": null, "isNew": true, "legend": { @@ -29153,7 +29765,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 210, + "id": 214, "interval": null, "isNew": true, "legend": { @@ -29331,7 +29943,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 211, + "id": 215, "interval": null, "isNew": true, "legend": { @@ -29464,7 +30076,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 212, + "id": 216, "interval": null, "isNew": true, "legend": { @@ -29642,7 +30254,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 213, + "id": 217, "interval": null, "isNew": true, "legend": { @@ -29775,7 +30387,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 214, + "id": 218, "interval": null, "isNew": true, "legend": { @@ -29953,7 +30565,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 215, + "id": 219, "interval": null, "isNew": true, "legend": { @@ -30131,7 +30743,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 216, + "id": 220, "interval": null, "isNew": true, "legend": { @@ -30309,7 +30921,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 217, + "id": 221, "interval": null, "isNew": true, "legend": { @@ -30442,7 +31054,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 218, + "id": 222, "interval": null, "isNew": true, "legend": { @@ -30575,7 +31187,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 219, + "id": 223, "interval": null, "isNew": true, "legend": { @@ -30708,7 +31320,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 220, + "id": 224, "interval": null, "isNew": true, "legend": { @@ -30931,7 +31543,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 221, + "id": 225, "interval": null, "isNew": true, "legend": { @@ -31124,7 +31736,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 222, + "id": 226, "interval": null, "isNew": true, "legend": { @@ -31287,7 +31899,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 223, + "id": 227, "interval": null, "isNew": true, "legend": { @@ -31480,7 +32092,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 224, + "id": 228, "interval": null, "isNew": true, "legend": { @@ -31628,7 +32240,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 225, + "id": 229, "interval": null, "isNew": true, "legend": { @@ -31761,7 +32373,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 226, + "id": 230, "interval": null, "isNew": true, "legend": { @@ -31882,185 +32494,7 @@ "bars": false, "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "description": "The bytes per read", - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "thresholds": { - "mode": "absolute", - "steps": [] - } - } - }, - "fill": 1, - "fillGradient": 1, - "grid": { - "threshold1": null, - "threshold1Color": "rgba(216, 200, 27, 0.27)", - "threshold2": null, - "threshold2Color": "rgba(234, 112, 112, 0.22)" - }, - "gridPos": { - "h": 7, - "w": 12, - "x": 12, - "y": 77 - }, - "height": null, - "hideTimeOverride": false, - "id": 227, - 
"interval": null, - "isNew": true, - "legend": { - "alignAsTable": true, - "avg": false, - "current": true, - "hideEmpty": true, - "hideZero": true, - "max": true, - "min": false, - "rightSide": true, - "show": true, - "sideWidth": null, - "sort": "max", - "sortDesc": true, - "total": false, - "values": true - }, - "lines": true, - "linewidth": 1, - "links": [], - "maxDataPoints": null, - "maxPerRow": null, - "minSpan": null, - "nullPointMode": "null as zero", - "options": { - "alertThreshold": true, - "dataLinks": [] - }, - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "repeat": null, - "repeatDirection": null, - "seriesOverrides": [], - "span": null, - "stack": false, - "steppedLine": false, - "targets": [ - { - "datasource": "${DS_TEST-CLUSTER}", - "expr": "max((\n tikv_engine_bytes_per_read\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"bytes_per_read_max\"}\n \n)) by ($additional_groupby) ", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 1, - "legendFormat": "max {{$additional_groupby}}", - "metric": "", - "query": "max((\n tikv_engine_bytes_per_read\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"bytes_per_read_max\"}\n \n)) by ($additional_groupby) ", - "refId": "", - "step": 10, - "target": "" - }, - { - "datasource": "${DS_TEST-CLUSTER}", - "expr": "avg((\n tikv_engine_bytes_per_read\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"bytes_per_read_percentile99\"}\n \n)) by ($additional_groupby) ", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 1, - "legendFormat": "99% {{$additional_groupby}}", - "metric": "", - "query": "avg((\n tikv_engine_bytes_per_read\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"bytes_per_read_percentile99\"}\n \n)) by ($additional_groupby) ", - "refId": "", - "step": 10, - "target": "" - }, - { - "datasource": "${DS_TEST-CLUSTER}", - "expr": "avg((\n tikv_engine_bytes_per_read\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"bytes_per_read_percentile95\"}\n \n)) by ($additional_groupby) ", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 1, - "legendFormat": "95% {{$additional_groupby}}", - "metric": "", - "query": "avg((\n tikv_engine_bytes_per_read\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"bytes_per_read_percentile95\"}\n \n)) by ($additional_groupby) ", - "refId": "", - "step": 10, - "target": "" - }, - { - "datasource": "${DS_TEST-CLUSTER}", - "expr": "avg((\n tikv_engine_bytes_per_read\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"bytes_per_read_average\"}\n \n)) by ($additional_groupby) ", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 1, - "legendFormat": "avg {{$additional_groupby}}", - "metric": "", - "query": "avg((\n tikv_engine_bytes_per_read\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"bytes_per_read_average\"}\n \n)) by ($additional_groupby) ", - "refId": "", - "step": 10, - "target": "" - } - ], - "thresholds": [], - "timeFrom": 
null, - "timeShift": null, - "title": "Bytes / Read", - "tooltip": { - "msResolution": true, - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "transformations": [], - "transparent": false, - "type": "graph", - "xaxis": { - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "decimals": null, - "format": "bytes", - "label": null, - "logBase": 10, - "max": null, - "min": null, - "show": true - }, - { - "decimals": null, - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ], - "yaxis": { - "align": false, - "alignLevel": 0 - } - }, - { - "aliasColors": {}, - "bars": false, - "cacheTimeout": null, - "datasource": "${DS_TEST-CLUSTER}", - "description": "The flow rate of compaction operations per type", + "description": "The bytes per read", "editable": true, "error": false, "fieldConfig": { @@ -32082,12 +32516,12 @@ "gridPos": { "h": 7, "w": 12, - "x": 0, - "y": 84 + "x": 12, + "y": 77 }, "height": null, "hideTimeOverride": false, - "id": 228, + "id": 231, "interval": null, "isNew": true, "legend": { @@ -32130,45 +32564,60 @@ "targets": [ { "datasource": "${DS_TEST-CLUSTER}", - "expr": "sum(rate(\n tikv_engine_compaction_flow_bytes\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"bytes_read\"}\n [$__rate_interval]\n)) by ($additional_groupby) ", + "expr": "max((\n tikv_engine_bytes_per_read\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"bytes_per_read_max\"}\n \n)) by ($additional_groupby) ", "format": "time_series", "hide": false, "instant": false, "interval": "", "intervalFactor": 1, - "legendFormat": "read {{$additional_groupby}}", + "legendFormat": "max {{$additional_groupby}}", "metric": "", - "query": "sum(rate(\n tikv_engine_compaction_flow_bytes\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"bytes_read\"}\n [$__rate_interval]\n)) by ($additional_groupby) ", + "query": "max((\n tikv_engine_bytes_per_read\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"bytes_per_read_max\"}\n \n)) by ($additional_groupby) ", "refId": "", "step": 10, "target": "" }, { "datasource": "${DS_TEST-CLUSTER}", - "expr": "sum(rate(\n tikv_engine_compaction_flow_bytes\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"bytes_written\"}\n [$__rate_interval]\n)) by ($additional_groupby) ", + "expr": "avg((\n tikv_engine_bytes_per_read\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"bytes_per_read_percentile99\"}\n \n)) by ($additional_groupby) ", "format": "time_series", "hide": false, "instant": false, "interval": "", "intervalFactor": 1, - "legendFormat": "written {{$additional_groupby}}", + "legendFormat": "99% {{$additional_groupby}}", "metric": "", - "query": "sum(rate(\n tikv_engine_compaction_flow_bytes\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"bytes_written\"}\n [$__rate_interval]\n)) by ($additional_groupby) ", + "query": "avg((\n tikv_engine_bytes_per_read\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"bytes_per_read_percentile99\"}\n \n)) by ($additional_groupby) ", "refId": "", "step": 10, "target": "" }, { "datasource": "${DS_TEST-CLUSTER}", - 
"expr": "sum(rate(\n tikv_engine_flow_bytes\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"flush_write_bytes\"}\n [$__rate_interval]\n)) by ($additional_groupby) ", + "expr": "avg((\n tikv_engine_bytes_per_read\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"bytes_per_read_percentile95\"}\n \n)) by ($additional_groupby) ", "format": "time_series", "hide": false, "instant": false, "interval": "", "intervalFactor": 1, - "legendFormat": "flushed {{$additional_groupby}}", + "legendFormat": "95% {{$additional_groupby}}", "metric": "", - "query": "sum(rate(\n tikv_engine_flow_bytes\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"flush_write_bytes\"}\n [$__rate_interval]\n)) by ($additional_groupby) ", + "query": "avg((\n tikv_engine_bytes_per_read\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"bytes_per_read_percentile95\"}\n \n)) by ($additional_groupby) ", + "refId": "", + "step": 10, + "target": "" + }, + { + "datasource": "${DS_TEST-CLUSTER}", + "expr": "avg((\n tikv_engine_bytes_per_read\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"bytes_per_read_average\"}\n \n)) by ($additional_groupby) ", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "avg {{$additional_groupby}}", + "metric": "", + "query": "avg((\n tikv_engine_bytes_per_read\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"bytes_per_read_average\"}\n \n)) by ($additional_groupby) ", "refId": "", "step": 10, "target": "" @@ -32177,7 +32626,7 @@ "thresholds": [], "timeFrom": null, "timeShift": null, - "title": "Compaction flow", + "title": "Bytes / Read", "tooltip": { "msResolution": true, "shared": true, @@ -32196,9 +32645,9 @@ "yaxes": [ { "decimals": null, - "format": "binBps", + "format": "bytes", "label": null, - "logBase": 1, + "logBase": 10, "max": null, "min": null, "show": true @@ -32223,7 +32672,7 @@ "bars": false, "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "description": "The bytes per write", + "description": "The flow rate of compaction operations per type", "editable": true, "error": false, "fieldConfig": { @@ -32245,12 +32694,12 @@ "gridPos": { "h": 7, "w": 12, - "x": 12, + "x": 0, "y": 84 }, "height": null, "hideTimeOverride": false, - "id": 229, + "id": 232, "interval": null, "isNew": true, "legend": { @@ -32293,60 +32742,45 @@ "targets": [ { "datasource": "${DS_TEST-CLUSTER}", - "expr": "max((\n tikv_engine_bytes_per_write\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"bytes_per_write_max\"}\n \n)) by ($additional_groupby) ", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 1, - "legendFormat": "max {{$additional_groupby}}", - "metric": "", - "query": "max((\n tikv_engine_bytes_per_write\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"bytes_per_write_max\"}\n \n)) by ($additional_groupby) ", - "refId": "", - "step": 10, - "target": "" - }, - { - "datasource": "${DS_TEST-CLUSTER}", - "expr": "avg((\n tikv_engine_bytes_per_write\n 
{k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"bytes_per_write_percentile99\"}\n \n)) by ($additional_groupby) ", + "expr": "sum(rate(\n tikv_engine_compaction_flow_bytes\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"bytes_read\"}\n [$__rate_interval]\n)) by ($additional_groupby) ", "format": "time_series", "hide": false, "instant": false, "interval": "", "intervalFactor": 1, - "legendFormat": "99% {{$additional_groupby}}", + "legendFormat": "read {{$additional_groupby}}", "metric": "", - "query": "avg((\n tikv_engine_bytes_per_write\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"bytes_per_write_percentile99\"}\n \n)) by ($additional_groupby) ", + "query": "sum(rate(\n tikv_engine_compaction_flow_bytes\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"bytes_read\"}\n [$__rate_interval]\n)) by ($additional_groupby) ", "refId": "", "step": 10, "target": "" }, { "datasource": "${DS_TEST-CLUSTER}", - "expr": "avg((\n tikv_engine_bytes_per_write\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"bytes_per_write_percentile95\"}\n \n)) by ($additional_groupby) ", + "expr": "sum(rate(\n tikv_engine_compaction_flow_bytes\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"bytes_written\"}\n [$__rate_interval]\n)) by ($additional_groupby) ", "format": "time_series", "hide": false, "instant": false, "interval": "", "intervalFactor": 1, - "legendFormat": "95% {{$additional_groupby}}", + "legendFormat": "written {{$additional_groupby}}", "metric": "", - "query": "avg((\n tikv_engine_bytes_per_write\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"bytes_per_write_percentile95\"}\n \n)) by ($additional_groupby) ", + "query": "sum(rate(\n tikv_engine_compaction_flow_bytes\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"bytes_written\"}\n [$__rate_interval]\n)) by ($additional_groupby) ", "refId": "", "step": 10, "target": "" }, { "datasource": "${DS_TEST-CLUSTER}", - "expr": "avg((\n tikv_engine_bytes_per_write\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"bytes_per_write_average\"}\n \n)) by ($additional_groupby) ", + "expr": "sum(rate(\n tikv_engine_flow_bytes\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"flush_write_bytes\"}\n [$__rate_interval]\n)) by ($additional_groupby) ", "format": "time_series", "hide": false, "instant": false, "interval": "", "intervalFactor": 1, - "legendFormat": "avg {{$additional_groupby}}", + "legendFormat": "flushed {{$additional_groupby}}", "metric": "", - "query": "avg((\n tikv_engine_bytes_per_write\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"bytes_per_write_average\"}\n \n)) by ($additional_groupby) ", + "query": "sum(rate(\n tikv_engine_flow_bytes\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"flush_write_bytes\"}\n [$__rate_interval]\n)) by ($additional_groupby) ", "refId": "", "step": 10, "target": "" @@ -32355,7 +32789,7 @@ "thresholds": [], "timeFrom": null, "timeShift": null, - "title": 
"Bytes / Write", + "title": "Compaction flow", "tooltip": { "msResolution": true, "shared": true, @@ -32374,7 +32808,7 @@ "yaxes": [ { "decimals": null, - "format": "bytes", + "format": "binBps", "label": null, "logBase": 1, "max": null, @@ -32401,7 +32835,7 @@ "bars": false, "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "description": "The read amplification per TiKV instance", + "description": "The bytes per write", "editable": true, "error": false, "fieldConfig": { @@ -32423,12 +32857,12 @@ "gridPos": { "h": 7, "w": 12, - "x": 0, - "y": 91 + "x": 12, + "y": 84 }, "height": null, "hideTimeOverride": false, - "id": 230, + "id": 233, "interval": null, "isNew": true, "legend": { @@ -32471,148 +32905,60 @@ "targets": [ { "datasource": "${DS_TEST-CLUSTER}", - "expr": "(sum(rate(\n tikv_engine_read_amp_flow_bytes\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"read_amp_total_read_bytes\"}\n [$__rate_interval]\n)) by (instance) / sum(rate(\n tikv_engine_read_amp_flow_bytes\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"read_amp_estimate_useful_bytes\"}\n [$__rate_interval]\n)) by (instance) )", + "expr": "max((\n tikv_engine_bytes_per_write\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"bytes_per_write_max\"}\n \n)) by ($additional_groupby) ", "format": "time_series", "hide": false, "instant": false, "interval": "", "intervalFactor": 1, - "legendFormat": "{{instance}}", + "legendFormat": "max {{$additional_groupby}}", "metric": "", - "query": "(sum(rate(\n tikv_engine_read_amp_flow_bytes\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"read_amp_total_read_bytes\"}\n [$__rate_interval]\n)) by (instance) / sum(rate(\n tikv_engine_read_amp_flow_bytes\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"read_amp_estimate_useful_bytes\"}\n [$__rate_interval]\n)) by (instance) )", + "query": "max((\n tikv_engine_bytes_per_write\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"bytes_per_write_max\"}\n \n)) by ($additional_groupby) ", "refId": "", "step": 10, "target": "" - } - ], - "thresholds": [], - "timeFrom": null, - "timeShift": null, - "title": "Read amplification", - "tooltip": { - "msResolution": true, - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "transformations": [], - "transparent": false, - "type": "graph", - "xaxis": { - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ + }, { - "decimals": null, - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true + "datasource": "${DS_TEST-CLUSTER}", + "expr": "avg((\n tikv_engine_bytes_per_write\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"bytes_per_write_percentile99\"}\n \n)) by ($additional_groupby) ", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "99% {{$additional_groupby}}", + "metric": "", + "query": "avg((\n tikv_engine_bytes_per_write\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"bytes_per_write_percentile99\"}\n \n)) by ($additional_groupby) ", + "refId": "", + "step": 10, + "target": "" }, { - "decimals": 
null, - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ], - "yaxis": { - "align": false, - "alignLevel": 0 - } - }, - { - "aliasColors": {}, - "bars": false, - "cacheTimeout": null, - "datasource": "${DS_TEST-CLUSTER}", - "description": "The pending bytes to be compacted", - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "thresholds": { - "mode": "absolute", - "steps": [] - } - } - }, - "fill": 1, - "fillGradient": 1, - "grid": { - "threshold1": null, - "threshold1Color": "rgba(216, 200, 27, 0.27)", - "threshold2": null, - "threshold2Color": "rgba(234, 112, 112, 0.22)" - }, - "gridPos": { - "h": 7, - "w": 12, - "x": 12, - "y": 91 - }, - "height": null, - "hideTimeOverride": false, - "id": 231, - "interval": null, - "isNew": true, - "legend": { - "alignAsTable": true, - "avg": false, - "current": true, - "hideEmpty": true, - "hideZero": true, - "max": true, - "min": false, - "rightSide": true, - "show": true, - "sideWidth": null, - "sort": "max", - "sortDesc": true, - "total": false, - "values": true - }, - "lines": true, - "linewidth": 1, - "links": [], - "maxDataPoints": null, - "maxPerRow": null, - "minSpan": null, - "nullPointMode": "null as zero", - "options": { - "alertThreshold": true, - "dataLinks": [] - }, - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "repeat": null, - "repeatDirection": null, - "seriesOverrides": [], - "span": null, - "stack": false, - "steppedLine": false, - "targets": [ + "datasource": "${DS_TEST-CLUSTER}", + "expr": "avg((\n tikv_engine_bytes_per_write\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"bytes_per_write_percentile95\"}\n \n)) by ($additional_groupby) ", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "95% {{$additional_groupby}}", + "metric": "", + "query": "avg((\n tikv_engine_bytes_per_write\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"bytes_per_write_percentile95\"}\n \n)) by ($additional_groupby) ", + "refId": "", + "step": 10, + "target": "" + }, { "datasource": "${DS_TEST-CLUSTER}", - "expr": "sum((\n tikv_engine_pending_compaction_bytes\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\"}\n \n)) by (cf, $additional_groupby) ", + "expr": "avg((\n tikv_engine_bytes_per_write\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"bytes_per_write_average\"}\n \n)) by ($additional_groupby) ", "format": "time_series", "hide": false, "instant": false, "interval": "", "intervalFactor": 1, - "legendFormat": "{{cf}} {{$additional_groupby}}", + "legendFormat": "avg {{$additional_groupby}}", "metric": "", - "query": "sum((\n tikv_engine_pending_compaction_bytes\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\"}\n \n)) by (cf, $additional_groupby) ", + "query": "avg((\n tikv_engine_bytes_per_write\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"bytes_per_write_average\"}\n \n)) by ($additional_groupby) ", "refId": "", "step": 10, "target": "" @@ -32621,7 +32967,7 @@ "thresholds": [], "timeFrom": null, "timeShift": null, - "title": "Compaction pending bytes", + "title": "Bytes / Write", "tooltip": { "msResolution": true, "shared": true, @@ -32667,7 
+33013,7 @@ "bars": false, "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "description": "The number of snapshot of each TiKV instance", + "description": "The read amplification per TiKV instance", "editable": true, "error": false, "fieldConfig": { @@ -32690,11 +33036,11 @@ "h": 7, "w": 12, "x": 0, - "y": 98 + "y": 91 }, "height": null, "hideTimeOverride": false, - "id": 232, + "id": 234, "interval": null, "isNew": true, "legend": { @@ -32737,7 +33083,7 @@ "targets": [ { "datasource": "${DS_TEST-CLUSTER}", - "expr": "((\n tikv_engine_num_snapshots\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\"}\n \n)) ", + "expr": "(sum(rate(\n tikv_engine_read_amp_flow_bytes\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"read_amp_total_read_bytes\"}\n [$__rate_interval]\n)) by (instance) / sum(rate(\n tikv_engine_read_amp_flow_bytes\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"read_amp_estimate_useful_bytes\"}\n [$__rate_interval]\n)) by (instance) )", "format": "time_series", "hide": false, "instant": false, @@ -32745,7 +33091,7 @@ "intervalFactor": 1, "legendFormat": "{{instance}}", "metric": "", - "query": "((\n tikv_engine_num_snapshots\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\"}\n \n)) ", + "query": "(sum(rate(\n tikv_engine_read_amp_flow_bytes\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"read_amp_total_read_bytes\"}\n [$__rate_interval]\n)) by (instance) / sum(rate(\n tikv_engine_read_amp_flow_bytes\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"read_amp_estimate_useful_bytes\"}\n [$__rate_interval]\n)) by (instance) )", "refId": "", "step": 10, "target": "" @@ -32754,7 +33100,7 @@ "thresholds": [], "timeFrom": null, "timeShift": null, - "title": "Number of snapshots", + "title": "Read amplification", "tooltip": { "msResolution": true, "shared": true, @@ -32800,7 +33146,7 @@ "bars": false, "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "description": "The compression ratio of each level", + "description": "The pending bytes to be compacted", "editable": true, "error": false, "fieldConfig": { @@ -32823,11 +33169,11 @@ "h": 7, "w": 12, "x": 12, - "y": 98 + "y": 91 }, "height": null, "hideTimeOverride": false, - "id": 233, + "id": 235, "interval": null, "isNew": true, "legend": { @@ -32870,15 +33216,15 @@ "targets": [ { "datasource": "${DS_TEST-CLUSTER}", - "expr": "avg((\n tikv_engine_compression_ratio\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\"}\n \n)) by (cf, level, $additional_groupby) ", + "expr": "sum((\n tikv_engine_pending_compaction_bytes\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\"}\n \n)) by (cf, $additional_groupby) ", "format": "time_series", "hide": false, "instant": false, "interval": "", "intervalFactor": 1, - "legendFormat": "{{cf}}-L{{level}} {{$additional_groupby}}", + "legendFormat": "{{cf}} {{$additional_groupby}}", "metric": "", - "query": "avg((\n tikv_engine_compression_ratio\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\"}\n \n)) by (cf, level, $additional_groupby) ", + "query": "sum((\n tikv_engine_pending_compaction_bytes\n 
{k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\"}\n \n)) by (cf, $additional_groupby) ", "refId": "", "step": 10, "target": "" @@ -32887,7 +33233,7 @@ "thresholds": [], "timeFrom": null, "timeShift": null, - "title": "Compression ratio", + "title": "Compaction pending bytes", "tooltip": { "msResolution": true, "shared": true, @@ -32906,7 +33252,7 @@ "yaxes": [ { "decimals": null, - "format": "short", + "format": "bytes", "label": null, "logBase": 1, "max": null, @@ -32933,7 +33279,7 @@ "bars": false, "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "description": "The number of SST files for different column families in each level", + "description": "The number of snapshot of each TiKV instance", "editable": true, "error": false, "fieldConfig": { @@ -32956,11 +33302,11 @@ "h": 7, "w": 12, "x": 0, - "y": 105 + "y": 98 }, "height": null, "hideTimeOverride": false, - "id": 234, + "id": 236, "interval": null, "isNew": true, "legend": { @@ -33003,15 +33349,15 @@ "targets": [ { "datasource": "${DS_TEST-CLUSTER}", - "expr": "avg((\n tikv_engine_num_files_at_level\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\"}\n \n)) by (cf, level, $additional_groupby) ", + "expr": "((\n tikv_engine_num_snapshots\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\"}\n \n)) ", "format": "time_series", "hide": false, "instant": false, "interval": "", "intervalFactor": 1, - "legendFormat": "{{cf}}-L{{level}} {{$additional_groupby}}", + "legendFormat": "{{instance}}", "metric": "", - "query": "avg((\n tikv_engine_num_files_at_level\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\"}\n \n)) by (cf, level, $additional_groupby) ", + "query": "((\n tikv_engine_num_snapshots\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\"}\n \n)) ", "refId": "", "step": 10, "target": "" @@ -33020,7 +33366,7 @@ "thresholds": [], "timeFrom": null, "timeShift": null, - "title": "Number files at each level", + "title": "Number of snapshots", "tooltip": { "msResolution": true, "shared": true, @@ -33066,7 +33412,7 @@ "bars": false, "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "description": "The time that the oldest unreleased snapshot survivals", + "description": "The compression ratio of each level", "editable": true, "error": false, "fieldConfig": { @@ -33089,11 +33435,11 @@ "h": 7, "w": 12, "x": 12, - "y": 105 + "y": 98 }, "height": null, "hideTimeOverride": false, - "id": 235, + "id": 237, "interval": null, "isNew": true, "legend": { @@ -33136,15 +33482,15 @@ "targets": [ { "datasource": "${DS_TEST-CLUSTER}", - "expr": "((\n tikv_engine_oldest_snapshot_duration\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\"}\n \n)) ", + "expr": "avg((\n tikv_engine_compression_ratio\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\"}\n \n)) by (cf, level, $additional_groupby) ", "format": "time_series", "hide": false, "instant": false, "interval": "", "intervalFactor": 1, - "legendFormat": "{{instance}}", + "legendFormat": "{{cf}}-L{{level}} {{$additional_groupby}}", "metric": "", - "query": "((\n tikv_engine_oldest_snapshot_duration\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\"}\n \n)) ", + "query": "avg((\n tikv_engine_compression_ratio\n 
{k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\"}\n \n)) by (cf, level, $additional_groupby) ", "refId": "", "step": 10, "target": "" @@ -33153,7 +33499,7 @@ "thresholds": [], "timeFrom": null, "timeShift": null, - "title": "Oldest snapshots duration", + "title": "Compression ratio", "tooltip": { "msResolution": true, "shared": true, @@ -33172,7 +33518,7 @@ "yaxes": [ { "decimals": null, - "format": "s", + "format": "short", "label": null, "logBase": 1, "max": null, @@ -33194,117 +33540,12 @@ "alignLevel": 0 } }, - { - "cacheTimeout": null, - "cards": { - "cardPadding": null, - "cardRound": null - }, - "color": { - "cardColor": "#b4ff00", - "colorScale": "sqrt", - "colorScheme": "interpolateSpectral", - "exponent": 0.5, - "max": null, - "min": null, - "mode": "spectrum" - }, - "dataFormat": "tsbuckets", - "datasource": "${DS_TEST-CLUSTER}", - "description": "The level that the external file ingests into", - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "thresholds": { - "mode": "absolute", - "steps": [] - } - } - }, - "gridPos": { - "h": 7, - "w": 12, - "x": 0, - "y": 112 - }, - "heatmap": {}, - "height": null, - "hideTimeOverride": false, - "hideZeroBuckets": true, - "highlightCards": true, - "id": 236, - "interval": null, - "legend": { - "show": false - }, - "links": [], - "maxDataPoints": 512, - "maxPerRow": null, - "minSpan": null, - "options": {}, - "repeat": null, - "repeatDirection": null, - "reverseYBuckets": false, - "span": null, - "targets": [ - { - "datasource": "${DS_TEST-CLUSTER}", - "expr": "sum(increase(\n tikv_engine_ingestion_picked_level_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\"}\n [$__rate_interval]\n)) by (le) ", - "format": "heatmap", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 1, - "legendFormat": "{{le}}", - "metric": "", - "query": "sum(increase(\n tikv_engine_ingestion_picked_level_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\"}\n [$__rate_interval]\n)) by (le) ", - "refId": "", - "step": 10, - "target": "" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "Ingestion picked level", - "tooltip": { - "msResolution": true, - "shared": true, - "show": true, - "showHistogram": true, - "sort": 0, - "value_type": "individual" - }, - "transformations": [], - "transparent": false, - "type": "heatmap", - "xAxis": { - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "xBucketNumber": null, - "xBucketSize": null, - "yAxis": { - "decimals": 1, - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - "yBucketBound": "upper", - "yBucketNumber": null, - "yBucketSize": null - }, { "aliasColors": {}, "bars": false, "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "description": "Bucketed histogram of ingest external SST files duration.", + "description": "The number of SST files for different column families in each level", "editable": true, "error": false, "fieldConfig": { @@ -33326,12 +33567,12 @@ "gridPos": { "h": 7, "w": 12, - "x": 12, - "y": 112 + "x": 0, + "y": 105 }, "height": null, "hideTimeOverride": false, - "id": 237, + "id": 238, "interval": null, "isNew": true, "legend": { @@ -33367,90 +33608,22 @@ "renderer": "flot", "repeat": null, "repeatDirection": null, - "seriesOverrides": [ - { - "alias": "count", - "bars": false, - "dashLength": 1, - "dashes": true, 
- "fill": 2, - "fillBelowTo": null, - "lines": true, - "spaceLength": 1, - "transform": "negative-Y", - "yaxis": 2, - "zindex": -3 - }, - { - "alias": "avg", - "bars": false, - "fill": 7, - "fillBelowTo": null, - "lines": true, - "yaxis": 1, - "zindex": 0 - } - ], + "seriesOverrides": [], "span": null, "stack": false, "steppedLine": false, "targets": [ { "datasource": "${DS_TEST-CLUSTER}", - "expr": "histogram_quantile(0.9999,(\n sum(rate(\n tikv_storage_ingest_external_file_duration_secs_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (cf, type, le, $additional_groupby) \n \n \n)) ", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 1, - "legendFormat": "99.99%-{{cf}}-{{type}} {{$additional_groupby}}", - "metric": "", - "query": "histogram_quantile(0.9999,(\n sum(rate(\n tikv_storage_ingest_external_file_duration_secs_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (cf, type, le, $additional_groupby) \n \n \n)) ", - "refId": "", - "step": 10, - "target": "" - }, - { - "datasource": "${DS_TEST-CLUSTER}", - "expr": "histogram_quantile(0.99,(\n sum(rate(\n tikv_storage_ingest_external_file_duration_secs_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (cf, type, le, $additional_groupby) \n \n \n)) ", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 1, - "legendFormat": "99%-{{cf}}-{{type}} {{$additional_groupby}}", - "metric": "", - "query": "histogram_quantile(0.99,(\n sum(rate(\n tikv_storage_ingest_external_file_duration_secs_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (cf, type, le, $additional_groupby) \n \n \n)) ", - "refId": "", - "step": 10, - "target": "" - }, - { - "datasource": "${DS_TEST-CLUSTER}", - "expr": "(sum(rate(\n tikv_storage_ingest_external_file_duration_secs_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (cf, type, $additional_groupby) / sum(rate(\n tikv_storage_ingest_external_file_duration_secs_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (cf, type, $additional_groupby) )", + "expr": "avg((\n tikv_engine_num_files_at_level\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\"}\n \n)) by (cf, level, $additional_groupby) ", "format": "time_series", "hide": false, "instant": false, "interval": "", "intervalFactor": 1, - "legendFormat": "avg-{{cf}}-{{type}} {{$additional_groupby}}", - "metric": "", - "query": "(sum(rate(\n tikv_storage_ingest_external_file_duration_secs_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (cf, type, $additional_groupby) / sum(rate(\n tikv_storage_ingest_external_file_duration_secs_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (cf, type, $additional_groupby) )", - "refId": "", - "step": 10, - "target": "" - }, - { - "datasource": "${DS_TEST-CLUSTER}", - "expr": "sum(rate(\n tikv_storage_ingest_external_file_duration_secs_count\n 
{k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (cf, type, $additional_groupby) ", - "format": "time_series", - "hide": true, - "instant": false, - "interval": "", - "intervalFactor": 1, - "legendFormat": "count-{{cf}}-{{type}} {{$additional_groupby}}", + "legendFormat": "{{cf}}-L{{level}} {{$additional_groupby}}", "metric": "", - "query": "sum(rate(\n tikv_storage_ingest_external_file_duration_secs_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (cf, type, $additional_groupby) ", + "query": "avg((\n tikv_engine_num_files_at_level\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\"}\n \n)) by (cf, level, $additional_groupby) ", "refId": "", "step": 10, "target": "" @@ -33459,7 +33632,7 @@ "thresholds": [], "timeFrom": null, "timeShift": null, - "title": "Ingest SST duration seconds", + "title": "Number files at each level", "tooltip": { "msResolution": true, "shared": true, @@ -33478,7 +33651,7 @@ "yaxes": [ { "decimals": null, - "format": "s", + "format": "short", "label": null, "logBase": 1, "max": null, @@ -33505,7 +33678,7 @@ "bars": false, "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "description": null, + "description": "The time that the oldest unreleased snapshot survivals", "editable": true, "error": false, "fieldConfig": { @@ -33527,12 +33700,12 @@ "gridPos": { "h": 7, "w": 12, - "x": 0, - "y": 119 + "x": 12, + "y": 105 }, "height": null, "hideTimeOverride": false, - "id": 238, + "id": 239, "interval": null, "isNew": true, "legend": { @@ -33575,15 +33748,15 @@ "targets": [ { "datasource": "${DS_TEST-CLUSTER}", - "expr": "sum(rate(\n tikv_engine_write_stall_reason\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\"}\n [$__rate_interval]\n)) by (type, $additional_groupby) ", + "expr": "((\n tikv_engine_oldest_snapshot_duration\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\"}\n \n)) ", "format": "time_series", "hide": false, "instant": false, "interval": "", "intervalFactor": 1, - "legendFormat": "{{type}} {{$additional_groupby}}", + "legendFormat": "{{instance}}", "metric": "", - "query": "sum(rate(\n tikv_engine_write_stall_reason\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\"}\n [$__rate_interval]\n)) by (type, $additional_groupby) ", + "query": "((\n tikv_engine_oldest_snapshot_duration\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\"}\n \n)) ", "refId": "", "step": 10, "target": "" @@ -33592,7 +33765,7 @@ "thresholds": [], "timeFrom": null, "timeShift": null, - "title": "Write Stall Reason", + "title": "Oldest snapshots duration", "tooltip": { "msResolution": true, "shared": true, @@ -33611,7 +33784,7 @@ "yaxes": [ { "decimals": null, - "format": "short", + "format": "s", "label": null, "logBase": 1, "max": null, @@ -33633,12 +33806,117 @@ "alignLevel": 0 } }, + { + "cacheTimeout": null, + "cards": { + "cardPadding": null, + "cardRound": null + }, + "color": { + "cardColor": "#b4ff00", + "colorScale": "sqrt", + "colorScheme": "interpolateSpectral", + "exponent": 0.5, + "max": null, + "min": null, + "mode": "spectrum" + }, + "dataFormat": "tsbuckets", + "datasource": "${DS_TEST-CLUSTER}", + "description": "The level that the external file ingests into", + "editable": true, + "error": 
false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, + "gridPos": { + "h": 7, + "w": 12, + "x": 0, + "y": 112 + }, + "heatmap": {}, + "height": null, + "hideTimeOverride": false, + "hideZeroBuckets": true, + "highlightCards": true, + "id": 240, + "interval": null, + "legend": { + "show": false + }, + "links": [], + "maxDataPoints": 512, + "maxPerRow": null, + "minSpan": null, + "options": {}, + "repeat": null, + "repeatDirection": null, + "reverseYBuckets": false, + "span": null, + "targets": [ + { + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(increase(\n tikv_engine_ingestion_picked_level_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\"}\n [$__rate_interval]\n)) by (le) ", + "format": "heatmap", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "{{le}}", + "metric": "", + "query": "sum(increase(\n tikv_engine_ingestion_picked_level_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\"}\n [$__rate_interval]\n)) by (le) ", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Ingestion picked level", + "tooltip": { + "msResolution": true, + "shared": true, + "show": true, + "showHistogram": true, + "sort": 0, + "value_type": "individual" + }, + "transformations": [], + "transparent": false, + "type": "heatmap", + "xAxis": { + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "xBucketNumber": null, + "xBucketSize": null, + "yAxis": { + "decimals": 1, + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + "yBucketBound": "upper", + "yBucketNumber": null, + "yBucketSize": null + }, { "aliasColors": {}, "bars": false, "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "description": "The time which is caused by write stall", + "description": "Bucketed histogram of ingest external SST files duration.", "editable": true, "error": false, "fieldConfig": { @@ -33661,11 +33939,11 @@ "h": 7, "w": 12, "x": 12, - "y": 119 + "y": 112 }, "height": null, "hideTimeOverride": false, - "id": 239, + "id": 241, "interval": null, "isNew": true, "legend": { @@ -33701,67 +33979,90 @@ "renderer": "flot", "repeat": null, "repeatDirection": null, - "seriesOverrides": [], + "seriesOverrides": [ + { + "alias": "count", + "bars": false, + "dashLength": 1, + "dashes": true, + "fill": 2, + "fillBelowTo": null, + "lines": true, + "spaceLength": 1, + "transform": "negative-Y", + "yaxis": 2, + "zindex": -3 + }, + { + "alias": "avg", + "bars": false, + "fill": 7, + "fillBelowTo": null, + "lines": true, + "yaxis": 1, + "zindex": 0 + } + ], "span": null, "stack": false, "steppedLine": false, "targets": [ { "datasource": "${DS_TEST-CLUSTER}", - "expr": "max((\n tikv_engine_write_stall\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"write_stall_max\"}\n \n)) by ($additional_groupby) ", + "expr": "histogram_quantile(0.9999,(\n sum(rate(\n tikv_storage_ingest_external_file_duration_secs_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (cf, type, le, $additional_groupby) \n \n \n)) ", "format": "time_series", "hide": false, "instant": false, "interval": "", "intervalFactor": 1, - "legendFormat": "max {{$additional_groupby}}", + "legendFormat": 
"99.99%-{{cf}}-{{type}} {{$additional_groupby}}", "metric": "", - "query": "max((\n tikv_engine_write_stall\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"write_stall_max\"}\n \n)) by ($additional_groupby) ", + "query": "histogram_quantile(0.9999,(\n sum(rate(\n tikv_storage_ingest_external_file_duration_secs_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (cf, type, le, $additional_groupby) \n \n \n)) ", "refId": "", "step": 10, "target": "" }, { "datasource": "${DS_TEST-CLUSTER}", - "expr": "avg((\n tikv_engine_write_stall\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"write_stall_percentile99\"}\n \n)) by ($additional_groupby) ", + "expr": "histogram_quantile(0.99,(\n sum(rate(\n tikv_storage_ingest_external_file_duration_secs_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (cf, type, le, $additional_groupby) \n \n \n)) ", "format": "time_series", "hide": false, "instant": false, "interval": "", "intervalFactor": 1, - "legendFormat": "99% {{$additional_groupby}}", + "legendFormat": "99%-{{cf}}-{{type}} {{$additional_groupby}}", "metric": "", - "query": "avg((\n tikv_engine_write_stall\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"write_stall_percentile99\"}\n \n)) by ($additional_groupby) ", + "query": "histogram_quantile(0.99,(\n sum(rate(\n tikv_storage_ingest_external_file_duration_secs_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (cf, type, le, $additional_groupby) \n \n \n)) ", "refId": "", "step": 10, "target": "" }, { "datasource": "${DS_TEST-CLUSTER}", - "expr": "avg((\n tikv_engine_write_stall\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"write_stall_percentile95\"}\n \n)) by ($additional_groupby) ", + "expr": "(sum(rate(\n tikv_storage_ingest_external_file_duration_secs_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (cf, type, $additional_groupby) / sum(rate(\n tikv_storage_ingest_external_file_duration_secs_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (cf, type, $additional_groupby) )", "format": "time_series", "hide": false, "instant": false, "interval": "", "intervalFactor": 1, - "legendFormat": "95% {{$additional_groupby}}", + "legendFormat": "avg-{{cf}}-{{type}} {{$additional_groupby}}", "metric": "", - "query": "avg((\n tikv_engine_write_stall\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"write_stall_percentile95\"}\n \n)) by ($additional_groupby) ", + "query": "(sum(rate(\n tikv_storage_ingest_external_file_duration_secs_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (cf, type, $additional_groupby) / sum(rate(\n tikv_storage_ingest_external_file_duration_secs_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (cf, type, $additional_groupby) )", "refId": "", "step": 10, "target": "" }, { "datasource": "${DS_TEST-CLUSTER}", - "expr": "avg((\n tikv_engine_write_stall\n 
{k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"write_stall_average\"}\n \n)) by ($additional_groupby) ", + "expr": "sum(rate(\n tikv_storage_ingest_external_file_duration_secs_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (cf, type, $additional_groupby) ", "format": "time_series", - "hide": false, + "hide": true, "instant": false, "interval": "", "intervalFactor": 1, - "legendFormat": "avg {{$additional_groupby}}", + "legendFormat": "count-{{cf}}-{{type}} {{$additional_groupby}}", "metric": "", - "query": "avg((\n tikv_engine_write_stall\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"write_stall_average\"}\n \n)) by ($additional_groupby) ", + "query": "sum(rate(\n tikv_storage_ingest_external_file_duration_secs_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (cf, type, $additional_groupby) ", "refId": "", "step": 10, "target": "" @@ -33770,7 +34071,7 @@ "thresholds": [], "timeFrom": null, "timeShift": null, - "title": "Write stall duration", + "title": "Ingest SST duration seconds", "tooltip": { "msResolution": true, "shared": true, @@ -33789,7 +34090,7 @@ "yaxes": [ { "decimals": null, - "format": "\u00b5s", + "format": "s", "label": null, "logBase": 1, "max": null, @@ -33816,7 +34117,7 @@ "bars": false, "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "description": "Stall conditions changed of each column family", + "description": null, "editable": true, "error": false, "fieldConfig": { @@ -33839,11 +34140,11 @@ "h": 7, "w": 12, "x": 0, - "y": 126 + "y": 119 }, "height": null, "hideTimeOverride": false, - "id": 240, + "id": 242, "interval": null, "isNew": true, "legend": { @@ -33886,15 +34187,15 @@ "targets": [ { "datasource": "${DS_TEST-CLUSTER}", - "expr": "((\n tikv_engine_stall_conditions_changed\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\"}\n \n)) ", + "expr": "sum(rate(\n tikv_engine_write_stall_reason\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\"}\n [$__rate_interval]\n)) by (type, $additional_groupby) ", "format": "time_series", "hide": false, "instant": false, "interval": "", "intervalFactor": 1, - "legendFormat": "{{instance}}-{{cf}}-{{type}}", + "legendFormat": "{{type}} {{$additional_groupby}}", "metric": "", - "query": "((\n tikv_engine_stall_conditions_changed\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\"}\n \n)) ", + "query": "sum(rate(\n tikv_engine_write_stall_reason\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\"}\n [$__rate_interval]\n)) by (type, $additional_groupby) ", "refId": "", "step": 10, "target": "" @@ -33903,7 +34204,7 @@ "thresholds": [], "timeFrom": null, "timeShift": null, - "title": "Stall conditions changed of each CF", + "title": "Write Stall Reason", "tooltip": { "msResolution": true, "shared": true, @@ -33949,7 +34250,7 @@ "bars": false, "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "description": "The memtable size of each column family", + "description": "The time which is caused by write stall", "editable": true, "error": false, "fieldConfig": { @@ -33972,11 +34273,11 @@ "h": 7, "w": 12, "x": 12, - "y": 126 + "y": 119 }, "height": null, "hideTimeOverride": 
false, - "id": 241, + "id": 243, "interval": null, "isNew": true, "legend": { @@ -34019,15 +34320,60 @@ "targets": [ { "datasource": "${DS_TEST-CLUSTER}", - "expr": "avg((\n tikv_engine_memory_bytes\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"mem-tables-all\"}\n \n)) by (cf, $additional_groupby) ", + "expr": "max((\n tikv_engine_write_stall\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"write_stall_max\"}\n \n)) by ($additional_groupby) ", "format": "time_series", "hide": false, "instant": false, "interval": "", "intervalFactor": 1, - "legendFormat": "{{cf}} {{$additional_groupby}}", + "legendFormat": "max {{$additional_groupby}}", "metric": "", - "query": "avg((\n tikv_engine_memory_bytes\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"mem-tables-all\"}\n \n)) by (cf, $additional_groupby) ", + "query": "max((\n tikv_engine_write_stall\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"write_stall_max\"}\n \n)) by ($additional_groupby) ", + "refId": "", + "step": 10, + "target": "" + }, + { + "datasource": "${DS_TEST-CLUSTER}", + "expr": "avg((\n tikv_engine_write_stall\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"write_stall_percentile99\"}\n \n)) by ($additional_groupby) ", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "99% {{$additional_groupby}}", + "metric": "", + "query": "avg((\n tikv_engine_write_stall\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"write_stall_percentile99\"}\n \n)) by ($additional_groupby) ", + "refId": "", + "step": 10, + "target": "" + }, + { + "datasource": "${DS_TEST-CLUSTER}", + "expr": "avg((\n tikv_engine_write_stall\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"write_stall_percentile95\"}\n \n)) by ($additional_groupby) ", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "95% {{$additional_groupby}}", + "metric": "", + "query": "avg((\n tikv_engine_write_stall\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"write_stall_percentile95\"}\n \n)) by ($additional_groupby) ", + "refId": "", + "step": 10, + "target": "" + }, + { + "datasource": "${DS_TEST-CLUSTER}", + "expr": "avg((\n tikv_engine_write_stall\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"write_stall_average\"}\n \n)) by ($additional_groupby) ", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "avg {{$additional_groupby}}", + "metric": "", + "query": "avg((\n tikv_engine_write_stall\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"write_stall_average\"}\n \n)) by ($additional_groupby) ", "refId": "", "step": 10, "target": "" @@ -34036,7 +34382,7 @@ "thresholds": [], "timeFrom": null, "timeShift": null, - "title": "Memtable size", + "title": "Write stall duration", "tooltip": { "msResolution": true, "shared": true, @@ -34055,7 +34401,7 @@ "yaxes": [ { "decimals": null, - "format": "bytes", + "format": "\u00b5s", 
"label": null, "logBase": 1, "max": null, @@ -34076,55 +34422,13 @@ "align": false, "alignLevel": 0 } - } - ], - "repeat": "db", - "repeatDirection": null, - "span": null, - "targets": [], - "timeFrom": null, - "timeShift": null, - "title": "RocksDB - $db", - "transformations": [], - "transparent": false, - "type": "row" - }, - { - "cacheTimeout": null, - "collapsed": true, - "datasource": null, - "description": null, - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "thresholds": { - "mode": "absolute", - "steps": [] - } - } - }, - "gridPos": { - "h": 7, - "w": 24, - "x": 0, - "y": 0 - }, - "height": null, - "hideTimeOverride": false, - "id": 242, - "interval": null, - "links": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "panels": [ + }, { "aliasColors": {}, "bars": false, "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "description": null, + "description": "Stall conditions changed of each column family", "editable": true, "error": false, "fieldConfig": { @@ -34147,11 +34451,11 @@ "h": 7, "w": 12, "x": 0, - "y": 0 + "y": 126 }, "height": null, "hideTimeOverride": false, - "id": 243, + "id": 244, "interval": null, "isNew": true, "legend": { @@ -34194,30 +34498,148 @@ "targets": [ { "datasource": "${DS_TEST-CLUSTER}", - "expr": "sum((\n tikv_engine_titandb_num_live_blob_file\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\"}\n \n)) by ($additional_groupby) ", + "expr": "((\n tikv_engine_stall_conditions_changed\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\"}\n \n)) ", "format": "time_series", "hide": false, "instant": false, "interval": "", "intervalFactor": 1, - "legendFormat": "live blob file num {{$additional_groupby}}", + "legendFormat": "{{instance}}-{{cf}}-{{type}}", "metric": "", - "query": "sum((\n tikv_engine_titandb_num_live_blob_file\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\"}\n \n)) by ($additional_groupby) ", + "query": "((\n tikv_engine_stall_conditions_changed\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\"}\n \n)) ", "refId": "", "step": 10, "target": "" + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Stall conditions changed of each CF", + "tooltip": { + "msResolution": true, + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "transformations": [], + "transparent": false, + "type": "graph", + "xaxis": { + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "decimals": null, + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true }, + { + "decimals": null, + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": 0 + } + }, + { + "aliasColors": {}, + "bars": false, + "cacheTimeout": null, + "datasource": "${DS_TEST-CLUSTER}", + "description": "The memtable size of each column family", + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, + "fill": 1, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, + "gridPos": { + "h": 7, + "w": 12, + "x": 12, + "y": 126 + }, + 
"height": null, + "hideTimeOverride": false, + "id": 245, + "interval": null, + "isNew": true, + "legend": { + "alignAsTable": true, + "avg": false, + "current": true, + "hideEmpty": true, + "hideZero": true, + "max": true, + "min": false, + "rightSide": true, + "show": true, + "sideWidth": null, + "sort": "max", + "sortDesc": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", + "options": { + "alertThreshold": true, + "dataLinks": [] + }, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "repeatDirection": null, + "seriesOverrides": [], + "span": null, + "stack": false, + "steppedLine": false, + "targets": [ { "datasource": "${DS_TEST-CLUSTER}", - "expr": "sum((\n tikv_engine_titandb_num_obsolete_blob_file\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\"}\n \n)) by ($additional_groupby) ", + "expr": "avg((\n tikv_engine_memory_bytes\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"mem-tables-all\"}\n \n)) by (cf, $additional_groupby) ", "format": "time_series", "hide": false, "instant": false, "interval": "", "intervalFactor": 1, - "legendFormat": "obsolete blob file num {{$additional_groupby}}", + "legendFormat": "{{cf}} {{$additional_groupby}}", "metric": "", - "query": "sum((\n tikv_engine_titandb_num_obsolete_blob_file\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\"}\n \n)) by ($additional_groupby) ", + "query": "avg((\n tikv_engine_memory_bytes\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"mem-tables-all\"}\n \n)) by (cf, $additional_groupby) ", "refId": "", "step": 10, "target": "" @@ -34226,7 +34648,7 @@ "thresholds": [], "timeFrom": null, "timeShift": null, - "title": "Blob file count", + "title": "Memtable size", "tooltip": { "msResolution": true, "shared": true, @@ -34245,7 +34667,7 @@ "yaxes": [ { "decimals": null, - "format": "none", + "format": "bytes", "label": null, "logBase": 1, "max": null, @@ -34266,7 +34688,49 @@ "align": false, "alignLevel": 0 } - }, + } + ], + "repeat": "db", + "repeatDirection": null, + "span": null, + "targets": [], + "timeFrom": null, + "timeShift": null, + "title": "RocksDB - $db", + "transformations": [], + "transparent": false, + "type": "row" + }, + { + "cacheTimeout": null, + "collapsed": true, + "datasource": null, + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, + "gridPos": { + "h": 7, + "w": 24, + "x": 0, + "y": 0 + }, + "height": null, + "hideTimeOverride": false, + "id": 246, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "panels": [ { "aliasColors": {}, "bars": false, @@ -34294,12 +34758,12 @@ "gridPos": { "h": 7, "w": 12, - "x": 12, + "x": 0, "y": 0 }, "height": null, "hideTimeOverride": false, - "id": 244, + "id": 247, "interval": null, "isNew": true, "legend": { @@ -34342,30 +34806,30 @@ "targets": [ { "datasource": "${DS_TEST-CLUSTER}", - "expr": "sum((\n tikv_engine_titandb_live_blob_file_size\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\"}\n \n)) by ($additional_groupby) ", + "expr": "sum((\n 
tikv_engine_titandb_num_live_blob_file\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\"}\n \n)) by ($additional_groupby) ", "format": "time_series", "hide": false, "instant": false, "interval": "", "intervalFactor": 1, - "legendFormat": "live blob file size {{$additional_groupby}}", + "legendFormat": "live blob file num {{$additional_groupby}}", "metric": "", - "query": "sum((\n tikv_engine_titandb_live_blob_file_size\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\"}\n \n)) by ($additional_groupby) ", + "query": "sum((\n tikv_engine_titandb_num_live_blob_file\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\"}\n \n)) by ($additional_groupby) ", "refId": "", "step": 10, "target": "" }, { "datasource": "${DS_TEST-CLUSTER}", - "expr": "sum((\n tikv_engine_titandb_obsolete_blob_file_size\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\"}\n \n)) by ($additional_groupby) ", + "expr": "sum((\n tikv_engine_titandb_num_obsolete_blob_file\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\"}\n \n)) by ($additional_groupby) ", "format": "time_series", "hide": false, "instant": false, "interval": "", "intervalFactor": 1, - "legendFormat": "obsolete blob file size {{$additional_groupby}}", + "legendFormat": "obsolete blob file num {{$additional_groupby}}", "metric": "", - "query": "sum((\n tikv_engine_titandb_obsolete_blob_file_size\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\"}\n \n)) by ($additional_groupby) ", + "query": "sum((\n tikv_engine_titandb_num_obsolete_blob_file\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\"}\n \n)) by ($additional_groupby) ", "refId": "", "step": 10, "target": "" @@ -34374,7 +34838,7 @@ "thresholds": [], "timeFrom": null, "timeShift": null, - "title": "Blob file size", + "title": "Blob file count", "tooltip": { "msResolution": true, "shared": true, @@ -34393,7 +34857,7 @@ "yaxes": [ { "decimals": null, - "format": "bytes", + "format": "none", "label": null, "logBase": 1, "max": null, @@ -34420,7 +34884,7 @@ "bars": false, "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "description": "The blob cache size.", + "description": null, "editable": true, "error": false, "fieldConfig": { @@ -34442,12 +34906,12 @@ "gridPos": { "h": 7, "w": 12, - "x": 0, - "y": 7 + "x": 12, + "y": 0 }, "height": null, "hideTimeOverride": false, - "id": 245, + "id": 248, "interval": null, "isNew": true, "legend": { @@ -34490,15 +34954,30 @@ "targets": [ { "datasource": "${DS_TEST-CLUSTER}", - "expr": "topk(20,(\n avg((\n tikv_engine_blob_cache_size_bytes\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\"}\n \n)) by (cf, instance) \n \n \n)) ", + "expr": "sum((\n tikv_engine_titandb_live_blob_file_size\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\"}\n \n)) by ($additional_groupby) ", "format": "time_series", "hide": false, "instant": false, "interval": "", "intervalFactor": 1, - "legendFormat": "{{instance}}-{{cf}}", + "legendFormat": "live blob file size {{$additional_groupby}}", "metric": "", - "query": "topk(20,(\n avg((\n tikv_engine_blob_cache_size_bytes\n 
{k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\"}\n \n)) by (cf, instance) \n \n \n)) ", + "query": "sum((\n tikv_engine_titandb_live_blob_file_size\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\"}\n \n)) by ($additional_groupby) ", + "refId": "", + "step": 10, + "target": "" + }, + { + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum((\n tikv_engine_titandb_obsolete_blob_file_size\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\"}\n \n)) by ($additional_groupby) ", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "obsolete blob file size {{$additional_groupby}}", + "metric": "", + "query": "sum((\n tikv_engine_titandb_obsolete_blob_file_size\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\"}\n \n)) by ($additional_groupby) ", "refId": "", "step": 10, "target": "" @@ -34507,7 +34986,7 @@ "thresholds": [], "timeFrom": null, "timeShift": null, - "title": "Blob cache size", + "title": "Blob file size", "tooltip": { "msResolution": true, "shared": true, @@ -34553,7 +35032,7 @@ "bars": false, "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "description": "The hit rate of block cache", + "description": "The blob cache size.", "editable": true, "error": false, "fieldConfig": { @@ -34575,12 +35054,12 @@ "gridPos": { "h": 7, "w": 12, - "x": 12, + "x": 0, "y": 7 }, "height": null, "hideTimeOverride": false, - "id": 246, + "id": 249, "interval": null, "isNew": true, "legend": { @@ -34623,15 +35102,15 @@ "targets": [ { "datasource": "${DS_TEST-CLUSTER}", - "expr": "(sum(rate(\n tikv_engine_blob_cache_efficiency\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=\"blob_cache_hit\"}\n [$__rate_interval]\n)) by ($additional_groupby) / (sum(rate(\n tikv_engine_blob_cache_efficiency\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=\"blob_cache_hit\"}\n [$__rate_interval]\n)) by ($additional_groupby) + sum(rate(\n tikv_engine_blob_cache_efficiency\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=\"blob_cache_miss\"}\n [$__rate_interval]\n)) by ($additional_groupby) ))", + "expr": "topk(20,(\n avg((\n tikv_engine_blob_cache_size_bytes\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\"}\n \n)) by (cf, instance) \n \n \n)) ", "format": "time_series", "hide": false, "instant": false, "interval": "", "intervalFactor": 1, - "legendFormat": "all {{$additional_groupby}}", + "legendFormat": "{{instance}}-{{cf}}", "metric": "", - "query": "(sum(rate(\n tikv_engine_blob_cache_efficiency\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=\"blob_cache_hit\"}\n [$__rate_interval]\n)) by ($additional_groupby) / (sum(rate(\n tikv_engine_blob_cache_efficiency\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=\"blob_cache_hit\"}\n [$__rate_interval]\n)) by ($additional_groupby) + sum(rate(\n tikv_engine_blob_cache_efficiency\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=\"blob_cache_miss\"}\n [$__rate_interval]\n)) by 
($additional_groupby) ))", + "query": "topk(20,(\n avg((\n tikv_engine_blob_cache_size_bytes\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\"}\n \n)) by (cf, instance) \n \n \n)) ", "refId": "", "step": 10, "target": "" @@ -34640,7 +35119,7 @@ "thresholds": [], "timeFrom": null, "timeShift": null, - "title": "Blob cache hit", + "title": "Blob cache size", "tooltip": { "msResolution": true, "shared": true, @@ -34659,7 +35138,7 @@ "yaxes": [ { "decimals": null, - "format": "percentunit", + "format": "bytes", "label": null, "logBase": 1, "max": null, @@ -34686,7 +35165,7 @@ "bars": false, "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "description": null, + "description": "The hit rate of block cache", "editable": true, "error": false, "fieldConfig": { @@ -34707,13 +35186,13 @@ }, "gridPos": { "h": 7, - "w": 24, - "x": 0, - "y": 14 + "w": 12, + "x": 12, + "y": 7 }, "height": null, "hideTimeOverride": false, - "id": 247, + "id": 250, "interval": null, "isNew": true, "legend": { @@ -34756,60 +35235,15 @@ "targets": [ { "datasource": "${DS_TEST-CLUSTER}", - "expr": "avg((\n tikv_engine_blob_iter_touch_blob_file_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=\"blob_iter_touch_blob_file_count_average\"}\n \n)) by ($additional_groupby) ", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 1, - "legendFormat": "avg {{$additional_groupby}}", - "metric": "", - "query": "avg((\n tikv_engine_blob_iter_touch_blob_file_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=\"blob_iter_touch_blob_file_count_average\"}\n \n)) by ($additional_groupby) ", - "refId": "", - "step": 10, - "target": "" - }, - { - "datasource": "${DS_TEST-CLUSTER}", - "expr": "avg((\n tikv_engine_blob_iter_touch_blob_file_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=\"blob_iter_touch_blob_file_count_percentile95\"}\n \n)) by ($additional_groupby) ", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 1, - "legendFormat": "95% {{$additional_groupby}}", - "metric": "", - "query": "avg((\n tikv_engine_blob_iter_touch_blob_file_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=\"blob_iter_touch_blob_file_count_percentile95\"}\n \n)) by ($additional_groupby) ", - "refId": "", - "step": 10, - "target": "" - }, - { - "datasource": "${DS_TEST-CLUSTER}", - "expr": "avg((\n tikv_engine_blob_iter_touch_blob_file_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=\"blob_iter_touch_blob_file_count_percentile99\"}\n \n)) by ($additional_groupby) ", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 1, - "legendFormat": "99% {{$additional_groupby}}", - "metric": "", - "query": "avg((\n tikv_engine_blob_iter_touch_blob_file_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=\"blob_iter_touch_blob_file_count_percentile99\"}\n \n)) by ($additional_groupby) ", - "refId": "", - "step": 10, - "target": "" - }, - { - "datasource": "${DS_TEST-CLUSTER}", - "expr": "max((\n tikv_engine_blob_iter_touch_blob_file_count\n 
{k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=\"blob_iter_touch_blob_file_count_max\"}\n \n)) by ($additional_groupby) ", + "expr": "(sum(rate(\n tikv_engine_blob_cache_efficiency\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=\"blob_cache_hit\"}\n [$__rate_interval]\n)) by ($additional_groupby) / (sum(rate(\n tikv_engine_blob_cache_efficiency\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=\"blob_cache_hit\"}\n [$__rate_interval]\n)) by ($additional_groupby) + sum(rate(\n tikv_engine_blob_cache_efficiency\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=\"blob_cache_miss\"}\n [$__rate_interval]\n)) by ($additional_groupby) ))", "format": "time_series", "hide": false, "instant": false, "interval": "", "intervalFactor": 1, - "legendFormat": "max {{$additional_groupby}}", + "legendFormat": "all {{$additional_groupby}}", "metric": "", - "query": "max((\n tikv_engine_blob_iter_touch_blob_file_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=\"blob_iter_touch_blob_file_count_max\"}\n \n)) by ($additional_groupby) ", + "query": "(sum(rate(\n tikv_engine_blob_cache_efficiency\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=\"blob_cache_hit\"}\n [$__rate_interval]\n)) by ($additional_groupby) / (sum(rate(\n tikv_engine_blob_cache_efficiency\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=\"blob_cache_hit\"}\n [$__rate_interval]\n)) by ($additional_groupby) + sum(rate(\n tikv_engine_blob_cache_efficiency\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=\"blob_cache_miss\"}\n [$__rate_interval]\n)) by ($additional_groupby) ))", "refId": "", "step": 10, "target": "" @@ -34818,7 +35252,7 @@ "thresholds": [], "timeFrom": null, "timeShift": null, - "title": "Iter touched blob file count", + "title": "Blob cache hit", "tooltip": { "msResolution": true, "shared": true, @@ -34837,7 +35271,7 @@ "yaxes": [ { "decimals": null, - "format": "none", + "format": "percentunit", "label": null, "logBase": 1, "max": null, @@ -34885,13 +35319,13 @@ }, "gridPos": { "h": 7, - "w": 12, + "w": 24, "x": 0, - "y": 21 + "y": 14 }, "height": null, "hideTimeOverride": false, - "id": 248, + "id": 251, "interval": null, "isNew": true, "legend": { @@ -34934,7 +35368,7 @@ "targets": [ { "datasource": "${DS_TEST-CLUSTER}", - "expr": "avg((\n tikv_engine_blob_key_size\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=\"blob_key_size_average\"}\n \n)) by ($additional_groupby) ", + "expr": "avg((\n tikv_engine_blob_iter_touch_blob_file_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=\"blob_iter_touch_blob_file_count_average\"}\n \n)) by ($additional_groupby) ", "format": "time_series", "hide": false, "instant": false, @@ -34942,14 +35376,14 @@ "intervalFactor": 1, "legendFormat": "avg {{$additional_groupby}}", "metric": "", - "query": "avg((\n tikv_engine_blob_key_size\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=\"blob_key_size_average\"}\n \n)) by 
($additional_groupby) ", + "query": "avg((\n tikv_engine_blob_iter_touch_blob_file_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=\"blob_iter_touch_blob_file_count_average\"}\n \n)) by ($additional_groupby) ", "refId": "", "step": 10, "target": "" }, { "datasource": "${DS_TEST-CLUSTER}", - "expr": "avg((\n tikv_engine_blob_key_size\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=\"blob_key_size_percentile95\"}\n \n)) by ($additional_groupby) ", + "expr": "avg((\n tikv_engine_blob_iter_touch_blob_file_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=\"blob_iter_touch_blob_file_count_percentile95\"}\n \n)) by ($additional_groupby) ", "format": "time_series", "hide": false, "instant": false, @@ -34957,14 +35391,14 @@ "intervalFactor": 1, "legendFormat": "95% {{$additional_groupby}}", "metric": "", - "query": "avg((\n tikv_engine_blob_key_size\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=\"blob_key_size_percentile95\"}\n \n)) by ($additional_groupby) ", + "query": "avg((\n tikv_engine_blob_iter_touch_blob_file_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=\"blob_iter_touch_blob_file_count_percentile95\"}\n \n)) by ($additional_groupby) ", "refId": "", "step": 10, "target": "" }, { "datasource": "${DS_TEST-CLUSTER}", - "expr": "avg((\n tikv_engine_blob_key_size\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=\"blob_key_size_percentile99\"}\n \n)) by ($additional_groupby) ", + "expr": "avg((\n tikv_engine_blob_iter_touch_blob_file_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=\"blob_iter_touch_blob_file_count_percentile99\"}\n \n)) by ($additional_groupby) ", "format": "time_series", "hide": false, "instant": false, @@ -34972,14 +35406,14 @@ "intervalFactor": 1, "legendFormat": "99% {{$additional_groupby}}", "metric": "", - "query": "avg((\n tikv_engine_blob_key_size\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=\"blob_key_size_percentile99\"}\n \n)) by ($additional_groupby) ", + "query": "avg((\n tikv_engine_blob_iter_touch_blob_file_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=\"blob_iter_touch_blob_file_count_percentile99\"}\n \n)) by ($additional_groupby) ", "refId": "", "step": 10, "target": "" }, { "datasource": "${DS_TEST-CLUSTER}", - "expr": "max((\n tikv_engine_blob_key_size\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=\"blob_key_size_max\"}\n \n)) by ($additional_groupby) ", + "expr": "max((\n tikv_engine_blob_iter_touch_blob_file_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=\"blob_iter_touch_blob_file_count_max\"}\n \n)) by ($additional_groupby) ", "format": "time_series", "hide": false, "instant": false, @@ -34987,7 +35421,7 @@ "intervalFactor": 1, "legendFormat": "max {{$additional_groupby}}", "metric": "", - "query": "max((\n tikv_engine_blob_key_size\n 
{k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=\"blob_key_size_max\"}\n \n)) by ($additional_groupby) ", + "query": "max((\n tikv_engine_blob_iter_touch_blob_file_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=\"blob_iter_touch_blob_file_count_max\"}\n \n)) by ($additional_groupby) ", "refId": "", "step": 10, "target": "" @@ -34996,7 +35430,7 @@ "thresholds": [], "timeFrom": null, "timeShift": null, - "title": "Blob key size", + "title": "Iter touched blob file count", "tooltip": { "msResolution": true, "shared": true, @@ -35015,7 +35449,7 @@ "yaxes": [ { "decimals": null, - "format": "bytes", + "format": "none", "label": null, "logBase": 1, "max": null, @@ -35064,12 +35498,12 @@ "gridPos": { "h": 7, "w": 12, - "x": 12, + "x": 0, "y": 21 }, "height": null, "hideTimeOverride": false, - "id": 249, + "id": 252, "interval": null, "isNew": true, "legend": { @@ -35112,7 +35546,7 @@ "targets": [ { "datasource": "${DS_TEST-CLUSTER}", - "expr": "avg((\n tikv_engine_blob_value_size\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=\"blob_value_size_average\"}\n \n)) by ($additional_groupby) ", + "expr": "avg((\n tikv_engine_blob_key_size\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=\"blob_key_size_average\"}\n \n)) by ($additional_groupby) ", "format": "time_series", "hide": false, "instant": false, @@ -35120,14 +35554,14 @@ "intervalFactor": 1, "legendFormat": "avg {{$additional_groupby}}", "metric": "", - "query": "avg((\n tikv_engine_blob_value_size\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=\"blob_value_size_average\"}\n \n)) by ($additional_groupby) ", + "query": "avg((\n tikv_engine_blob_key_size\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=\"blob_key_size_average\"}\n \n)) by ($additional_groupby) ", "refId": "", "step": 10, "target": "" }, { "datasource": "${DS_TEST-CLUSTER}", - "expr": "avg((\n tikv_engine_blob_value_size\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=\"blob_value_size_percentile95\"}\n \n)) by ($additional_groupby) ", + "expr": "avg((\n tikv_engine_blob_key_size\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=\"blob_key_size_percentile95\"}\n \n)) by ($additional_groupby) ", "format": "time_series", "hide": false, "instant": false, @@ -35135,14 +35569,14 @@ "intervalFactor": 1, "legendFormat": "95% {{$additional_groupby}}", "metric": "", - "query": "avg((\n tikv_engine_blob_value_size\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=\"blob_value_size_percentile95\"}\n \n)) by ($additional_groupby) ", + "query": "avg((\n tikv_engine_blob_key_size\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=\"blob_key_size_percentile95\"}\n \n)) by ($additional_groupby) ", "refId": "", "step": 10, "target": "" }, { "datasource": "${DS_TEST-CLUSTER}", - "expr": "avg((\n tikv_engine_blob_value_size\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=\"blob_value_size_percentile99\"}\n \n)) by ($additional_groupby) ", + 
"expr": "avg((\n tikv_engine_blob_key_size\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=\"blob_key_size_percentile99\"}\n \n)) by ($additional_groupby) ", "format": "time_series", "hide": false, "instant": false, @@ -35150,14 +35584,14 @@ "intervalFactor": 1, "legendFormat": "99% {{$additional_groupby}}", "metric": "", - "query": "avg((\n tikv_engine_blob_value_size\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=\"blob_value_size_percentile99\"}\n \n)) by ($additional_groupby) ", + "query": "avg((\n tikv_engine_blob_key_size\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=\"blob_key_size_percentile99\"}\n \n)) by ($additional_groupby) ", "refId": "", "step": 10, "target": "" }, { "datasource": "${DS_TEST-CLUSTER}", - "expr": "max((\n tikv_engine_blob_value_size\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=\"blob_value_size_max\"}\n \n)) by ($additional_groupby) ", + "expr": "max((\n tikv_engine_blob_key_size\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=\"blob_key_size_max\"}\n \n)) by ($additional_groupby) ", "format": "time_series", "hide": false, "instant": false, @@ -35165,7 +35599,7 @@ "intervalFactor": 1, "legendFormat": "max {{$additional_groupby}}", "metric": "", - "query": "max((\n tikv_engine_blob_value_size\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=\"blob_value_size_max\"}\n \n)) by ($additional_groupby) ", + "query": "max((\n tikv_engine_blob_key_size\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=\"blob_key_size_max\"}\n \n)) by ($additional_groupby) ", "refId": "", "step": 10, "target": "" @@ -35174,7 +35608,7 @@ "thresholds": [], "timeFrom": null, "timeShift": null, - "title": "Blob value size", + "title": "Blob key size", "tooltip": { "msResolution": true, "shared": true, @@ -35215,139 +35649,6 @@ "alignLevel": 0 } }, - { - "aliasColors": {}, - "bars": false, - "cacheTimeout": null, - "datasource": "${DS_TEST-CLUSTER}", - "description": null, - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "thresholds": { - "mode": "absolute", - "steps": [] - } - } - }, - "fill": 1, - "fillGradient": 1, - "grid": { - "threshold1": null, - "threshold1Color": "rgba(216, 200, 27, 0.27)", - "threshold2": null, - "threshold2Color": "rgba(234, 112, 112, 0.22)" - }, - "gridPos": { - "h": 7, - "w": 12, - "x": 0, - "y": 28 - }, - "height": null, - "hideTimeOverride": false, - "id": 250, - "interval": null, - "isNew": true, - "legend": { - "alignAsTable": true, - "avg": false, - "current": true, - "hideEmpty": true, - "hideZero": true, - "max": true, - "min": false, - "rightSide": true, - "show": true, - "sideWidth": null, - "sort": "max", - "sortDesc": true, - "total": false, - "values": true - }, - "lines": true, - "linewidth": 1, - "links": [], - "maxDataPoints": null, - "maxPerRow": null, - "minSpan": null, - "nullPointMode": "null as zero", - "options": { - "alertThreshold": true, - "dataLinks": [] - }, - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "repeat": null, - "repeatDirection": null, - "seriesOverrides": [], - "span": null, - "stack": false, - "steppedLine": false, - "targets": [ - { - 
"datasource": "${DS_TEST-CLUSTER}", - "expr": "sum(rate(\n tikv_engine_blob_locate\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=\"number_blob_get\"}\n [$__rate_interval]\n)) by ($additional_groupby) ", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 1, - "legendFormat": "get {{$additional_groupby}}", - "metric": "", - "query": "sum(rate(\n tikv_engine_blob_locate\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=\"number_blob_get\"}\n [$__rate_interval]\n)) by ($additional_groupby) ", - "refId": "", - "step": 10, - "target": "" - } - ], - "thresholds": [], - "timeFrom": null, - "timeShift": null, - "title": "Blob get operations", - "tooltip": { - "msResolution": true, - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "transformations": [], - "transparent": false, - "type": "graph", - "xaxis": { - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "decimals": null, - "format": "ops", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "decimals": null, - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ], - "yaxis": { - "align": false, - "alignLevel": 0 - } - }, { "aliasColors": {}, "bars": false, @@ -35376,11 +35677,11 @@ "h": 7, "w": 12, "x": 12, - "y": 28 + "y": 21 }, "height": null, "hideTimeOverride": false, - "id": 251, + "id": 253, "interval": null, "isNew": true, "legend": { @@ -35423,60 +35724,60 @@ "targets": [ { "datasource": "${DS_TEST-CLUSTER}", - "expr": "avg((\n tikv_engine_blob_get_micros_seconds\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=~\".*_average\"}\n \n)) by (type, $additional_groupby) ", + "expr": "avg((\n tikv_engine_blob_value_size\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=\"blob_value_size_average\"}\n \n)) by ($additional_groupby) ", "format": "time_series", "hide": false, "instant": false, "interval": "", "intervalFactor": 1, - "legendFormat": "avg-{{type}} {{$additional_groupby}}", + "legendFormat": "avg {{$additional_groupby}}", "metric": "", - "query": "avg((\n tikv_engine_blob_get_micros_seconds\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=~\".*_average\"}\n \n)) by (type, $additional_groupby) ", + "query": "avg((\n tikv_engine_blob_value_size\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=\"blob_value_size_average\"}\n \n)) by ($additional_groupby) ", "refId": "", "step": 10, "target": "" }, { "datasource": "${DS_TEST-CLUSTER}", - "expr": "avg((\n tikv_engine_blob_get_micros_seconds\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=~\".*_percentile95\"}\n \n)) by (type, $additional_groupby) ", + "expr": "avg((\n tikv_engine_blob_value_size\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=\"blob_value_size_percentile95\"}\n \n)) by ($additional_groupby) ", "format": "time_series", "hide": false, "instant": false, "interval": "", "intervalFactor": 1, - "legendFormat": "95%-{{type}} {{$additional_groupby}}", + "legendFormat": "95% {{$additional_groupby}}", "metric": "", - 
"query": "avg((\n tikv_engine_blob_get_micros_seconds\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=~\".*_percentile95\"}\n \n)) by (type, $additional_groupby) ", + "query": "avg((\n tikv_engine_blob_value_size\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=\"blob_value_size_percentile95\"}\n \n)) by ($additional_groupby) ", "refId": "", "step": 10, "target": "" }, { "datasource": "${DS_TEST-CLUSTER}", - "expr": "avg((\n tikv_engine_blob_get_micros_seconds\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=~\".*_percentile99\"}\n \n)) by (type, $additional_groupby) ", + "expr": "avg((\n tikv_engine_blob_value_size\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=\"blob_value_size_percentile99\"}\n \n)) by ($additional_groupby) ", "format": "time_series", "hide": false, "instant": false, "interval": "", "intervalFactor": 1, - "legendFormat": "99%-{{type}} {{$additional_groupby}}", + "legendFormat": "99% {{$additional_groupby}}", "metric": "", - "query": "avg((\n tikv_engine_blob_get_micros_seconds\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=~\".*_percentile99\"}\n \n)) by (type, $additional_groupby) ", + "query": "avg((\n tikv_engine_blob_value_size\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=\"blob_value_size_percentile99\"}\n \n)) by ($additional_groupby) ", "refId": "", "step": 10, "target": "" }, { "datasource": "${DS_TEST-CLUSTER}", - "expr": "max((\n tikv_engine_blob_get_micros_seconds\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=~\".*_max\"}\n \n)) by (type, $additional_groupby) ", + "expr": "max((\n tikv_engine_blob_value_size\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=\"blob_value_size_max\"}\n \n)) by ($additional_groupby) ", "format": "time_series", "hide": false, "instant": false, "interval": "", "intervalFactor": 1, - "legendFormat": "max-{{type}} {{$additional_groupby}}", + "legendFormat": "max {{$additional_groupby}}", "metric": "", - "query": "max((\n tikv_engine_blob_get_micros_seconds\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=~\".*_max\"}\n \n)) by (type, $additional_groupby) ", + "query": "max((\n tikv_engine_blob_value_size\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=\"blob_value_size_max\"}\n \n)) by ($additional_groupby) ", "refId": "", "step": 10, "target": "" @@ -35485,7 +35786,7 @@ "thresholds": [], "timeFrom": null, "timeShift": null, - "title": "Blob get duration", + "title": "Blob value size", "tooltip": { "msResolution": true, "shared": true, @@ -35504,7 +35805,7 @@ "yaxes": [ { "decimals": null, - "format": "\u00b5s", + "format": "bytes", "label": null, "logBase": 1, "max": null, @@ -35554,11 +35855,322 @@ "h": 7, "w": 12, "x": 0, - "y": 35 + "y": 28 }, "height": null, "hideTimeOverride": false, - "id": 252, + "id": 254, + "interval": null, + "isNew": true, + "legend": { + "alignAsTable": true, + "avg": false, + "current": true, + "hideEmpty": true, + "hideZero": true, + "max": true, + "min": false, + "rightSide": true, + "show": true, + 
"sideWidth": null, + "sort": "max", + "sortDesc": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", + "options": { + "alertThreshold": true, + "dataLinks": [] + }, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "repeatDirection": null, + "seriesOverrides": [], + "span": null, + "stack": false, + "steppedLine": false, + "targets": [ + { + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_engine_blob_locate\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=\"number_blob_get\"}\n [$__rate_interval]\n)) by ($additional_groupby) ", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "get {{$additional_groupby}}", + "metric": "", + "query": "sum(rate(\n tikv_engine_blob_locate\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=\"number_blob_get\"}\n [$__rate_interval]\n)) by ($additional_groupby) ", + "refId": "", + "step": 10, + "target": "" + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Blob get operations", + "tooltip": { + "msResolution": true, + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "transformations": [], + "transparent": false, + "type": "graph", + "xaxis": { + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "decimals": null, + "format": "ops", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "decimals": null, + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": 0 + } + }, + { + "aliasColors": {}, + "bars": false, + "cacheTimeout": null, + "datasource": "${DS_TEST-CLUSTER}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, + "fill": 1, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, + "gridPos": { + "h": 7, + "w": 12, + "x": 12, + "y": 28 + }, + "height": null, + "hideTimeOverride": false, + "id": 255, + "interval": null, + "isNew": true, + "legend": { + "alignAsTable": true, + "avg": false, + "current": true, + "hideEmpty": true, + "hideZero": true, + "max": true, + "min": false, + "rightSide": true, + "show": true, + "sideWidth": null, + "sort": "max", + "sortDesc": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", + "options": { + "alertThreshold": true, + "dataLinks": [] + }, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "repeatDirection": null, + "seriesOverrides": [], + "span": null, + "stack": false, + "steppedLine": false, + "targets": [ + { + "datasource": "${DS_TEST-CLUSTER}", + "expr": "avg((\n tikv_engine_blob_get_micros_seconds\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=~\".*_average\"}\n \n)) by (type, $additional_groupby) ", + 
"format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "avg-{{type}} {{$additional_groupby}}", + "metric": "", + "query": "avg((\n tikv_engine_blob_get_micros_seconds\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=~\".*_average\"}\n \n)) by (type, $additional_groupby) ", + "refId": "", + "step": 10, + "target": "" + }, + { + "datasource": "${DS_TEST-CLUSTER}", + "expr": "avg((\n tikv_engine_blob_get_micros_seconds\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=~\".*_percentile95\"}\n \n)) by (type, $additional_groupby) ", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "95%-{{type}} {{$additional_groupby}}", + "metric": "", + "query": "avg((\n tikv_engine_blob_get_micros_seconds\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=~\".*_percentile95\"}\n \n)) by (type, $additional_groupby) ", + "refId": "", + "step": 10, + "target": "" + }, + { + "datasource": "${DS_TEST-CLUSTER}", + "expr": "avg((\n tikv_engine_blob_get_micros_seconds\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=~\".*_percentile99\"}\n \n)) by (type, $additional_groupby) ", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "99%-{{type}} {{$additional_groupby}}", + "metric": "", + "query": "avg((\n tikv_engine_blob_get_micros_seconds\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=~\".*_percentile99\"}\n \n)) by (type, $additional_groupby) ", + "refId": "", + "step": 10, + "target": "" + }, + { + "datasource": "${DS_TEST-CLUSTER}", + "expr": "max((\n tikv_engine_blob_get_micros_seconds\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=~\".*_max\"}\n \n)) by (type, $additional_groupby) ", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "max-{{type}} {{$additional_groupby}}", + "metric": "", + "query": "max((\n tikv_engine_blob_get_micros_seconds\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=~\".*_max\"}\n \n)) by (type, $additional_groupby) ", + "refId": "", + "step": 10, + "target": "" + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Blob get duration", + "tooltip": { + "msResolution": true, + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "transformations": [], + "transparent": false, + "type": "graph", + "xaxis": { + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "decimals": null, + "format": "\u00b5s", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "decimals": null, + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": 0 + } + }, + { + "aliasColors": {}, + "bars": false, + "cacheTimeout": null, + "datasource": "${DS_TEST-CLUSTER}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + 
}, + "fill": 1, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, + "gridPos": { + "h": 7, + "w": 12, + "x": 0, + "y": 35 + }, + "height": null, + "hideTimeOverride": false, + "id": 256, "interval": null, "isNew": true, "legend": { @@ -35691,7 +36303,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 253, + "id": 257, "interval": null, "isNew": true, "legend": { @@ -35854,7 +36466,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 254, + "id": 258, "interval": null, "isNew": true, "legend": { @@ -36032,7 +36644,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 255, + "id": 259, "interval": null, "isNew": true, "legend": { @@ -36210,7 +36822,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 256, + "id": 260, "interval": null, "isNew": true, "legend": { @@ -36388,7 +37000,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 257, + "id": 261, "interval": null, "isNew": true, "legend": { @@ -36521,7 +37133,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 258, + "id": 262, "interval": null, "isNew": true, "legend": { @@ -36699,7 +37311,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 259, + "id": 263, "interval": null, "isNew": true, "legend": { @@ -36832,7 +37444,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 260, + "id": 264, "interval": null, "isNew": true, "legend": { @@ -37010,7 +37622,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 261, + "id": 265, "interval": null, "isNew": true, "legend": { @@ -37143,7 +37755,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 262, + "id": 266, "interval": null, "isNew": true, "legend": { @@ -37276,7 +37888,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 263, + "id": 267, "interval": null, "isNew": true, "legend": { @@ -37454,7 +38066,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 264, + "id": 268, "interval": null, "isNew": true, "legend": { @@ -37632,7 +38244,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 265, + "id": 269, "interval": null, "isNew": true, "legend": { @@ -37765,7 +38377,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 266, + "id": 270, "interval": null, "isNew": true, "legend": { @@ -37943,7 +38555,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 267, + "id": 271, "interval": null, "isNew": true, "legend": { @@ -38076,7 +38688,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 268, + "id": 272, "interval": null, "isNew": true, "legend": { @@ -38254,7 +38866,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 269, + "id": 273, "interval": null, "isNew": true, "legend": { @@ -38390,7 +39002,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 270, + "id": 274, "interval": null, "links": [], "maxDataPoints": 100, @@ -38429,7 +39041,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 271, + "id": 275, "interval": null, "isNew": true, "legend": { @@ -38562,7 +39174,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 272, + "id": 276, "interval": null, "isNew": true, "legend": { @@ -38710,7 +39322,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 273, + "id": 277, "interval": null, "isNew": true, "legend": { @@ -38911,7 +39523,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 274, + "id": 278, "interval": null, "isNew": true, "legend": { @@ -39044,7 +39656,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 275, 
+ "id": 279, "interval": null, "isNew": true, "legend": { @@ -39177,7 +39789,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 276, + "id": 280, "interval": null, "isNew": true, "legend": { @@ -39310,7 +39922,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 277, + "id": 281, "interval": null, "isNew": true, "legend": { @@ -39443,7 +40055,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 278, + "id": 282, "interval": null, "isNew": true, "legend": { @@ -39576,7 +40188,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 279, + "id": 283, "interval": null, "isNew": true, "legend": { @@ -39716,7 +40328,7 @@ "hideTimeOverride": false, "hideZeroBuckets": true, "highlightCards": true, - "id": 280, + "id": 284, "interval": null, "legend": { "show": false @@ -39821,7 +40433,7 @@ "hideTimeOverride": false, "hideZeroBuckets": true, "highlightCards": true, - "id": 281, + "id": 285, "interval": null, "legend": { "show": false @@ -39919,7 +40531,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 282, + "id": 286, "interval": null, "isNew": true, "legend": { @@ -40059,7 +40671,7 @@ "hideTimeOverride": false, "hideZeroBuckets": true, "highlightCards": true, - "id": 283, + "id": 287, "interval": null, "legend": { "show": false @@ -40157,7 +40769,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 284, + "id": 288, "interval": null, "isNew": true, "legend": { @@ -40297,7 +40909,7 @@ "hideTimeOverride": false, "hideZeroBuckets": true, "highlightCards": true, - "id": 285, + "id": 289, "interval": null, "legend": { "show": false @@ -40395,7 +41007,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 286, + "id": 290, "interval": null, "isNew": true, "legend": { @@ -40603,7 +41215,7 @@ "hideTimeOverride": false, "hideZeroBuckets": true, "highlightCards": true, - "id": 287, + "id": 291, "interval": null, "legend": { "show": false @@ -40701,7 +41313,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 288, + "id": 292, "interval": null, "isNew": true, "legend": { @@ -40902,7 +41514,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 289, + "id": 293, "interval": null, "isNew": true, "legend": { @@ -41110,7 +41722,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 290, + "id": 294, "interval": null, "isNew": true, "legend": { @@ -41288,7 +41900,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 291, + "id": 295, "interval": null, "isNew": true, "legend": { @@ -41421,7 +42033,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 292, + "id": 296, "interval": null, "isNew": true, "legend": { @@ -41554,7 +42166,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 293, + "id": 297, "interval": null, "isNew": true, "legend": { @@ -41687,7 +42299,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 294, + "id": 298, "interval": null, "isNew": true, "legend": { @@ -41823,7 +42435,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 295, + "id": 299, "interval": null, "links": [], "maxDataPoints": 100, @@ -41862,7 +42474,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 296, + "id": 300, "interval": null, "isNew": true, "legend": { @@ -42010,7 +42622,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 297, + "id": 301, "interval": null, "isNew": true, "legend": { @@ -42150,7 +42762,7 @@ "hideTimeOverride": false, "hideZeroBuckets": true, "highlightCards": true, - "id": 298, + "id": 302, "interval": null, "legend": { "show": false @@ -42248,7 +42860,7 @@ }, "height": null, "hideTimeOverride": false, - 
"id": 299, + "id": 303, "interval": null, "isNew": true, "legend": { @@ -42381,7 +42993,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 300, + "id": 304, "interval": null, "isNew": true, "legend": { @@ -42514,7 +43126,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 301, + "id": 305, "interval": null, "isNew": true, "legend": { @@ -42692,7 +43304,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 302, + "id": 306, "interval": null, "isNew": true, "legend": { @@ -42855,7 +43467,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 303, + "id": 307, "interval": null, "isNew": true, "legend": { @@ -43003,7 +43615,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 304, + "id": 308, "interval": null, "isNew": true, "legend": { @@ -43136,7 +43748,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 305, + "id": 309, "interval": null, "isNew": true, "legend": { @@ -43272,7 +43884,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 306, + "id": 310, "interval": null, "links": [], "maxDataPoints": 100, @@ -43311,7 +43923,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 307, + "id": 311, "interval": null, "isNew": true, "legend": { @@ -43459,7 +44071,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 308, + "id": 312, "interval": null, "isNew": true, "legend": { @@ -43592,7 +44204,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 309, + "id": 313, "interval": null, "isNew": true, "legend": { @@ -43725,7 +44337,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 310, + "id": 314, "interval": null, "isNew": true, "legend": { @@ -43858,7 +44470,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 311, + "id": 315, "interval": null, "isNew": true, "legend": { @@ -43991,7 +44603,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 312, + "id": 316, "interval": null, "isNew": true, "legend": { @@ -44146,7 +44758,7 @@ "hideTimeOverride": false, "hideZeroBuckets": true, "highlightCards": true, - "id": 313, + "id": 317, "interval": null, "legend": { "show": false @@ -44247,7 +44859,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 314, + "id": 318, "interval": null, "links": [], "maxDataPoints": 100, @@ -44286,7 +44898,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 315, + "id": 319, "interval": null, "isNew": true, "legend": { @@ -44419,7 +45031,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 316, + "id": 320, "interval": null, "isNew": true, "legend": { @@ -44552,7 +45164,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 317, + "id": 321, "interval": null, "isNew": true, "legend": { @@ -44692,7 +45304,7 @@ "hideTimeOverride": false, "hideZeroBuckets": true, "highlightCards": true, - "id": 318, + "id": 322, "interval": null, "legend": { "show": false @@ -44790,7 +45402,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 319, + "id": 323, "interval": null, "isNew": true, "legend": { @@ -44991,7 +45603,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 320, + "id": 324, "interval": null, "isNew": true, "legend": { @@ -45192,7 +45804,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 321, + "id": 325, "interval": null, "isNew": true, "legend": { @@ -45396,7 +46008,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 322, + "id": 326, "interval": null, "links": [], "maxDataPoints": 100, @@ -45435,7 +46047,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 323, + "id": 327, "interval": null, "isNew": true, "legend": { @@ -45583,7 
+46195,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 324, + "id": 328, "interval": null, "isNew": true, "legend": { @@ -45784,7 +46396,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 325, + "id": 329, "interval": null, "isNew": true, "legend": { @@ -45985,7 +46597,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 326, + "id": 330, "interval": null, "isNew": true, "legend": { @@ -46186,7 +46798,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 327, + "id": 331, "interval": null, "isNew": true, "legend": { @@ -46387,7 +46999,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 328, + "id": 332, "interval": null, "isNew": true, "legend": { @@ -46520,7 +47132,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 329, + "id": 333, "interval": null, "isNew": true, "legend": { @@ -46653,7 +47265,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 330, + "id": 334, "interval": null, "isNew": true, "legend": { @@ -46786,7 +47398,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 331, + "id": 335, "interval": null, "isNew": true, "legend": { @@ -46919,7 +47531,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 332, + "id": 336, "interval": null, "isNew": true, "legend": { @@ -47127,7 +47739,7 @@ "hideTimeOverride": false, "hideZeroBuckets": true, "highlightCards": true, - "id": 333, + "id": 337, "interval": null, "legend": { "show": false @@ -47228,7 +47840,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 334, + "id": 338, "interval": null, "links": [], "maxDataPoints": 100, @@ -47274,7 +47886,7 @@ "hideTimeOverride": false, "hideZeroBuckets": true, "highlightCards": true, - "id": 335, + "id": 339, "interval": null, "legend": { "show": false @@ -47372,7 +47984,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 336, + "id": 340, "interval": null, "isNew": true, "legend": { @@ -47573,7 +48185,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 337, + "id": 341, "interval": null, "isNew": true, "legend": { @@ -47706,7 +48318,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 338, + "id": 342, "interval": null, "isNew": true, "legend": { @@ -47839,7 +48451,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 339, + "id": 343, "interval": null, "isNew": true, "legend": { @@ -47972,7 +48584,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 340, + "id": 344, "interval": null, "isNew": true, "legend": { @@ -48173,7 +48785,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 341, + "id": 345, "interval": null, "isNew": true, "legend": { @@ -48306,7 +48918,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 342, + "id": 346, "interval": null, "isNew": true, "legend": { @@ -48439,7 +49051,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 343, + "id": 347, "interval": null, "isNew": true, "legend": { @@ -48575,7 +49187,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 344, + "id": 348, "interval": null, "links": [], "maxDataPoints": 100, @@ -48614,7 +49226,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 345, + "id": 349, "interval": null, "isNew": true, "legend": { @@ -48815,7 +49427,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 346, + "id": 350, "interval": null, "isNew": true, "legend": { @@ -49016,7 +49628,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 347, + "id": 351, "interval": null, "isNew": true, "legend": { @@ -49217,7 +49829,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 348, + "id": 352, "interval": 
null, "isNew": true, "legend": { @@ -49418,7 +50030,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 349, + "id": 353, "interval": null, "isNew": true, "legend": { @@ -49551,7 +50163,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 350, + "id": 354, "interval": null, "isNew": true, "legend": { @@ -49684,7 +50296,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 351, + "id": 355, "interval": null, "isNew": true, "legend": { @@ -49817,7 +50429,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 352, + "id": 356, "interval": null, "isNew": true, "legend": { @@ -49950,7 +50562,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 353, + "id": 357, "interval": null, "isNew": true, "legend": { @@ -50083,7 +50695,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 354, + "id": 358, "interval": null, "isNew": true, "legend": { @@ -50223,7 +50835,7 @@ "hideTimeOverride": false, "hideZeroBuckets": true, "highlightCards": true, - "id": 355, + "id": 359, "interval": null, "legend": { "show": false @@ -50321,7 +50933,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 356, + "id": 360, "interval": null, "isNew": true, "legend": { @@ -50525,7 +51137,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 357, + "id": 361, "interval": null, "links": [], "maxDataPoints": 100, @@ -50564,7 +51176,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 358, + "id": 362, "interval": null, "isNew": true, "legend": { @@ -50697,7 +51309,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 359, + "id": 363, "interval": null, "isNew": true, "legend": { @@ -50830,7 +51442,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 360, + "id": 364, "interval": null, "isNew": true, "legend": { @@ -50970,7 +51582,7 @@ "hideTimeOverride": false, "hideZeroBuckets": true, "highlightCards": true, - "id": 361, + "id": 365, "interval": null, "legend": { "show": false @@ -51068,7 +51680,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 362, + "id": 366, "interval": null, "isNew": true, "legend": { @@ -51269,7 +51881,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 363, + "id": 367, "interval": null, "isNew": true, "legend": { @@ -51470,7 +52082,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 364, + "id": 368, "interval": null, "isNew": true, "legend": { @@ -51674,7 +52286,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 365, + "id": 369, "interval": null, "links": [], "maxDataPoints": 100, @@ -51713,7 +52325,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 366, + "id": 370, "interval": null, "isNew": true, "legend": { @@ -51891,7 +52503,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 367, + "id": 371, "interval": null, "isNew": true, "legend": { @@ -52092,7 +52704,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 368, + "id": 372, "interval": null, "isNew": true, "legend": { @@ -52225,7 +52837,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 369, + "id": 373, "interval": null, "isNew": true, "legend": { @@ -52358,7 +52970,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 370, + "id": 374, "interval": null, "isNew": true, "legend": { @@ -52491,7 +53103,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 371, + "id": 375, "interval": null, "isNew": true, "legend": { @@ -52624,7 +53236,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 372, + "id": 376, "interval": null, "isNew": true, "legend": { @@ -52757,7 +53369,7 @@ }, "height": null, "hideTimeOverride": 
false, - "id": 373, + "id": 377, "interval": null, "isNew": true, "legend": { @@ -52886,7 +53498,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 374, + "id": 378, "interval": null, "links": [], "maxDataPoints": 100, @@ -52961,7 +53573,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 375, + "id": 379, "interval": null, "links": [], "maxDataPoints": 100, @@ -53040,7 +53652,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 376, + "id": 380, "interval": null, "isNew": true, "legend": { @@ -53293,7 +53905,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 377, + "id": 381, "interval": null, "isNew": true, "legend": { @@ -53426,7 +54038,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 378, + "id": 382, "interval": null, "isNew": true, "legend": { @@ -53562,7 +54174,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 379, + "id": 383, "interval": null, "links": [], "maxDataPoints": 100, @@ -53601,7 +54213,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 380, + "id": 384, "interval": null, "isNew": true, "legend": { @@ -53749,7 +54361,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 381, + "id": 385, "interval": null, "isNew": true, "legend": { @@ -53882,7 +54494,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 382, + "id": 386, "interval": null, "isNew": true, "legend": { @@ -54083,7 +54695,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 383, + "id": 387, "interval": null, "isNew": true, "legend": { @@ -54231,7 +54843,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 384, + "id": 388, "interval": null, "isNew": true, "legend": { @@ -54432,7 +55044,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 385, + "id": 389, "interval": null, "isNew": true, "legend": { @@ -54565,7 +55177,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 386, + "id": 390, "interval": null, "isNew": true, "legend": { @@ -54698,7 +55310,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 387, + "id": 391, "interval": null, "isNew": true, "legend": { @@ -54831,7 +55443,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 388, + "id": 392, "interval": null, "isNew": true, "legend": { @@ -54964,7 +55576,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 389, + "id": 393, "interval": null, "isNew": true, "legend": { @@ -55104,7 +55716,7 @@ "hideTimeOverride": false, "hideZeroBuckets": true, "highlightCards": true, - "id": 390, + "id": 394, "interval": null, "legend": { "show": false @@ -55202,7 +55814,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 391, + "id": 395, "interval": null, "isNew": true, "legend": { @@ -55406,7 +56018,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 392, + "id": 396, "interval": null, "links": [], "maxDataPoints": 100, @@ -55445,7 +56057,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 393, + "id": 397, "interval": null, "isNew": true, "legend": { @@ -55578,7 +56190,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 394, + "id": 398, "interval": null, "isNew": true, "legend": { @@ -55711,7 +56323,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 395, + "id": 399, "interval": null, "isNew": true, "legend": { @@ -55844,7 +56456,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 396, + "id": 400, "interval": null, "isNew": true, "legend": { @@ -55980,7 +56592,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 397, + "id": 401, "interval": null, "links": [], "maxDataPoints": 100, @@ -56019,7 +56631,7 @@ 
}, "height": null, "hideTimeOverride": false, - "id": 398, + "id": 402, "interval": null, "isNew": true, "legend": { @@ -56152,7 +56764,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 399, + "id": 403, "interval": null, "isNew": true, "legend": { @@ -56285,7 +56897,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 400, + "id": 404, "interval": null, "isNew": true, "legend": { @@ -56433,7 +57045,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 401, + "id": 405, "interval": null, "isNew": true, "legend": { @@ -56566,7 +57178,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 402, + "id": 406, "interval": null, "isNew": true, "legend": { @@ -56699,7 +57311,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 403, + "id": 407, "interval": null, "isNew": true, "legend": { @@ -56832,7 +57444,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 404, + "id": 408, "interval": null, "isNew": true, "legend": { @@ -56968,7 +57580,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 405, + "id": 409, "interval": null, "links": [], "maxDataPoints": 100, @@ -57007,7 +57619,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 406, + "id": 410, "interval": null, "isNew": true, "legend": { @@ -57140,7 +57752,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 407, + "id": 411, "interval": null, "isNew": true, "legend": { @@ -57273,7 +57885,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 408, + "id": 412, "interval": null, "isNew": true, "legend": { @@ -57406,7 +58018,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 409, + "id": 413, "interval": null, "isNew": true, "legend": { @@ -57539,7 +58151,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 410, + "id": 414, "interval": null, "isNew": true, "legend": { @@ -57672,7 +58284,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 411, + "id": 415, "interval": null, "isNew": true, "legend": { @@ -57808,7 +58420,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 412, + "id": 416, "interval": null, "links": [], "maxDataPoints": 100, @@ -57847,7 +58459,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 413, + "id": 417, "interval": null, "isNew": true, "legend": { @@ -57980,7 +58592,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 414, + "id": 418, "interval": null, "isNew": true, "legend": { @@ -58113,7 +58725,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 415, + "id": 419, "interval": null, "isNew": true, "legend": { @@ -58261,7 +58873,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 416, + "id": 420, "interval": null, "isNew": true, "legend": { @@ -58424,7 +59036,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 417, + "id": 421, "interval": null, "isNew": true, "legend": { @@ -58557,7 +59169,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 418, + "id": 422, "interval": null, "isNew": true, "legend": { @@ -58690,7 +59302,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 419, + "id": 423, "interval": null, "isNew": true, "legend": { @@ -58838,7 +59450,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 420, + "id": 424, "interval": null, "isNew": true, "legend": { @@ -58986,7 +59598,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 421, + "id": 425, "interval": null, "isNew": true, "legend": { @@ -59122,7 +59734,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 422, + "id": 426, "interval": null, "links": [], "maxDataPoints": 100, @@ -59161,7 +59773,7 @@ }, 
"height": null, "hideTimeOverride": false, - "id": 423, + "id": 427, "interval": null, "isNew": true, "legend": { @@ -59294,7 +59906,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 424, + "id": 428, "interval": null, "isNew": true, "legend": { @@ -59427,7 +60039,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 425, + "id": 429, "interval": null, "isNew": true, "legend": { @@ -59560,7 +60172,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 426, + "id": 430, "interval": null, "isNew": true, "legend": { @@ -59693,7 +60305,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 427, + "id": 431, "interval": null, "isNew": true, "legend": { @@ -59826,7 +60438,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 428, + "id": 432, "interval": null, "isNew": true, "legend": { @@ -59959,7 +60571,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 429, + "id": 433, "interval": null, "isNew": true, "legend": { @@ -60092,7 +60704,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 430, + "id": 434, "interval": null, "isNew": true, "legend": { @@ -60225,7 +60837,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 431, + "id": 435, "interval": null, "isNew": true, "legend": { @@ -60365,7 +60977,7 @@ "hideTimeOverride": false, "hideZeroBuckets": true, "highlightCards": true, - "id": 432, + "id": 436, "interval": null, "legend": { "show": false @@ -60463,7 +61075,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 433, + "id": 437, "interval": null, "isNew": true, "legend": { @@ -60596,7 +61208,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 434, + "id": 438, "interval": null, "isNew": true, "legend": { @@ -60744,7 +61356,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 435, + "id": 439, "interval": null, "isNew": true, "legend": { @@ -60892,7 +61504,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 436, + "id": 440, "interval": null, "isNew": true, "legend": { @@ -61032,7 +61644,7 @@ "hideTimeOverride": false, "hideZeroBuckets": true, "highlightCards": true, - "id": 437, + "id": 441, "interval": null, "legend": { "show": false @@ -61130,7 +61742,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 438, + "id": 442, "interval": null, "isNew": true, "legend": { @@ -61263,7 +61875,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 439, + "id": 443, "interval": null, "isNew": true, "legend": { @@ -61399,7 +62011,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 440, + "id": 444, "interval": null, "links": [], "maxDataPoints": 100, @@ -61438,7 +62050,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 441, + "id": 445, "interval": null, "isNew": true, "legend": { @@ -61571,7 +62183,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 442, + "id": 446, "interval": null, "isNew": true, "legend": { @@ -61734,7 +62346,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 443, + "id": 447, "interval": null, "isNew": true, "legend": { @@ -61882,7 +62494,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 444, + "id": 448, "interval": null, "isNew": true, "legend": { @@ -62015,7 +62627,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 445, + "id": 449, "interval": null, "isNew": true, "legend": { @@ -62155,7 +62767,7 @@ "hideTimeOverride": false, "hideZeroBuckets": true, "highlightCards": true, - "id": 446, + "id": 450, "interval": null, "legend": { "show": false @@ -62260,7 +62872,7 @@ "hideTimeOverride": false, "hideZeroBuckets": true, "highlightCards": true, - 
"id": 447, + "id": 451, "interval": null, "legend": { "show": false @@ -62365,7 +62977,7 @@ "hideTimeOverride": false, "hideZeroBuckets": true, "highlightCards": true, - "id": 448, + "id": 452, "interval": null, "legend": { "show": false @@ -62463,7 +63075,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 449, + "id": 453, "interval": null, "isNew": true, "legend": { @@ -62603,7 +63215,7 @@ "hideTimeOverride": false, "hideZeroBuckets": true, "highlightCards": true, - "id": 450, + "id": 454, "interval": null, "legend": { "show": false @@ -62708,7 +63320,7 @@ "hideTimeOverride": false, "hideZeroBuckets": true, "highlightCards": true, - "id": 451, + "id": 455, "interval": null, "legend": { "show": false @@ -62813,7 +63425,7 @@ "hideTimeOverride": false, "hideZeroBuckets": true, "highlightCards": true, - "id": 452, + "id": 456, "interval": null, "legend": { "show": false @@ -62911,7 +63523,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 453, + "id": 457, "interval": null, "isNew": true, "legend": { @@ -63044,7 +63656,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 454, + "id": 458, "interval": null, "isNew": true, "legend": { @@ -63177,7 +63789,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 455, + "id": 459, "interval": null, "isNew": true, "legend": { @@ -63317,7 +63929,7 @@ "hideTimeOverride": false, "hideZeroBuckets": true, "highlightCards": true, - "id": 456, + "id": 460, "interval": null, "legend": { "show": false @@ -63415,7 +64027,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 457, + "id": 461, "interval": null, "isNew": true, "legend": { @@ -63551,7 +64163,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 458, + "id": 462, "interval": null, "links": [], "maxDataPoints": 100, @@ -63590,7 +64202,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 459, + "id": 463, "interval": null, "isNew": true, "legend": { @@ -63753,7 +64365,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 460, + "id": 464, "interval": null, "isNew": true, "legend": { @@ -63886,7 +64498,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 461, + "id": 465, "interval": null, "isNew": true, "legend": { @@ -64026,7 +64638,7 @@ "hideTimeOverride": false, "hideZeroBuckets": true, "highlightCards": true, - "id": 462, + "id": 466, "interval": null, "legend": { "show": false @@ -64131,7 +64743,7 @@ "hideTimeOverride": false, "hideZeroBuckets": true, "highlightCards": true, - "id": 463, + "id": 467, "interval": null, "legend": { "show": false @@ -64229,7 +64841,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 464, + "id": 468, "interval": null, "isNew": true, "legend": { @@ -64384,7 +64996,7 @@ "hideTimeOverride": false, "hideZeroBuckets": true, "highlightCards": true, - "id": 465, + "id": 469, "interval": null, "legend": { "show": false @@ -64489,7 +65101,7 @@ "hideTimeOverride": false, "hideZeroBuckets": true, "highlightCards": true, - "id": 466, + "id": 470, "interval": null, "legend": { "show": false @@ -64594,7 +65206,7 @@ "hideTimeOverride": false, "hideZeroBuckets": true, "highlightCards": true, - "id": 467, + "id": 471, "interval": null, "legend": { "show": false @@ -64692,7 +65304,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 468, + "id": 472, "interval": null, "isNew": true, "legend": { @@ -64862,7 +65474,7 @@ "hideTimeOverride": false, "hideZeroBuckets": true, "highlightCards": true, - "id": 469, + "id": 473, "interval": null, "legend": { "show": false @@ -64960,7 +65572,7 @@ }, "height": null, 
"hideTimeOverride": false, - "id": 470, + "id": 474, "interval": null, "isNew": true, "legend": { @@ -65161,7 +65773,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 471, + "id": 475, "interval": null, "isNew": true, "legend": { @@ -65362,7 +65974,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 472, + "id": 476, "interval": null, "isNew": true, "legend": { @@ -65495,7 +66107,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 473, + "id": 477, "interval": null, "isNew": true, "legend": { @@ -65658,7 +66270,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 474, + "id": 478, "interval": null, "isNew": true, "legend": { @@ -65791,7 +66403,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 475, + "id": 479, "interval": null, "isNew": true, "legend": { @@ -65924,7 +66536,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 476, + "id": 480, "interval": null, "isNew": true, "legend": { @@ -66125,7 +66737,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 477, + "id": 481, "interval": null, "isNew": true, "legend": { @@ -66258,7 +66870,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 478, + "id": 482, "interval": null, "isNew": true, "legend": { @@ -66398,7 +67010,7 @@ "hideTimeOverride": false, "hideZeroBuckets": true, "highlightCards": true, - "id": 479, + "id": 483, "interval": null, "legend": { "show": false @@ -66503,7 +67115,7 @@ "hideTimeOverride": false, "hideZeroBuckets": true, "highlightCards": true, - "id": 480, + "id": 484, "interval": null, "legend": { "show": false @@ -66608,7 +67220,7 @@ "hideTimeOverride": false, "hideZeroBuckets": true, "highlightCards": true, - "id": 481, + "id": 485, "interval": null, "legend": { "show": false @@ -66713,7 +67325,7 @@ "hideTimeOverride": false, "hideZeroBuckets": true, "highlightCards": true, - "id": 482, + "id": 486, "interval": null, "legend": { "show": false @@ -66818,7 +67430,7 @@ "hideTimeOverride": false, "hideZeroBuckets": true, "highlightCards": true, - "id": 483, + "id": 487, "interval": null, "legend": { "show": false @@ -66923,7 +67535,7 @@ "hideTimeOverride": false, "hideZeroBuckets": true, "highlightCards": true, - "id": 484, + "id": 488, "interval": null, "legend": { "show": false @@ -67028,7 +67640,7 @@ "hideTimeOverride": false, "hideZeroBuckets": true, "highlightCards": true, - "id": 485, + "id": 489, "interval": null, "legend": { "show": false @@ -67126,7 +67738,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 486, + "id": 490, "interval": null, "isNew": true, "legend": { @@ -67274,7 +67886,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 487, + "id": 491, "interval": null, "isNew": true, "legend": { @@ -67407,7 +68019,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 488, + "id": 492, "interval": null, "isNew": true, "legend": { @@ -67540,7 +68152,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 489, + "id": 493, "interval": null, "isNew": true, "legend": { @@ -67688,7 +68300,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 490, + "id": 494, "interval": null, "isNew": true, "legend": { @@ -67824,7 +68436,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 491, + "id": 495, "interval": null, "links": [], "maxDataPoints": 100, @@ -67875,7 +68487,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 492, + "id": 496, "interval": null, "links": [], "maxDataPoints": 100, @@ -67971,7 +68583,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 493, + "id": 497, "interval": null, "links": [], 
"maxDataPoints": 100, @@ -68046,7 +68658,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 494, + "id": 498, "interval": null, "links": [], "maxDataPoints": 100, @@ -68121,7 +68733,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 495, + "id": 499, "interval": null, "links": [], "maxDataPoints": 100, @@ -68196,7 +68808,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 496, + "id": 500, "interval": null, "links": [], "maxDataPoints": 100, @@ -68271,7 +68883,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 497, + "id": 501, "interval": null, "links": [], "maxDataPoints": 100, @@ -68346,7 +68958,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 498, + "id": 502, "interval": null, "links": [], "maxDataPoints": 100, @@ -68421,7 +69033,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 499, + "id": 503, "interval": null, "links": [], "maxDataPoints": 100, @@ -68500,7 +69112,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 500, + "id": 504, "interval": null, "isNew": true, "legend": { @@ -68633,7 +69245,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 501, + "id": 505, "interval": null, "isNew": true, "legend": { @@ -68766,7 +69378,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 502, + "id": 506, "interval": null, "isNew": true, "legend": { @@ -68899,7 +69511,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 503, + "id": 507, "interval": null, "isNew": true, "legend": { @@ -69032,7 +69644,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 504, + "id": 508, "interval": null, "isNew": true, "legend": { @@ -69165,7 +69777,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 505, + "id": 509, "interval": null, "isNew": true, "legend": { @@ -69313,7 +69925,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 506, + "id": 510, "interval": null, "isNew": true, "legend": { @@ -69446,7 +70058,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 507, + "id": 511, "interval": null, "isNew": true, "legend": { @@ -69579,7 +70191,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 508, + "id": 512, "interval": null, "isNew": true, "legend": { @@ -69745,7 +70357,7 @@ "hideTimeOverride": false, "hideZeroBuckets": true, "highlightCards": true, - "id": 509, + "id": 513, "interval": null, "legend": { "show": false @@ -69850,7 +70462,7 @@ "hideTimeOverride": false, "hideZeroBuckets": true, "highlightCards": true, - "id": 510, + "id": 514, "interval": null, "legend": { "show": false @@ -69955,7 +70567,7 @@ "hideTimeOverride": false, "hideZeroBuckets": true, "highlightCards": true, - "id": 511, + "id": 515, "interval": null, "legend": { "show": false @@ -70060,7 +70672,7 @@ "hideTimeOverride": false, "hideZeroBuckets": true, "highlightCards": true, - "id": 512, + "id": 516, "interval": null, "legend": { "show": false @@ -70165,7 +70777,7 @@ "hideTimeOverride": false, "hideZeroBuckets": true, "highlightCards": true, - "id": 513, + "id": 517, "interval": null, "legend": { "show": false @@ -70270,7 +70882,7 @@ "hideTimeOverride": false, "hideZeroBuckets": true, "highlightCards": true, - "id": 514, + "id": 518, "interval": null, "legend": { "show": false @@ -70375,7 +70987,7 @@ "hideTimeOverride": false, "hideZeroBuckets": true, "highlightCards": true, - "id": 515, + "id": 519, "interval": null, "legend": { "show": false @@ -70480,7 +71092,7 @@ "hideTimeOverride": false, "hideZeroBuckets": true, "highlightCards": true, - "id": 516, + "id": 520, "interval": null, "legend": { "show": false @@ 
-70578,7 +71190,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 517, + "id": 521, "interval": null, "isNew": true, "legend": { @@ -70711,7 +71323,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 518, + "id": 522, "interval": null, "isNew": true, "legend": { @@ -70844,7 +71456,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 519, + "id": 523, "interval": null, "isNew": true, "legend": { @@ -70977,7 +71589,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 520, + "id": 524, "interval": null, "isNew": true, "legend": { @@ -71110,7 +71722,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 521, + "id": 525, "interval": null, "isNew": true, "legend": { @@ -71243,7 +71855,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 522, + "id": 526, "interval": null, "isNew": true, "legend": { @@ -71376,7 +71988,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 523, + "id": 527, "interval": null, "isNew": true, "legend": { @@ -71509,7 +72121,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 524, + "id": 528, "interval": null, "isNew": true, "legend": { @@ -71649,7 +72261,7 @@ "hideTimeOverride": false, "hideZeroBuckets": true, "highlightCards": true, - "id": 525, + "id": 529, "interval": null, "legend": { "show": false @@ -71754,7 +72366,7 @@ "hideTimeOverride": false, "hideZeroBuckets": true, "highlightCards": true, - "id": 526, + "id": 530, "interval": null, "legend": { "show": false @@ -71852,7 +72464,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 527, + "id": 531, "interval": null, "isNew": true, "legend": { @@ -71985,7 +72597,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 528, + "id": 532, "interval": null, "isNew": true, "legend": { @@ -72118,7 +72730,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 529, + "id": 533, "interval": null, "isNew": true, "legend": { @@ -72251,7 +72863,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 530, + "id": 534, "interval": null, "isNew": true, "legend": { @@ -72384,7 +72996,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 531, + "id": 535, "interval": null, "isNew": true, "legend": { @@ -72517,7 +73129,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 532, + "id": 536, "interval": null, "isNew": true, "legend": { @@ -72653,7 +73265,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 533, + "id": 537, "interval": null, "links": [], "maxDataPoints": 100, @@ -72692,7 +73304,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 534, + "id": 538, "interval": null, "isNew": true, "legend": { @@ -72840,7 +73452,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 535, + "id": 539, "interval": null, "isNew": true, "legend": { @@ -72973,7 +73585,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 536, + "id": 540, "interval": null, "isNew": true, "legend": { @@ -73106,7 +73718,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 537, + "id": 541, "interval": null, "isNew": true, "legend": { @@ -73242,7 +73854,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 538, + "id": 542, "interval": null, "links": [], "maxDataPoints": 100, @@ -73281,7 +73893,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 539, + "id": 543, "interval": null, "isNew": true, "legend": { @@ -73414,7 +74026,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 540, + "id": 544, "interval": null, "isNew": true, "legend": { @@ -73547,7 +74159,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 541, + "id": 545, 
"interval": null, "isNew": true, "legend": { @@ -73680,7 +74292,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 542, + "id": 546, "interval": null, "isNew": true, "legend": { @@ -73813,7 +74425,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 543, + "id": 547, "interval": null, "isNew": true, "legend": { @@ -73946,7 +74558,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 544, + "id": 548, "interval": null, "isNew": true, "legend": { @@ -74082,7 +74694,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 545, + "id": 549, "interval": null, "links": [], "maxDataPoints": 100, @@ -74121,7 +74733,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 546, + "id": 550, "interval": null, "isNew": true, "legend": { @@ -74254,7 +74866,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 547, + "id": 551, "interval": null, "isNew": true, "legend": { @@ -74390,7 +75002,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 548, + "id": 552, "interval": null, "links": [], "maxDataPoints": 100, @@ -74429,7 +75041,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 549, + "id": 553, "interval": null, "isNew": true, "legend": { @@ -74630,7 +75242,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 550, + "id": 554, "interval": null, "isNew": true, "legend": { @@ -74766,7 +75378,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 551, + "id": 555, "interval": null, "links": [], "maxDataPoints": 100, @@ -74805,7 +75417,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 552, + "id": 556, "interval": null, "isNew": true, "legend": { @@ -74938,7 +75550,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 553, + "id": 557, "interval": null, "isNew": true, "legend": { @@ -75071,7 +75683,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 554, + "id": 558, "interval": null, "isNew": true, "legend": { @@ -75204,7 +75816,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 555, + "id": 559, "interval": null, "isNew": true, "legend": { @@ -75337,7 +75949,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 556, + "id": 560, "interval": null, "isNew": true, "legend": { @@ -75485,7 +76097,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 557, + "id": 561, "interval": null, "isNew": true, "legend": { @@ -75689,7 +76301,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 558, + "id": 562, "interval": null, "links": [], "maxDataPoints": 100, @@ -75728,7 +76340,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 559, + "id": 563, "interval": null, "isNew": true, "legend": { @@ -75861,7 +76473,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 560, + "id": 564, "interval": null, "isNew": true, "legend": { @@ -75994,7 +76606,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 561, + "id": 565, "interval": null, "isNew": true, "legend": { @@ -76127,7 +76739,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 562, + "id": 566, "interval": null, "isNew": true, "legend": { @@ -76260,7 +76872,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 563, + "id": 567, "interval": null, "isNew": true, "legend": { @@ -76457,7 +77069,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 564, + "id": 568, "interval": null, "links": [], "maxDataPoints": 100, diff --git a/metrics/grafana/tikv_details.json.sha256 b/metrics/grafana/tikv_details.json.sha256 index 239e6c44191..cc657439837 100644 --- a/metrics/grafana/tikv_details.json.sha256 +++ 
b/metrics/grafana/tikv_details.json.sha256 @@ -1 +1 @@ -d8d8d708f9e11b7a9e318f9c50184212a2adb0a7967b313f92fbef55021fffed ./metrics/grafana/tikv_details.json +b4ba015c2532b3191788f5c1e12bcebec27d64560f85900f2f3fb2e3c05d88fb ./metrics/grafana/tikv_details.json diff --git a/src/server/metrics.rs b/src/server/metrics.rs index bee26f28237..11ee6512831 100644 --- a/src/server/metrics.rs +++ b/src/server/metrics.rs @@ -112,6 +112,17 @@ make_auto_flush_static_metric! { unknown, } + pub label_enum RaftMessageDurationKind { + // This duration **begins** when the RaftStore thread sends the RaftMessage to + // the RaftClient and **ends** when the RaftMessage leaves the + // BatchRaftMessage buffer, just before being flushed to the gRPC client. + send_wait, + // This duration **begins** after the send_wait finishes and continues as the + // message is sent over the network, **ends** when the target peer receives it. + // This metric is reported by the receiver, so it is named receive delay. + receive_delay, + } + pub struct GcCommandCounterVec: LocalIntCounter { "type" => GcCommandKind, } @@ -153,6 +164,10 @@ make_auto_flush_static_metric! { pub struct ReplicaReadLockCheckHistogramVec: LocalHistogram { "result" => ReplicaReadLockCheckResult, } + + pub struct RaftMessageDurationVec: LocalHistogram { + "type" => RaftMessageDurationKind, + } } make_static_metric! { @@ -423,6 +438,13 @@ lazy_static! { exponential_buckets(5e-5, 2.0, 22).unwrap() // 50us ~ 104s ) .unwrap(); + pub static ref RAFT_MESSAGE_DURATION_VEC: HistogramVec = register_histogram_vec!( + "tikv_server_raft_message_duration_seconds", + "Duration of raft messages.", + &["type"], + exponential_buckets(0.00001, 2.0, 26).unwrap() + ) + .unwrap(); pub static ref RAFT_MESSAGE_FLUSH_COUNTER: RaftMessageFlushCounterVec = register_static_int_counter_vec!( RaftMessageFlushCounterVec, @@ -477,6 +499,11 @@ lazy_static! { .unwrap(); } +lazy_static! { + pub static ref RAFT_MESSAGE_DURATION: RaftMessageDurationVec = + auto_flush_from!(RAFT_MESSAGE_DURATION_VEC, RaftMessageDurationVec); +} + make_auto_flush_static_metric! { pub label_enum RequestStatusKind { all, diff --git a/src/server/raft_client.rs b/src/server/raft_client.rs index 83373103c54..43e7fc7681a 100644 --- a/src/server/raft_client.rs +++ b/src/server/raft_client.rs @@ -4,14 +4,13 @@ use std::{ collections::VecDeque, ffi::CString, marker::Unpin, - mem, pin::Pin, result, sync::{ atomic::{AtomicI32, AtomicU8, Ordering}, Arc, Mutex, }, - time::{Duration, Instant}, + time::{Duration, Instant, SystemTime, UNIX_EPOCH}, }; use collections::{HashMap, HashSet}; @@ -40,7 +39,7 @@ use tikv_kv::RaftExtension; use tikv_util::{ config::{Tracker, VersionTrack}, lru::LruCache, - time::duration_to_sec, + time::{duration_to_sec, InstantExt}, timer::GLOBAL_TIMER_HANDLE, worker::Scheduler, }; @@ -95,7 +94,7 @@ impl From for ConnState { /// A quick queue for sending raft messages. struct Queue { - buf: ArrayQueue, + buf: ArrayQueue<(RaftMessage, Instant)>, conn_state: AtomicU8, waker: Mutex>, } @@ -116,9 +115,9 @@ impl Queue { /// finally. /// /// True when the message is pushed into queue otherwise false. 
- fn push(&self, msg: RaftMessage) -> Result<(), DiscardReason> { + fn push(&self, msg_with_time: (RaftMessage, Instant)) -> Result<(), DiscardReason> { match self.conn_state.load(Ordering::SeqCst).into() { - ConnState::Established => match self.buf.push(msg) { + ConnState::Established => match self.buf.push(msg_with_time) { Ok(()) => Ok(()), Err(_) => Err(DiscardReason::Full), }, @@ -148,7 +147,7 @@ impl Queue { } /// Gets message from the head of the queue. - fn try_pop(&self) -> Option { + fn try_pop(&self) -> Option<(RaftMessage, Instant)> { self.buf.pop() } @@ -158,7 +157,7 @@ impl Queue { /// The method should be called in polling context. If the queue is empty, /// it will register current polling task for notifications. #[inline] - fn pop(&self, ctx: &Context<'_>) -> Option { + fn pop(&self, ctx: &Context<'_>) -> Option<(RaftMessage, Instant)> { self.buf.pop().or_else(|| { { let mut waker = self.waker.lock().unwrap(); @@ -177,7 +176,7 @@ trait Buffer { /// A full buffer should be flushed successfully before calling `push`. fn full(&self) -> bool; /// Pushes the message into buffer. - fn push(&mut self, msg: RaftMessage); + fn push(&mut self, msg_with_time: (RaftMessage, Instant)); /// Checks if the batch is empty. fn empty(&self) -> bool; /// Flushes the message to grpc. @@ -197,8 +196,8 @@ trait Buffer { /// A buffer for BatchRaftMessage. struct BatchMessageBuffer { - batch: BatchRaftMessage, - overflowing: Option, + batch: Vec<(RaftMessage, Instant)>, + overflowing: Option<(RaftMessage, Instant)>, size: usize, cfg: Config, cfg_tracker: Tracker, @@ -213,7 +212,7 @@ impl BatchMessageBuffer { let cfg_tracker = Arc::clone(global_cfg_track).tracker("raft-client-buffer".into()); let cfg = global_cfg_track.value().clone(); BatchMessageBuffer { - batch: BatchRaftMessage::default(), + batch: Vec::with_capacity(cfg.raft_msg_max_batch_size), overflowing: None, size: 0, cfg, @@ -245,7 +244,7 @@ impl BatchMessageBuffer { #[cfg(test)] fn clear(&mut self) { - self.batch = BatchRaftMessage::default(); + self.batch.clear(); self.size = 0; self.overflowing = None; // try refresh config @@ -262,32 +261,44 @@ impl Buffer for BatchMessageBuffer { } #[inline] - fn push(&mut self, msg: RaftMessage) { - let msg_size = Self::message_size(&msg); + fn push(&mut self, msg_with_time: (RaftMessage, Instant)) { + let msg_size = Self::message_size(&msg_with_time.0); // To avoid building too large batch, we limit each batch's size. Since // `msg_size` is estimated, `GRPC_SEND_MSG_BUF` is reserved for errors. 
if self.size > 0 && (self.size + msg_size + self.cfg.raft_client_grpc_send_msg_buffer >= self.cfg.max_grpc_send_msg_len as usize - || self.batch.get_msgs().len() >= self.cfg.raft_msg_max_batch_size) + || self.batch.len() >= self.cfg.raft_msg_max_batch_size) { - self.overflowing = Some(msg); + self.overflowing = Some(msg_with_time); return; } self.size += msg_size; - self.batch.mut_msgs().push(msg); + self.batch.push(msg_with_time); } #[inline] fn empty(&self) -> bool { - self.batch.get_msgs().is_empty() + self.batch.is_empty() } #[inline] fn flush(&mut self, sender: &mut ClientCStreamSender) -> grpcio::Result<()> { - let batch = mem::take(&mut self.batch); + let mut batch_msgs = BatchRaftMessage::default(); + self.batch.drain(..).for_each(|(msg, time)| { + RAFT_MESSAGE_DURATION + .send_wait + .observe(time.saturating_elapsed().as_secs_f64()); + batch_msgs.msgs.push(msg); + }); + let now = SystemTime::now() + .duration_since(UNIX_EPOCH) + .unwrap() + .as_nanos() as u64; + batch_msgs.last_observed_time = now; + let res = Pin::new(sender).start_send(( - batch, + batch_msgs, WriteFlags::default().buffer_hint(self.overflowing.is_some()), )); @@ -342,8 +353,8 @@ impl Buffer for MessageBuffer { } #[inline] - fn push(&mut self, msg: RaftMessage) { - self.batch.push_back(msg); + fn push(&mut self, msg_with_time: (RaftMessage, Instant)) { + self.batch.push_back(msg_with_time.0); } #[inline] @@ -471,26 +482,26 @@ where fn fill_msg(&mut self, ctx: &Context<'_>) { while !self.buffer.full() { - let msg = match self.queue.pop(ctx) { - Some(msg) => msg, + let msg_with_time = match self.queue.pop(ctx) { + Some(msg_with_time) => msg_with_time, None => return, }; - if msg.get_message().has_snapshot() { + if msg_with_time.0.get_message().has_snapshot() { let mut snapshot = RaftSnapshotData::default(); snapshot - .merge_from_bytes(msg.get_message().get_snapshot().get_data()) + .merge_from_bytes(msg_with_time.0.get_message().get_snapshot().get_data()) .unwrap(); // Witness's snapshot must be empty, no need to send snapshot files, report // immediately if !snapshot.get_meta().get_for_witness() { - self.send_snapshot_sock(msg); + self.send_snapshot_sock(msg_with_time.0); continue; } else { - let rep = self.new_snapshot_reporter(&msg); + let rep = self.new_snapshot_reporter(&msg_with_time.0); rep.report(SnapshotStatus::Finish); } } - self.buffer.push(msg); + self.buffer.push(msg_with_time); } } } @@ -687,8 +698,8 @@ where fn clear_pending_message(&self, reason: &str) { let len = self.queue.len(); for _ in 0..len { - let msg = self.queue.try_pop().unwrap(); - report_unreachable(&self.builder.router, &msg) + let msg_with_time = self.queue.try_pop().unwrap(); + report_unreachable(&self.builder.router, &msg_with_time.0) } REPORT_FAILURE_MSG_COUNTER .with_label_values(&[reason, &self.store_id.to_string()]) @@ -1055,6 +1066,7 @@ where /// the message is enqueued to buffer. Caller is expected to call `flush` to /// ensure all buffered messages are sent out. 
pub fn send(&mut self, msg: RaftMessage) -> result::Result<(), DiscardReason> { + let wait_send_start = Instant::now(); let store_id = msg.get_to_peer().store_id; let grpc_raft_conn_num = self.builder.cfg.value().grpc_raft_conn_num as u64; let conn_id = if grpc_raft_conn_num == 1 { @@ -1092,7 +1104,7 @@ where transport_on_send_store_fp(); loop { if let Some(s) = self.cache.get_mut(&(store_id, conn_id)) { - match s.queue.push(msg) { + match s.queue.push((msg, wait_send_start)) { Ok(_) => { if !s.dirty { s.dirty = true; @@ -1231,7 +1243,7 @@ mod tests { if i != 0 { msg.mut_message().set_context(context.into()); } - msg_buf.push(msg); + msg_buf.push((msg, Instant::now())); } assert!(msg_buf.full()); } @@ -1259,7 +1271,7 @@ mod tests { if i != 0 { msg.set_extra_ctx(ctx); } - msg_buf.push(msg); + msg_buf.push((msg, Instant::now())); } assert!(msg_buf.full()); } @@ -1289,9 +1301,9 @@ mod tests { let default_grpc_msg_len = msg_buf.cfg.max_grpc_send_msg_len as usize; let max_msg_len = default_grpc_msg_len - msg_buf.cfg.raft_client_grpc_send_msg_buffer; - msg_buf.push(new_test_msg(max_msg_len)); + msg_buf.push((new_test_msg(max_msg_len), Instant::now())); assert!(!msg_buf.full()); - msg_buf.push(new_test_msg(1)); + msg_buf.push((new_test_msg(1), Instant::now())); assert!(msg_buf.full()); // update config @@ -1304,10 +1316,10 @@ mod tests { let new_max_msg_len = default_grpc_msg_len * 2 - msg_buf.cfg.raft_client_grpc_send_msg_buffer; for _i in 0..2 { - msg_buf.push(new_test_msg(new_max_msg_len / 2 - 1)); + msg_buf.push((new_test_msg(new_max_msg_len / 2 - 1), Instant::now())); assert!(!msg_buf.full()); } - msg_buf.push(new_test_msg(2)); + msg_buf.push((new_test_msg(2), Instant::now())); assert!(msg_buf.full()); } @@ -1321,7 +1333,7 @@ mod tests { b.iter(|| { for _i in 0..10 { - msg_buf.push(test::black_box(new_test_msg(1024))); + msg_buf.push(test::black_box((new_test_msg(1024), Instant::now()))); } // run clear to mock flush. msg_buf.clear(); diff --git a/src/server/service/kv.rs b/src/server/service/kv.rs index be895af869a..4dc65cec1b1 100644 --- a/src/server/service/kv.rs +++ b/src/server/service/kv.rs @@ -7,7 +7,7 @@ use std::{ atomic::{AtomicU64, Ordering}, Arc, }, - time::Duration, + time::{Duration, SystemTime, UNIX_EPOCH}, }; use api_version::KvFormat; @@ -42,7 +42,7 @@ use tikv_util::{ future::{paired_future_callback, poll_future_notify}, mpsc::future::{unbounded, BatchReceiver, Sender, WakePolicy}, sys::memory_usage_reaches_high_water, - time::Instant, + time::{nanos_to_secs, Instant}, worker::Scheduler, }; use tracker::{set_tls_tracker_token, RequestInfo, RequestType, Tracker, GLOBAL_TRACKERS}; @@ -759,6 +759,7 @@ impl Tikv for Service { let mut stream = stream.map_err(Error::from); while let Some(msg) = stream.try_next().await? { RAFT_MESSAGE_RECV_COUNTER.inc(); + let reject = needs_reject_raft_append(reject_messages_on_memory_ratio); if let Err(err @ RaftStoreError::StoreNotMatch { .. }) = Self::handle_raft_message(store_id, &ch, msg, reject) @@ -811,6 +812,13 @@ impl Tikv for Service { let res = async move { let mut stream = stream.map_err(Error::from); while let Some(mut batch_msg) = stream.try_next().await? 
{ + let now = SystemTime::now() + .duration_since(UNIX_EPOCH) + .unwrap() + .as_nanos() as u64; + let elapsed = nanos_to_secs(now.saturating_sub(batch_msg.last_observed_time)); + RAFT_MESSAGE_DURATION.receive_delay.observe(elapsed); + let len = batch_msg.get_msgs().len(); RAFT_MESSAGE_RECV_COUNTER.inc_by(len as u64); RAFT_MESSAGE_BATCH_SIZE.observe(len as f64); From 5a47da9dec1a0e24a75c6c013cc6705037106fc8 Mon Sep 17 00:00:00 2001 From: Wenqi Mou Date: Sun, 24 Nov 2024 22:34:26 -0500 Subject: [PATCH 32/86] br: rename limiter to rate_limiter for better readability (#17795) close tikv/tikv#17796 rename limiter usage in BR to rate_limiter for better readability Signed-off-by: Wenqi Mou Co-authored-by: ti-chi-bot[bot] <108142056+ti-chi-bot[bot]@users.noreply.github.com> --- components/backup/src/endpoint.rs | 70 +++++++++++++++++------------- components/backup/src/softlimit.rs | 4 +- components/backup/src/writer.rs | 22 +++++----- 3 files changed, 52 insertions(+), 44 deletions(-) diff --git a/components/backup/src/endpoint.rs b/components/backup/src/endpoint.rs index 01c7b1bbd7a..56be0d128f1 100644 --- a/components/backup/src/endpoint.rs +++ b/components/backup/src/endpoint.rs @@ -66,7 +66,9 @@ struct Request { sub_ranges: Vec, start_ts: TimeStamp, end_ts: TimeStamp, - limiter: Limiter, + // cloning on Limiter will share the same underlying token bucket thus can be used as + // a global rate limiter + rate_limiter: Limiter, backend: StorageBackend, cancel: Arc, is_raw_kv: bool, @@ -113,9 +115,9 @@ impl Task { ) -> Result<(Task, Arc)> { let cancel = Arc::new(AtomicBool::new(false)); - let speed_limit = req.get_rate_limit(); - let limiter = Limiter::new(if speed_limit > 0 { - speed_limit as f64 + let rate_limit = req.get_rate_limit(); + let rate_limiter = Limiter::new(if rate_limit > 0 { + rate_limit as f64 } else { f64::INFINITY }); @@ -135,7 +137,7 @@ impl Task { start_ts: req.get_start_version().into(), end_ts: req.get_end_version().into(), backend: req.get_storage_backend().clone(), - limiter, + rate_limiter, cancel: cancel.clone(), is_raw_kv: req.get_is_raw_kv(), dst_api_ver: req.get_dst_api_version(), @@ -217,7 +219,7 @@ struct InMemBackupFiles { start_version: TimeStamp, end_version: TimeStamp, region: Region, - limiter: Option>, + resource_limiter: Option>, } async fn save_backup_file_worker( @@ -228,7 +230,9 @@ async fn save_backup_file_worker( ) { while let Ok(msg) = rx.recv().await { let files = if msg.files.need_flush_keys() { - match with_resource_limiter(msg.files.save(&storage), msg.limiter.clone()).await { + match with_resource_limiter(msg.files.save(&storage), msg.resource_limiter.clone()) + .await + { Ok(mut split_files) => { let mut has_err = false; for file in split_files.iter_mut() { @@ -431,7 +435,7 @@ impl BackupRange { start_version: begin_ts, end_version: backup_ts, region: self.region.clone(), - limiter: resource_limiter.clone(), + resource_limiter: resource_limiter.clone(), }; send_to_worker_with_metrics(&saver, msg).await?; next_file_start_key = this_end_key; @@ -478,7 +482,7 @@ impl BackupRange { start_version: begin_ts, end_version: backup_ts, region: self.region.clone(), - limiter: resource_limiter.clone(), + resource_limiter: resource_limiter.clone(), }; send_to_worker_with_metrics(&saver, msg).await?; @@ -550,7 +554,7 @@ impl BackupRange { &self, mut engine: E, db: E::Local, - limiter: &Limiter, + rate_limiter: &Limiter, file_name: String, cf: CfNameWrap, compression_type: Option, @@ -562,7 +566,7 @@ impl BackupRange { db, &file_name, cf, - limiter.clone(), + 
rate_limiter.clone(), compression_type, compression_level, cipher, @@ -612,7 +616,7 @@ impl BackupRange { start_version: TimeStamp::zero(), end_version: TimeStamp::zero(), region: self.region.clone(), - limiter: None, + resource_limiter: None, }; send_to_worker_with_metrics(&saver_tx, msg).await?; Ok(stat) @@ -635,6 +639,10 @@ impl ConfigManager { } } +/// SoftLimitKeeper can run in the background and adjust the number of threads +/// running based on CPU stats. +/// It only starts to work when enable_auto_tune is turned on in BackupConfig. +/// The initial number of threads is controlled by num_threads in BackupConfig. #[derive(Clone)] struct SoftLimitKeeper { limit: SoftLimit, @@ -681,7 +689,7 @@ impl SoftLimitKeeper { self.limit.resize(quota_val).await.map_err(|err| { warn!( - "error during appling the soft limit for backup."; + "error during applying the soft limit for backup."; "current_limit" => %self.limit.current_cap(), "to_set_value" => %quota_val, "err" => %err, @@ -708,7 +716,7 @@ pub struct Endpoint { tablets: LocalTablets, config_manager: ConfigManager, concurrency_manager: ConcurrencyManager, - softlimit: SoftLimitKeeper, + soft_limit_keeper: SoftLimitKeeper, api_version: ApiVersion, causal_ts_provider: Option>, // used in rawkv apiv2 only resource_ctl: Option>, @@ -885,8 +893,8 @@ impl Endpoint { ); let rt = utils::create_tokio_runtime(config.io_thread_size, "backup-io").unwrap(); let config_manager = ConfigManager(Arc::new(RwLock::new(config))); - let softlimit = SoftLimitKeeper::new(config_manager.clone()); - rt.spawn(softlimit.clone().run()); + let soft_limit_keeper = SoftLimitKeeper::new(config_manager.clone()); + rt.spawn(soft_limit_keeper.clone().run()); Endpoint { store_id, engine, @@ -894,7 +902,7 @@ impl Endpoint { pool: RefCell::new(pool), tablets, io_pool: rt, - softlimit, + soft_limit_keeper, config_manager, concurrency_manager, api_version, @@ -940,7 +948,7 @@ impl Endpoint { let concurrency_manager = self.concurrency_manager.clone(); let batch_size = self.config_manager.0.read().unwrap().batch_size; let sst_max_size = self.config_manager.0.read().unwrap().sst_max_size.0; - let limit = self.softlimit.limit(); + let soft_limit_keeper = self.soft_limit_keeper.limit(); let resource_limiter = self.resource_ctl.as_ref().and_then(|r| { r.get_background_resource_limiter(&request.resource_group_name, &request.source_tag) }); @@ -953,7 +961,7 @@ impl Endpoint { // when get the guard, release it until we finish scanning a batch, // because if we were suspended during scanning, // the region info have higher possibility to change (then we must compensate that by the fine-grained backup). 
- let guard = limit.guard().await; + let guard = soft_limit_keeper.guard().await; if let Err(e) = guard { warn!("failed to retrieve limit guard, omitting."; "err" => %e); }; @@ -1008,7 +1016,7 @@ impl Endpoint { .backup_raw_kv_to_file( engine, db.into_owned(), - &request.limiter, + &request.rate_limiter, name, cf.into(), ct, @@ -1020,7 +1028,7 @@ impl Endpoint { } else { let writer_builder = BackupWriterBuilder::new( store_id, - request.limiter.clone(), + request.rate_limiter.clone(), brange.region.clone(), db.into_owned(), ct, @@ -1149,10 +1157,10 @@ impl Endpoint { } }; let backend = Arc::::from(backend); - let concurrency = self.config_manager.0.read().unwrap().num_threads; - self.pool.borrow_mut().adjust_with(concurrency); + let num_threads = self.config_manager.0.read().unwrap().num_threads; + self.pool.borrow_mut().adjust_with(num_threads); let (tx, rx) = async_channel::bounded(1); - for _ in 0..concurrency { + for _ in 0..num_threads { self.spawn_backup_worker( prs.clone(), request.clone(), @@ -1616,7 +1624,7 @@ pub mod tests { start_ts: 1.into(), end_ts: 1.into(), backend, - limiter: Limiter::new(f64::INFINITY), + rate_limiter: Limiter::new(f64::INFINITY), cancel: Arc::default(), is_raw_kv: false, dst_api_ver: ApiVersion::V1, @@ -1727,7 +1735,7 @@ pub mod tests { start_ts: 1.into(), end_ts: 1.into(), backend: backend.clone(), - limiter: Limiter::new(f64::INFINITY), + rate_limiter: Limiter::new(f64::INFINITY), cancel: Arc::default(), is_raw_kv: false, dst_api_ver: ApiVersion::V1, @@ -1758,7 +1766,7 @@ pub mod tests { start_ts: 1.into(), end_ts: 1.into(), backend, - limiter: Limiter::new(f64::INFINITY), + rate_limiter: Limiter::new(f64::INFINITY), cancel: Arc::default(), is_raw_kv: false, dst_api_ver: ApiVersion::V1, @@ -1868,7 +1876,7 @@ pub mod tests { start_ts: 1.into(), end_ts: 1.into(), backend, - limiter: Limiter::new(f64::INFINITY), + rate_limiter: Limiter::new(f64::INFINITY), cancel: Arc::default(), is_raw_kv: false, dst_api_ver: ApiVersion::V1, @@ -2097,9 +2105,9 @@ pub mod tests { let (mut task, _) = Task::new(req, tx).unwrap(); if len % 2 == 0 { // Make sure the rate limiter is set. - assert!(task.request.limiter.speed_limit().is_finite()); + assert!(task.request.rate_limiter.speed_limit().is_finite()); // Share the same rate limiter. - task.request.limiter = limiter.clone(); + task.request.rate_limiter = limiter.clone(); } endpoint.handle_backup_task(task); let (resp, rx) = block_on(rx.into_future()); @@ -2267,7 +2275,7 @@ pub mod tests { req.set_storage_backend(make_local_backend(&tmp1)); req.set_rate_limit(10 * 1024 * 1024); let (mut task, _) = Task::new(req, tx).unwrap(); - task.request.limiter = limiter; + task.request.rate_limiter = limiter; endpoint.handle_backup_task(task); let (resp, rx) = block_on(rx.into_future()); let resp = resp.unwrap(); diff --git a/components/backup/src/softlimit.rs b/components/backup/src/softlimit.rs index 6afd1f5b2a6..a3de91bb403 100644 --- a/components/backup/src/softlimit.rs +++ b/components/backup/src/softlimit.rs @@ -14,8 +14,8 @@ use tokio::sync::{Semaphore, SemaphorePermit}; use super::Result; -/// SoftLimit is an simple "worker pool" just for -/// restricting the number of workers can running concurrently. +/// SoftLimit is a simple "worker pool" just for +/// restricting the number of workers can run concurrently. /// It is simply a wrapper over [tokio::sync::Semaphore], /// with a counter recording the current permits already and would grant. 
struct SoftLimitInner { diff --git a/components/backup/src/writer.rs b/components/backup/src/writer.rs index a2d8a31f0ea..4e77f20350d 100644 --- a/components/backup/src/writer.rs +++ b/components/backup/src/writer.rs @@ -106,7 +106,7 @@ impl Writer { self, name: &str, cf: CfNameWrap, - limiter: Limiter, + rate_limiter: Limiter, storage: &dyn ExternalStorage, cipher: &CipherInfo, ) -> Result { @@ -132,7 +132,7 @@ impl Writer { .write( &file_name, // AllowStdIo here only introduces the Sha256 reader and an in-memory sst reader. - UnpinReader(Box::new(limiter.limit(AllowStdIo::new(reader)))), + UnpinReader(Box::new(rate_limiter.limit(AllowStdIo::new(reader)))), size, ) .await?; @@ -162,7 +162,7 @@ impl Writer { pub struct BackupWriterBuilder { store_id: u64, - limiter: Limiter, + rate_limiter: Limiter, region: Region, db: EK, compression_type: Option, @@ -174,7 +174,7 @@ pub struct BackupWriterBuilder { impl BackupWriterBuilder { pub fn new( store_id: u64, - limiter: Limiter, + rate_limiter: Limiter, region: Region, db: EK, compression_type: Option, @@ -184,7 +184,7 @@ impl BackupWriterBuilder { ) -> BackupWriterBuilder { Self { store_id, - limiter, + rate_limiter, region, db, compression_type, @@ -203,7 +203,7 @@ impl BackupWriterBuilder { &name, self.compression_type, self.compression_level, - self.limiter.clone(), + self.rate_limiter.clone(), self.sst_max_size, self.cipher.clone(), ) @@ -215,7 +215,7 @@ pub struct BackupWriter { name: String, default: Writer<::SstWriter>, write: Writer<::SstWriter>, - limiter: Limiter, + rate_limiter: Limiter, sst_max_size: u64, cipher: CipherInfo, } @@ -227,7 +227,7 @@ impl BackupWriter { name: &str, compression_type: Option, compression_level: i32, - limiter: Limiter, + rate_limiter: Limiter, sst_max_size: u64, cipher: CipherInfo, ) -> Result> { @@ -250,7 +250,7 @@ impl BackupWriter { name, default: Writer::new(default), write: Writer::new(write), - limiter, + rate_limiter, sst_max_size, cipher, }) @@ -298,7 +298,7 @@ impl BackupWriter { .save_and_build_file( &self.name, CF_DEFAULT.into(), - self.limiter.clone(), + self.rate_limiter.clone(), storage, &self.cipher, ) @@ -312,7 +312,7 @@ impl BackupWriter { .save_and_build_file( &self.name, CF_WRITE.into(), - self.limiter.clone(), + self.rate_limiter.clone(), storage, &self.cipher, ) From cf749a682b7c0f91834b0207a5075bf8586b41bb Mon Sep 17 00:00:00 2001 From: qupeng Date: Mon, 25 Nov 2024 15:22:59 +0800 Subject: [PATCH 33/86] cdc: skip loading old values for un-observed ranges (#17878) close tikv/tikv#17876, fix tikv/tikv#17876, close tikv/tikv#17877 cdc: skip loading old values for un-observed ranges Signed-off-by: qupeng Co-authored-by: ti-chi-bot[bot] <108142056+ti-chi-bot[bot]@users.noreply.github.com> --- components/cdc/src/delegate.rs | 184 +++++++++--------- components/cdc/tests/integrations/test_cdc.rs | 2 +- 2 files changed, 89 insertions(+), 97 deletions(-) diff --git a/components/cdc/src/delegate.rs b/components/cdc/src/delegate.rs index beff91f7b45..1fe747978cd 100644 --- a/components/cdc/src/delegate.rs +++ b/components/cdc/src/delegate.rs @@ -23,9 +23,7 @@ use kvproto::{ }, kvrpcpb::ExtraOp as TxnExtraOp, metapb::{Region, RegionEpoch}, - raft_cmdpb::{ - AdminCmdType, AdminRequest, AdminResponse, CmdType, DeleteRequest, PutRequest, Request, - }, + raft_cmdpb::{AdminCmdType, AdminRequest, AdminResponse, CmdType, PutRequest, Request}, }; use raftstore::{ coprocessor::{Cmd, CmdBatch, ObserveHandle}, @@ -249,13 +247,13 @@ impl Downstream { // In `PendingLock`, `key` is encoded. 
pub enum PendingLock { Track { key: Key, start_ts: MiniLock }, - Untrack { key: Key }, + Untrack { key: Key, start_ts: TimeStamp }, } impl HeapSize for PendingLock { fn approximate_heap_size(&self) -> usize { match self { - PendingLock::Track { key, .. } | PendingLock::Untrack { key } => { + PendingLock::Track { key, .. } | PendingLock::Untrack { key, .. } => { key.approximate_heap_size() } } @@ -388,7 +386,7 @@ impl Delegate { Ok(lock_count_modify) } - fn pop_lock(&mut self, key: Key) -> Result { + fn pop_lock(&mut self, key: Key, start_ts: TimeStamp) -> Result { let mut lock_count_modify = 0; match &mut self.lock_tracker { LockTracker::Pending => unreachable!(), @@ -396,14 +394,17 @@ impl Delegate { let bytes = key.approximate_heap_size(); self.memory_quota.alloc(bytes)?; CDC_PENDING_BYTES_GAUGE.add(bytes as _); - locks.push(PendingLock::Untrack { key }); + locks.push(PendingLock::Untrack { key, start_ts }); } LockTracker::Prepared { locks, .. } => { - if let Some((key, _)) = locks.remove_entry(&key) { - let bytes = key.approximate_heap_size(); - self.memory_quota.free(bytes); - CDC_PENDING_BYTES_GAUGE.sub(bytes as _); - lock_count_modify = -1; + if let BTreeMapEntry::Occupied(x) = locks.entry(key) { + if x.get().ts == start_ts { + let (key, _) = x.remove_entry(); + let bytes = key.approximate_heap_size(); + self.memory_quota.free(bytes); + CDC_PENDING_BYTES_GAUGE.sub(bytes as _); + lock_count_modify = -1; + } } } } @@ -441,15 +442,13 @@ impl Delegate { assert!(x.get().generation <= start_ts.generation); } }, - PendingLock::Untrack { key } => match locks.entry(key.clone()) { - BTreeMapEntry::Vacant(..) => { - warn!("untrack lock not found when try to finish prepare lock tracker"; - "key" => %key); - } - BTreeMapEntry::Occupied(x) => { - x.remove(); + PendingLock::Untrack { key, start_ts } => { + if let BTreeMapEntry::Occupied(x) = locks.entry(key) { + if x.get().ts == start_ts { + x.remove(); + } } - }, + } } } self.memory_quota.free(free_bytes); @@ -858,7 +857,7 @@ impl Delegate { ) -> Result<()> { debug_assert_eq!(self.txn_extra_op.load(), TxnExtraOp::ReadOldValue); - let mut read_old_value = |row: &mut EventRow, read_old_ts| -> Result<()> { + let read_old_value = |row: &mut EventRow, read_old_ts| -> Result<()> { let key = Key::from_raw(&row.key).append_ts(row.start_ts.into()); let old_value = old_value_cb(key, read_old_ts, old_value_cache, statistics)?; row.old_value = old_value.unwrap_or_default(); @@ -869,10 +868,7 @@ impl Delegate { rows_builder.is_one_pc = flags.contains(WriteBatchFlags::ONE_PC); for mut req in requests { match req.get_cmd_type() { - CmdType::Put => { - self.sink_put(req.take_put(), &mut rows_builder, &mut read_old_value)? 
- } - CmdType::Delete => self.sink_delete(req.take_delete(), &mut rows_builder)?, + CmdType::Put => self.sink_put(req.take_put(), &mut rows_builder)?, _ => debug!("cdc skip other command"; "region_id" => self.region_id, "command" => ?req), @@ -881,7 +877,7 @@ impl Delegate { let (raws, txns) = rows_builder.finish_build(); self.sink_downstream_raw(raws, index)?; - self.sink_downstream_tidb(txns)?; + self.sink_downstream_tidb(txns, read_old_value)?; Ok(()) } @@ -921,7 +917,11 @@ impl Delegate { Ok(()) } - fn sink_downstream_tidb(&mut self, entries: Vec<(EventRow, isize)>) -> Result<()> { + fn sink_downstream_tidb( + &mut self, + mut entries: Vec, + mut read_old_value: impl FnMut(&mut EventRow, TimeStamp) -> Result<()>, + ) -> Result<()> { let mut downstreams = Vec::with_capacity(self.downstreams.len()); for d in &mut self.downstreams { if d.kv_api == ChangeDataRequestKvApi::TiDb && d.state.load().ready_for_change_events() @@ -935,14 +935,24 @@ impl Delegate { for downstream in downstreams { let mut filtered_entries = Vec::with_capacity(entries.len()); - for (entry, lock_count_modify) in &entries { - if !downstream.observed_range.contains_raw_key(&entry.key) { + for RowInBuilding { + v, + lock_count_modify, + needs_old_value, + .. + } in &mut entries + { + if !downstream.observed_range.contains_raw_key(&v.key) { continue; } + if let Some(read_old_ts) = needs_old_value { + read_old_value(v, *read_old_ts)?; + *needs_old_value = None; + } if *lock_count_modify != 0 && downstream.lock_heap.is_some() { let lock_heap = downstream.lock_heap.as_mut().unwrap(); - match lock_heap.entry(entry.start_ts.into()) { + match lock_heap.entry(v.start_ts.into()) { BTreeMapEntry::Vacant(x) => { x.insert(*lock_count_modify); } @@ -951,7 +961,7 @@ impl Delegate { assert!( *x.get() >= 0, "lock_count_modify should never be negative, start_ts: {}", - entry.start_ts + v.start_ts ); if *x.get() == 0 { x.remove(); @@ -960,14 +970,13 @@ impl Delegate { } } - if TxnSource::is_lossy_ddl_reorg_source_set(entry.txn_source) - || downstream.filter_loop - && TxnSource::is_cdc_write_source_set(entry.txn_source) + if TxnSource::is_lossy_ddl_reorg_source_set(v.txn_source) + || downstream.filter_loop && TxnSource::is_cdc_write_source_set(v.txn_source) { continue; } - filtered_entries.push(entry.clone()); + filtered_entries.push(v.clone()); } if filtered_entries.is_empty() { continue; @@ -986,17 +995,12 @@ impl Delegate { Ok(()) } - fn sink_put( - &mut self, - put: PutRequest, - rows_builder: &mut RowsBuilder, - read_old_value: impl FnMut(&mut EventRow, TimeStamp) -> Result<()>, - ) -> Result<()> { + fn sink_put(&mut self, put: PutRequest, rows_builder: &mut RowsBuilder) -> Result<()> { let key_mode = ApiV2::parse_key_mode(put.get_key()); if key_mode == KeyMode::Raw { self.sink_raw_put(put, rows_builder) } else { - self.sink_txn_put(put, read_old_value, rows_builder) + self.sink_txn_put(put, rows_builder) } } @@ -1007,16 +1011,11 @@ impl Delegate { Ok(()) } - fn sink_txn_put( - &mut self, - mut put: PutRequest, - mut read_old_value: impl FnMut(&mut EventRow, TimeStamp) -> Result<()>, - rows: &mut RowsBuilder, - ) -> Result<()> { + fn sink_txn_put(&mut self, mut put: PutRequest, rows: &mut RowsBuilder) -> Result<()> { match put.cf.as_str() { "write" => { let key = Key::from_encoded_slice(&put.key).truncate_ts().unwrap(); - let row = rows.txns_by_key.entry(key).or_default(); + let row = rows.txns_by_key.entry(key.clone()).or_default(); if decode_write( put.take_key(), &put.value, @@ -1028,9 +1027,14 @@ impl Delegate { } if 
rows.is_one_pc { + assert_eq!(row.v.r_type, EventLogType::Commit); set_event_row_type(&mut row.v, EventLogType::Committed); let read_old_ts = TimeStamp::from(row.v.commit_ts).prev(); - read_old_value(&mut row.v, read_old_ts)?; + row.needs_old_value = Some(read_old_ts); + } else { + assert_eq!(row.lock_count_modify, 0); + let start_ts = TimeStamp::from(row.v.start_ts); + row.lock_count_modify = self.pop_lock(key, start_ts)?; } } "lock" => { @@ -1048,9 +1052,8 @@ impl Delegate { assert_eq!(row.lock_count_modify, 0); let mini_lock = MiniLock::new(row.v.start_ts, txn_source, generation); row.lock_count_modify = self.push_lock(key, mini_lock)?; - let read_old_ts = std::cmp::max(for_update_ts, row.v.start_ts.into()); - read_old_value(&mut row.v, read_old_ts)?; + row.needs_old_value = Some(read_old_ts); } "" | "default" => { let key = Key::from_encoded(put.take_key()).truncate_ts().unwrap(); @@ -1062,27 +1065,6 @@ impl Delegate { Ok(()) } - fn sink_delete(&mut self, mut delete: DeleteRequest, rows: &mut RowsBuilder) -> Result<()> { - // RawKV (API v2, and only API v2 can use CDC) has no lock and will write to - // default cf only. - match delete.cf.as_str() { - "lock" => { - let key = Key::from_encoded(delete.take_key()); - let lock_count_modify = self.pop_lock(key.clone())?; - if lock_count_modify != 0 { - // If lock_count_modify isn't 0 it means the deletion must come from a commit - // or rollback, instead of any `Unlock` operations. - let row = rows.txns_by_key.get_mut(&key).unwrap(); - assert_eq!(row.lock_count_modify, 0); - row.lock_count_modify = lock_count_modify; - } - } - "" | "default" | "write" => {} - other => panic!("invalid cf {}", other), - } - Ok(()) - } - fn sink_admin(&mut self, request: AdminRequest, mut response: AdminResponse) -> Result<()> { let store_err = match request.get_cmd_type() { AdminCmdType::Split => RaftStoreError::EpochNotMatch( @@ -1158,7 +1140,6 @@ impl Delegate { #[derive(Default)] struct RowsBuilder { - // map[Key]->(row, has_value, lock_count_modify) txns_by_key: HashMap, raws: Vec, @@ -1171,25 +1152,23 @@ struct RowInBuilding { v: EventRow, has_value: bool, lock_count_modify: isize, + needs_old_value: Option, } impl RowsBuilder { - fn finish_build(self) -> (Vec, Vec<(EventRow, isize)>) { + fn finish_build(self) -> (Vec, Vec) { let mut txns = Vec::with_capacity(self.txns_by_key.len()); - for RowInBuilding { - v, - has_value, - lock_count_modify, - } in self.txns_by_key.into_values() - { - if v.r_type == EventLogType::Prewrite && v.op_type == EventRowOpType::Put && !has_value + for row in self.txns_by_key.into_values() { + if row.v.r_type == EventLogType::Prewrite + && row.v.op_type == EventRowOpType::Put + && !row.has_value { // It's possible that a prewrite command only contains lock but without // default. It's not documented by classic Percolator but introduced with // Large-Transaction. Those prewrites are not complete, we must skip them. 
continue; } - txns.push((v, lock_count_modify)); + txns.push(row); } (self.raws, txns) } @@ -1735,9 +1714,7 @@ mod tests { false, ) .to_bytes(); - delegate - .sink_txn_put(put, |_, _| Ok(()), &mut rows_builder) - .unwrap(); + delegate.sink_txn_put(put, &mut rows_builder).unwrap(); } assert_eq!(rows_builder.txns_by_key.len(), 5); @@ -1755,7 +1732,9 @@ mod tests { downstream.get_state().store(DownstreamState::Normal); delegate.add_downstream(downstream); let (_, entries) = rows_builder.finish_build(); - delegate.sink_downstream_tidb(entries).unwrap(); + delegate + .sink_downstream_tidb(entries, |_, _| Ok(())) + .unwrap(); let (mut tx, mut rx) = futures::channel::mpsc::unbounded(); let runtime = tokio::runtime::Runtime::new().unwrap(); @@ -1802,9 +1781,7 @@ mod tests { lock = lock.set_txn_source(txn_source.into()); } put.value = lock.to_bytes(); - delegate - .sink_txn_put(put, |_, _| Ok(()), &mut rows_builder) - .unwrap(); + delegate.sink_txn_put(put, &mut rows_builder).unwrap(); } assert_eq!(rows_builder.txns_by_key.len(), 5); @@ -1822,7 +1799,9 @@ mod tests { downstream.get_state().store(DownstreamState::Normal); delegate.add_downstream(downstream); let (_, entries) = rows_builder.finish_build(); - delegate.sink_downstream_tidb(entries).unwrap(); + delegate + .sink_downstream_tidb(entries, |_, _| Ok(())) + .unwrap(); let (mut tx, mut rx) = futures::channel::mpsc::unbounded(); let runtime = tokio::runtime::Runtime::new().unwrap(); @@ -1914,14 +1893,21 @@ mod tests { assert_eq!(delegate.push_lock(k1, MiniLock::from_ts(100)).unwrap(), 0); assert_eq!(quota.in_use(), 100); - delegate.pop_lock(Key::from_raw(b"key1")).unwrap(); + delegate + .pop_lock(Key::from_raw(b"key1"), TimeStamp::from(99)) + .unwrap(); assert_eq!(quota.in_use(), 117); + delegate + .pop_lock(Key::from_raw(b"key1"), TimeStamp::from(100)) + .unwrap(); + assert_eq!(quota.in_use(), 134); + let mut k2 = Vec::with_capacity(200); k2.extend_from_slice(Key::from_raw(b"key2").as_encoded()); let k2 = Key::from_encoded(k2); assert_eq!(delegate.push_lock(k2, MiniLock::from_ts(100)).unwrap(), 0); - assert_eq!(quota.in_use(), 317); + assert_eq!(quota.in_use(), 334); let mut scaned_locks = BTreeMap::default(); scaned_locks.insert(Key::from_raw(b"key1"), MiniLock::from_ts(100)); @@ -1932,8 +1918,12 @@ mod tests { .unwrap(); assert_eq!(quota.in_use(), 34); - delegate.pop_lock(Key::from_raw(b"key2")).unwrap(); - delegate.pop_lock(Key::from_raw(b"key3")).unwrap(); + delegate + .pop_lock(Key::from_raw(b"key2"), TimeStamp::from(100)) + .unwrap(); + delegate + .pop_lock(Key::from_raw(b"key3"), TimeStamp::from(100)) + .unwrap(); assert_eq!(quota.in_use(), 0); let v = delegate @@ -1955,7 +1945,9 @@ mod tests { assert!(delegate.init_lock_tracker()); assert!(!delegate.init_lock_tracker()); - delegate.pop_lock(Key::from_raw(b"key1")).unwrap(); + delegate + .pop_lock(Key::from_raw(b"key1"), TimeStamp::zero()) + .unwrap(); let mut scaned_locks = BTreeMap::default(); scaned_locks.insert(Key::from_raw(b"key2"), MiniLock::from_ts(100)); delegate diff --git a/components/cdc/tests/integrations/test_cdc.rs b/components/cdc/tests/integrations/test_cdc.rs index c6ccfff0ad5..77a3d240b2e 100644 --- a/components/cdc/tests/integrations/test_cdc.rs +++ b/components/cdc/tests/integrations/test_cdc.rs @@ -1131,8 +1131,8 @@ fn test_old_value_multi_changefeeds_impl() { let (mut req_tx_2, event_feed_wrap_2, receive_event_2) = new_event_feed(suite.get_region_cdc_client(1)); block_on(req_tx_2.send((req, WriteFlags::default()))).unwrap(); - sleep_ms(1000); + // Insert value 
let mut m1 = Mutation::default(); let k1 = b"xk1".to_vec(); From ed84f5789bb34f0bc79e1fc576fac2e4a6b9dbd8 Mon Sep 17 00:00:00 2001 From: Ti Chi Robot Date: Mon, 25 Nov 2024 15:59:14 +0800 Subject: [PATCH 34/86] OWNERS: Auto Sync OWNERS files from community membership (#17705) Signed-off-by: Ti Chi Robot Co-authored-by: ti-chi-bot[bot] <108142056+ti-chi-bot[bot]@users.noreply.github.com> --- OWNERS | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/OWNERS b/OWNERS index f70f2cbf0ea..35985a0a7c6 100644 --- a/OWNERS +++ b/OWNERS @@ -41,6 +41,7 @@ approvers: - tabokie - TennyZhuang - tonyxuqqi + - v01dstar - yiwu-arbug - you06 - youjiali1995 @@ -73,7 +74,6 @@ reviewers: - rleungx - Rustin170506 - tier-cap - - v01dstar - wjhuang2016 - wshwsh12 - Xuanwo From 58f4a28b14ffbece6b6b57c38d543b4aefbd85aa Mon Sep 17 00:00:00 2001 From: Yang Zhang Date: Tue, 26 Nov 2024 22:06:54 -0800 Subject: [PATCH 35/86] metrics: Correct bloom prefix efficiency monitoring (#17872) close tikv/tikv#17866 Update RocksDB bloom prefix seek efficiency monitoring Signed-off-by: Yang Zhang Co-authored-by: ti-chi-bot[bot] <108142056+ti-chi-bot[bot]@users.noreply.github.com> --- components/engine_rocks/src/rocks_metrics.rs | 28 ++++++++ .../engine_rocks/src/rocks_metrics_defs.rs | 4 ++ metrics/grafana/tikv_details.dashboard.py | 72 +++++++++++++++---- metrics/grafana/tikv_details.json | 4 +- metrics/grafana/tikv_details.json.sha256 | 2 +- 5 files changed, 93 insertions(+), 17 deletions(-) diff --git a/components/engine_rocks/src/rocks_metrics.rs b/components/engine_rocks/src/rocks_metrics.rs index 438b108bb85..b3f7fd5a221 100644 --- a/components/engine_rocks/src/rocks_metrics.rs +++ b/components/engine_rocks/src/rocks_metrics.rs @@ -94,6 +94,10 @@ make_auto_flush_static_metric! 
{ failure, success, trigger_next, + last_level_seek_filtered, + last_level_seek_filter_match, + non_last_level_seek_filtered, + non_last_level_seek_filter_match, } pub struct EngineTickerMetrics : LocalIntCounter { @@ -228,6 +232,30 @@ pub fn flush_engine_ticker_metrics(t: TickerType, value: u64, name: &str) { .bloom_useful .inc_by(value); } + TickerType::LastLevelSeekFiltered => { + STORE_ENGINE_BLOOM_EFFICIENCY + .get(name_enum) + .last_level_seek_filtered + .inc_by(value); + } + TickerType::LastLevelSeekFilterMatch => { + STORE_ENGINE_BLOOM_EFFICIENCY + .get(name_enum) + .last_level_seek_filter_match + .inc_by(value); + } + TickerType::NonLastLevelSeekFiltered => { + STORE_ENGINE_BLOOM_EFFICIENCY + .get(name_enum) + .non_last_level_seek_filtered + .inc_by(value); + } + TickerType::NonLastLevelSeekFilterMatch => { + STORE_ENGINE_BLOOM_EFFICIENCY + .get(name_enum) + .non_last_level_seek_filter_match + .inc_by(value); + } TickerType::MemtableHit => { STORE_ENGINE_MEMTABLE_EFFICIENCY .get(name_enum) diff --git a/components/engine_rocks/src/rocks_metrics_defs.rs b/components/engine_rocks/src/rocks_metrics_defs.rs index 2b70ff985c5..1796886d7b9 100644 --- a/components/engine_rocks/src/rocks_metrics_defs.rs +++ b/components/engine_rocks/src/rocks_metrics_defs.rs @@ -110,6 +110,10 @@ pub const ENGINE_TICKER_TYPES: &[TickerType] = &[ TickerType::FlushWriteBytes, TickerType::ReadAmpEstimateUsefulBytes, TickerType::ReadAmpTotalReadBytes, + TickerType::LastLevelSeekFiltered, + TickerType::LastLevelSeekFilterMatch, + TickerType::NonLastLevelSeekFiltered, + TickerType::NonLastLevelSeekFilterMatch, ]; pub const TITAN_ENGINE_TICKER_TYPES: &[TickerType] = &[ diff --git a/metrics/grafana/tikv_details.dashboard.py b/metrics/grafana/tikv_details.dashboard.py index 0a9b619cd4b..e75efd9b28b 100644 --- a/metrics/grafana/tikv_details.dashboard.py +++ b/metrics/grafana/tikv_details.dashboard.py @@ -6023,22 +6023,66 @@ def RocksDB() -> RowPanel: ), target( expr=expr_operator( - expr_sum_rate( - "tikv_engine_bloom_efficiency", - label_selectors=[ - 'db="$db"', - 'type="bloom_prefix_useful"', - ], - by_labels=[], # override default by instance. + expr_operator( + expr_sum_rate( + "tikv_engine_bloom_efficiency", + label_selectors=[ + 'db="$db"', + 'type="last_level_seek_filtered"', + ], + by_labels=[], # override default by instance. + ), + "+", + expr_sum_rate( + "tikv_engine_bloom_efficiency", + label_selectors=[ + 'db="$db"', + 'type="non_last_level_seek_filtered"', + ], + by_labels=[], # override default by instance. + ), ), "/", - expr_sum_rate( - "tikv_engine_bloom_efficiency", - label_selectors=[ - 'db="$db"', - 'type="bloom_prefix_checked"', - ], - by_labels=[], # override default by instance. + expr_operator( + expr_operator( + expr_sum_rate( + "tikv_engine_bloom_efficiency", + label_selectors=[ + 'db="$db"', + 'type="last_level_seek_filtered"', + ], + by_labels=[], # override default by instance. + ), + "+", + expr_sum_rate( + "tikv_engine_bloom_efficiency", + label_selectors=[ + 'db="$db"', + 'type="non_last_level_seek_filtered"', + ], + by_labels=[], # override default by instance. + ), + ), + "+", + expr_operator( + expr_sum_rate( + "tikv_engine_bloom_efficiency", + label_selectors=[ + 'db="$db"', + 'type="last_level_seek_filter_match"', + ], + by_labels=[], # override default by instance. + ), + "+", + expr_sum_rate( + "tikv_engine_bloom_efficiency", + label_selectors=[ + 'db="$db"', + 'type="non_last_level_seek_filter_match"', + ], + by_labels=[], # override default by instance. 
+ ), + ), ), ), legend_format="bloom prefix", diff --git a/metrics/grafana/tikv_details.json b/metrics/grafana/tikv_details.json index 4d310b46d86..5ff6c19581f 100644 --- a/metrics/grafana/tikv_details.json +++ b/metrics/grafana/tikv_details.json @@ -31646,7 +31646,7 @@ }, { "datasource": "${DS_TEST-CLUSTER}", - "expr": "(sum(rate(\n tikv_engine_bloom_efficiency\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"bloom_prefix_useful\"}\n [$__rate_interval]\n)) by ($additional_groupby) / sum(rate(\n tikv_engine_bloom_efficiency\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"bloom_prefix_checked\"}\n [$__rate_interval]\n)) by ($additional_groupby) )", + "expr": "((sum(rate(\n tikv_engine_bloom_efficiency\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"last_level_seek_filtered\"}\n [$__rate_interval]\n)) by ($additional_groupby) + sum(rate(\n tikv_engine_bloom_efficiency\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"non_last_level_seek_filtered\"}\n [$__rate_interval]\n)) by ($additional_groupby) ) / ((sum(rate(\n tikv_engine_bloom_efficiency\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"last_level_seek_filtered\"}\n [$__rate_interval]\n)) by ($additional_groupby) + sum(rate(\n tikv_engine_bloom_efficiency\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"non_last_level_seek_filtered\"}\n [$__rate_interval]\n)) by ($additional_groupby) ) + (sum(rate(\n tikv_engine_bloom_efficiency\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"last_level_seek_filter_match\"}\n [$__rate_interval]\n)) by ($additional_groupby) + sum(rate(\n tikv_engine_bloom_efficiency\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"non_last_level_seek_filter_match\"}\n [$__rate_interval]\n)) by ($additional_groupby) )))", "format": "time_series", "hide": false, "instant": false, @@ -31654,7 +31654,7 @@ "intervalFactor": 1, "legendFormat": "bloom prefix {{$additional_groupby}}", "metric": "", - "query": "(sum(rate(\n tikv_engine_bloom_efficiency\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"bloom_prefix_useful\"}\n [$__rate_interval]\n)) by ($additional_groupby) / sum(rate(\n tikv_engine_bloom_efficiency\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"bloom_prefix_checked\"}\n [$__rate_interval]\n)) by ($additional_groupby) )", + "query": "((sum(rate(\n tikv_engine_bloom_efficiency\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"last_level_seek_filtered\"}\n [$__rate_interval]\n)) by ($additional_groupby) + sum(rate(\n tikv_engine_bloom_efficiency\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"non_last_level_seek_filtered\"}\n [$__rate_interval]\n)) by ($additional_groupby) ) / ((sum(rate(\n tikv_engine_bloom_efficiency\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"last_level_seek_filtered\"}\n [$__rate_interval]\n)) by ($additional_groupby) + sum(rate(\n tikv_engine_bloom_efficiency\n 
{k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"non_last_level_seek_filtered\"}\n [$__rate_interval]\n)) by ($additional_groupby) ) + (sum(rate(\n tikv_engine_bloom_efficiency\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"last_level_seek_filter_match\"}\n [$__rate_interval]\n)) by ($additional_groupby) + sum(rate(\n tikv_engine_bloom_efficiency\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"non_last_level_seek_filter_match\"}\n [$__rate_interval]\n)) by ($additional_groupby) )))", "refId": "", "step": 10, "target": "" diff --git a/metrics/grafana/tikv_details.json.sha256 b/metrics/grafana/tikv_details.json.sha256 index cc657439837..27eae77b44a 100644 --- a/metrics/grafana/tikv_details.json.sha256 +++ b/metrics/grafana/tikv_details.json.sha256 @@ -1 +1 @@ -b4ba015c2532b3191788f5c1e12bcebec27d64560f85900f2f3fb2e3c05d88fb ./metrics/grafana/tikv_details.json +f772c8b34f29ca1c929b316051740cbbcfb99e8bc4b2693bd739b87487bfde64 ./metrics/grafana/tikv_details.json From b94584c08b3dc75aa7ccc9baeb5c50db8b21031a Mon Sep 17 00:00:00 2001 From: ris <79858083+RidRisR@users.noreply.github.com> Date: Thu, 28 Nov 2024 11:15:44 +0800 Subject: [PATCH 36/86] Refactor Resizable Runtime from blocking TiKV shutting down. (#17784) close tikv/tikv#17807 A new version of resizable runtime is added, with same performance but won't block the tikv shutting down. Signed-off-by: RidRisR <79858083+RidRisR@users.noreply.github.com> --- .config/nextest.toml | 2 +- Cargo.lock | 50 ++- .../backup-stream/src/subscription_manager.rs | 20 +- components/backup/src/endpoint.rs | 13 +- components/sst_importer/src/config.rs | 19 +- components/sst_importer/src/import_mode.rs | 24 +- components/sst_importer/src/import_mode2.rs | 13 +- components/sst_importer/src/sst_importer.rs | 46 ++- components/tikv_util/Cargo.toml | 3 +- .../tikv_util/src/resizable_threadpool.rs | 379 +++++++++++++----- components/tikv_util/src/stream.rs | 3 +- src/import/sst_service.rs | 30 +- 12 files changed, 410 insertions(+), 192 deletions(-) diff --git a/.config/nextest.toml b/.config/nextest.toml index 2caec4b0c05..6f67aa5ecdb 100644 --- a/.config/nextest.toml +++ b/.config/nextest.toml @@ -1,7 +1,7 @@ [profile.ci] retries = 2 # Run at most 3 times fail-fast = false -slow-timeout = { period = "80s", terminate-after = 2 } # Timeout 2m +slow-timeout = { period = "60s", terminate-after = 2 } # Timeout 2m failure-output = "final" [profile.ci.junit] diff --git a/Cargo.lock b/Cargo.lock index 49f4541c393..806a892f76f 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3145,7 +3145,7 @@ dependencies = [ "httpdate", "itoa 1.0.1", "pin-project-lite", - "socket2", + "socket2 0.4.7", "tokio", "tower-service", "tracing", @@ -3828,13 +3828,14 @@ dependencies = [ [[package]] name = "mio" -version = "0.8.11" +version = "1.0.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a4a650543ca06a924e8b371db273b2756685faae30f8487da1b56505a8f78b0c" +checksum = "80e04d1dcff3aae0704555fe5fee3bcfaf3d1fdf8a7e521d5b9d2b42acb52cec" dependencies = [ + "hermit-abi 0.3.9", "libc 0.2.151", "wasi 0.11.0+wasi-snapshot-preview1", - "windows-sys 0.48.0", + "windows-sys 0.52.0", ] [[package]] @@ -4527,9 +4528,9 @@ dependencies = [ [[package]] name = "pin-project-lite" -version = "0.2.9" +version = "0.2.15" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"e0a7ae3ac2f1173085d398531c705756c94a4c56843785df85a60c1a0afac116" +checksum = "915a1e146535de9163f3987b8944ed8cf49a18bb0056bcebcdcece385cece4ff" [[package]] name = "pin-utils" @@ -6127,6 +6128,16 @@ dependencies = [ "winapi 0.3.9", ] +[[package]] +name = "socket2" +version = "0.5.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ce305eb0b4296696835b71df73eb912e0f1ffd2556a501fcede6e0c50349191c" +dependencies = [ + "libc 0.2.151", + "windows-sys 0.52.0", +] + [[package]] name = "sst_importer" version = "0.1.0" @@ -7298,6 +7309,7 @@ dependencies = [ "tokio", "tokio-executor", "tokio-timer", + "tokio-util", "toml", "tracker", "url", @@ -7390,21 +7402,20 @@ dependencies = [ [[package]] name = "tokio" -version = "1.25.3" +version = "1.41.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8666f87015685834a42aa61a391303d3bee0b1442dd9cf93e3adf4cbaf8de75a" +checksum = "22cfb5bee7a6a52939ca9224d6ac897bb669134078daa8735560897f69de4d33" dependencies = [ - "autocfg", + "backtrace", "bytes", "libc 0.2.151", - "mio 0.8.11", - "num_cpus", + "mio 1.0.2", "parking_lot 0.12.1", "pin-project-lite", "signal-hook-registry", - "socket2", + "socket2 0.5.7", "tokio-macros", - "windows-sys 0.42.0", + "windows-sys 0.52.0", ] [[package]] @@ -7418,13 +7429,13 @@ dependencies = [ [[package]] name = "tokio-macros" -version = "1.7.0" +version = "2.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b557f72f448c511a979e2564e55d74e6c4432fc96ff4f6241bc6bded342643b7" +checksum = "693d596312e88961bc67d7f1f97af8a70227d9f90c31bba5806eec004978d752" dependencies = [ "proc-macro2", "quote", - "syn 1.0.103", + "syn 2.0.79", ] [[package]] @@ -7473,17 +7484,18 @@ dependencies = [ [[package]] name = "tokio-util" -version = "0.7.2" +version = "0.7.12" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f988a1a1adc2fb21f9c12aa96441da33a1728193ae0b95d2be22dbd17fcb4e5c" +checksum = "61e7c3654c13bcd040d4a03abee2c75b1d14a37b423cf5a813ceae1cc903ec6a" dependencies = [ "bytes", "futures-core", "futures-io", "futures-sink", + "futures-util", + "hashbrown 0.14.0", "pin-project-lite", "tokio", - "tracing", ] [[package]] diff --git a/components/backup-stream/src/subscription_manager.rs b/components/backup-stream/src/subscription_manager.rs index e1803048d71..15903d11e72 100644 --- a/components/backup-stream/src/subscription_manager.rs +++ b/components/backup-stream/src/subscription_manager.rs @@ -920,17 +920,15 @@ mod test { fail::cfg("execute_scan_command_sleep_100", "return").unwrap(); for _ in 0..100 { let wg = wg.clone(); - assert!( - block_on(pool.request(ScanCmd { - region: Default::default(), - handle: Default::default(), - last_checkpoint: Default::default(), - feedback_channel: tx.clone(), - // Note: Maybe make here a Box or some other trait? - _work: wg.work(), - })) - .is_ok() - ) + block_on(pool.request(ScanCmd { + region: Default::default(), + handle: Default::default(), + last_checkpoint: Default::default(), + feedback_channel: tx.clone(), + // Note: Maybe make here a Box or some other trait? 
+ _work: wg.work(), + })) + .unwrap(); } should_finish_in(move || drop(pool), Duration::from_secs(5)); diff --git a/components/backup/src/endpoint.rs b/components/backup/src/endpoint.rs index 56be0d128f1..0bf0f5105f5 100644 --- a/components/backup/src/endpoint.rs +++ b/components/backup/src/endpoint.rs @@ -887,7 +887,8 @@ impl Endpoint { resource_ctl: Option>, ) -> Endpoint { let pool = ResizableRuntime::new( - "backup-worker", + config.num_threads, + "bkwkr", Box::new(utils::create_tokio_runtime), Box::new(|new_size| BACKUP_THREAD_POOL_SIZE_GAUGE.set(new_size as i64)), ); @@ -1514,11 +1515,11 @@ pub mod tests { let counter = Arc::new(AtomicU32::new(0)); let mut pool = ResizableRuntime::new( + 3, "bkwkr", Box::new(utils::create_tokio_runtime), Box::new(|new_size: usize| BACKUP_THREAD_POOL_SIZE_GAUGE.set(new_size as i64)), ); - pool.adjust_with(3); for i in 0..8 { let ctr = counter.clone(); @@ -2555,20 +2556,20 @@ pub mod tests { endpoint.get_config_manager().set_num_threads(15); let (task, _) = Task::new(req.clone(), tx.clone()).unwrap(); endpoint.handle_backup_task(task); - assert!(endpoint.pool.borrow().size == 15); + assert!(endpoint.pool.borrow().size() == 15); // shrink thread pool endpoint.get_config_manager().set_num_threads(10); req.set_start_key(vec![b'2']); let (task, _) = Task::new(req.clone(), tx.clone()).unwrap(); endpoint.handle_backup_task(task); - assert!(endpoint.pool.borrow().size == 10); + assert!(endpoint.pool.borrow().size() == 10); endpoint.get_config_manager().set_num_threads(3); req.set_start_key(vec![b'3']); let (task, _) = Task::new(req, tx).unwrap(); endpoint.handle_backup_task(task); - assert!(endpoint.pool.borrow().size == 3); + assert!(endpoint.pool.borrow().size() == 3); // make sure all tasks can finish properly. let responses = block_on(rx.collect::>()); @@ -2578,11 +2579,11 @@ pub mod tests { // but the panic must be checked manually. (It may panic at tokio runtime // threads) let mut pool = ResizableRuntime::new( + 1, "bkwkr", Box::new(utils::create_tokio_runtime), Box::new(|new_size: usize| BACKUP_THREAD_POOL_SIZE_GAUGE.set(new_size as i64)), ); - pool.adjust_with(1); pool.spawn(async { tokio::time::sleep(Duration::from_millis(100)).await }); pool.adjust_with(2); drop(pool); diff --git a/components/sst_importer/src/config.rs b/components/sst_importer/src/config.rs index 543ff765b80..741c90c8e65 100644 --- a/components/sst_importer/src/config.rs +++ b/components/sst_importer/src/config.rs @@ -1,15 +1,12 @@ // Copyright 2018 TiKV Project Authors. Licensed under Apache-2.0. 
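// `ConfigManager` in this file now holds only a `Weak<Mutex<ResizableRuntime>>`; when
// `num_threads` changes, `dispatch` upgrades the weak reference and calls `adjust_with`,
// so a live pool is resized on config change without the config manager keeping the
// runtime alive on its own.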
- use std::{ error::Error, result::Result, - sync::{Arc, RwLock}, + sync::{Arc, Mutex, RwLock, Weak}, }; use online_config::{self, OnlineConfig}; -use tikv_util::{ - config::ReadableDuration, resizable_threadpool::ResizableRuntimeHandle, HandyRwLock, -}; +use tikv_util::{config::ReadableDuration, resizable_threadpool::ResizableRuntime, HandyRwLock}; #[derive(Clone, Serialize, Deserialize, PartialEq, Debug, OnlineConfig)] #[serde(default)] @@ -65,14 +62,14 @@ impl Config { #[derive(Clone)] pub struct ConfigManager { pub config: Arc>, - threads: ResizableRuntimeHandle, + pool: Weak>, } impl ConfigManager { - pub fn new(cfg: Config, threads: ResizableRuntimeHandle) -> Self { + pub fn new(cfg: Config, pool: Weak>) -> Self { ConfigManager { config: Arc::new(RwLock::new(cfg)), - threads, + pool, } } } @@ -95,7 +92,11 @@ impl online_config::ConfigManager for ConfigManager { return Err(e); } - self.threads.adjust_with(cfg.num_threads); + if let Some(pool) = self.pool.upgrade() { + let mut pool = pool.lock().unwrap(); + pool.adjust_with(cfg.num_threads); + } + *self.wl() = cfg; Ok(()) } diff --git a/components/sst_importer/src/import_mode.rs b/components/sst_importer/src/import_mode.rs index 43da9eb3e70..0070462892d 100644 --- a/components/sst_importer/src/import_mode.rs +++ b/components/sst_importer/src/import_mode.rs @@ -11,7 +11,7 @@ use std::{ use engine_traits::{CfOptions, DbOptions, KvEngine}; use futures_util::compat::Future01CompatExt; use kvproto::import_sstpb::*; -use tikv_util::{resizable_threadpool::ResizableRuntimeHandle, timer::GLOBAL_TIMER_HANDLE}; +use tikv_util::{resizable_threadpool::DeamonRuntimeHandle, timer::GLOBAL_TIMER_HANDLE}; use super::{Config, Result}; @@ -88,7 +88,7 @@ impl ImportModeSwitcher { } // start_resizable_threads only serves for resizable runtime - pub fn start_resizable_threads(&self, executor: &ResizableRuntimeHandle, db: E) { + pub fn start_resizable_threads(&self, executor: &DeamonRuntimeHandle, db: E) { // spawn a background future to put TiKV back into normal mode after timeout let inner = self.inner.clone(); let switcher = Arc::downgrade(&inner); @@ -252,7 +252,7 @@ mod tests { use super::*; fn create_tokio_runtime(_: usize, _: &str) -> TokioResult { - tokio::runtime::Builder::new_current_thread() + tokio::runtime::Builder::new_multi_thread() .enable_all() .build() } @@ -314,11 +314,14 @@ mod tests { let cfg = Config::default(); - let mut threads = - ResizableRuntime::new("test", Box::new(create_tokio_runtime), Box::new(|_| {})); - threads.adjust_with(cfg.num_threads); + let threads = ResizableRuntime::new( + cfg.num_threads, + "test", + Box::new(create_tokio_runtime), + Box::new(|_| {}), + ); let switcher = ImportModeSwitcher::new(&cfg); - switcher.start_resizable_threads(&ResizableRuntimeHandle::new(threads), db.clone()); + switcher.start_resizable_threads(&threads.handle(), db.clone()); check_import_options(&db, &normal_db_options, &normal_cf_options); assert!(switcher.enter_import_mode(&db, mf).unwrap()); check_import_options(&db, &import_db_options, &import_cf_options); @@ -350,11 +353,10 @@ mod tests { ..Config::default() }; - let mut threads = - ResizableRuntime::new("test", Box::new(create_tokio_runtime), Box::new(|_| {})); - threads.adjust_with(4); + let threads = + ResizableRuntime::new(4, "test", Box::new(create_tokio_runtime), Box::new(|_| {})); let switcher = ImportModeSwitcher::new(&cfg); - let handle = ResizableRuntimeHandle::new(threads); + let handle = threads.handle(); switcher.start_resizable_threads(&handle, db.clone()); 
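        // `threads.handle()` returns a weak `DeamonRuntimeHandle`; the timeout-check future
        // spawned by `start_resizable_threads` stays alive only as long as `threads` does,
        // which is why the runtime is bound to a local here instead of being moved into a
        // `ResizableRuntimeHandle` as before.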
check_import_options(&db, &normal_db_options, &normal_cf_options); diff --git a/components/sst_importer/src/import_mode2.rs b/components/sst_importer/src/import_mode2.rs index ae5f72a5b00..fa4a3a189f5 100644 --- a/components/sst_importer/src/import_mode2.rs +++ b/components/sst_importer/src/import_mode2.rs @@ -8,7 +8,7 @@ use std::{ use collections::{HashMap, HashSet}; use futures_util::compat::Future01CompatExt; use kvproto::{import_sstpb::Range, metapb::Region}; -use tikv_util::{resizable_threadpool::ResizableRuntimeHandle, timer::GLOBAL_TIMER_HANDLE}; +use tikv_util::{resizable_threadpool::DeamonRuntimeHandle, timer::GLOBAL_TIMER_HANDLE}; use super::Config; @@ -55,7 +55,7 @@ impl ImportModeSwitcherV2 { ImportModeSwitcherV2 { inner } } - pub fn start_resizable_threads(&self, executor: &ResizableRuntimeHandle) { + pub fn start_resizable_threads(&self, executor: &DeamonRuntimeHandle) { // spawn a background future to put regions back into normal mode after timeout let inner = self.inner.clone(); let switcher = Arc::downgrade(&inner); @@ -162,7 +162,7 @@ mod test { type TokioResult = std::io::Result; fn create_tokio_runtime(_: usize, _: &str) -> TokioResult { - tokio::runtime::Builder::new_current_thread() + tokio::runtime::Builder::new_multi_thread() .enable_all() .build() } @@ -276,10 +276,9 @@ mod test { ..Config::default() }; - let mut threads = - ResizableRuntime::new("test", Box::new(create_tokio_runtime), Box::new(|_| {})); - threads.adjust_with(4); - let handle = ResizableRuntimeHandle::new(threads); + let threads = + ResizableRuntime::new(4, "test", Box::new(create_tokio_runtime), Box::new(|_| {})); + let handle = threads.handle(); let switcher = ImportModeSwitcherV2::new(&cfg); let mut region = Region::default(); diff --git a/components/sst_importer/src/sst_importer.rs b/components/sst_importer/src/sst_importer.rs index b3102d0c4b0..70206bba6d2 100644 --- a/components/sst_importer/src/sst_importer.rs +++ b/components/sst_importer/src/sst_importer.rs @@ -39,7 +39,7 @@ use tikv_util::{ }, future::RescheduleChecker, memory::{MemoryQuota, OwnedAllocated}, - resizable_threadpool::ResizableRuntimeHandle, + resizable_threadpool::DeamonRuntimeHandle, sys::{thread::ThreadBuildWrapper, SysQuota}, time::{Instant, Limiter}, Either, HandyRwLock, @@ -265,7 +265,7 @@ impl SstImporter { } } - pub fn start_switch_mode_check(&self, executor: &ResizableRuntimeHandle, db: Option) { + pub fn start_switch_mode_check(&self, executor: &DeamonRuntimeHandle, db: Option) { match &self.switcher { Either::Left(switcher) => switcher.start_resizable_threads(executor, db.unwrap()), Either::Right(switcher) => switcher.start_resizable_threads(executor), @@ -1616,7 +1616,10 @@ mod tests { use std::{ io::{self, Cursor}, ops::Sub, - sync::atomic::{AtomicUsize, Ordering}, + sync::{ + atomic::{AtomicUsize, Ordering}, + Mutex, + }, usize, }; @@ -2299,12 +2302,17 @@ mod tests { }; let change = cfg.diff(&cfg_new); - let threads = - ResizableRuntime::new("test", Box::new(create_tokio_runtime), Box::new(|_| {})); - let handle = ResizableRuntimeHandle::new(threads); + let threads = ResizableRuntime::new( + cfg.num_threads, + "test", + Box::new(create_tokio_runtime), + Box::new(|_| {}), + ); + + let threads_clone = Arc::new(Mutex::new(threads)); // create config manager and update config. 
- let mut cfg_mgr = ImportConfigManager::new(cfg, handle); + let mut cfg_mgr = ImportConfigManager::new(cfg, Arc::downgrade(&threads_clone)); cfg_mgr.dispatch(change).unwrap(); importer.update_config_memory_use_ratio(&cfg_mgr); @@ -2328,29 +2336,35 @@ mod tests { }; let change = cfg.diff(&cfg_new); - let threads = - ResizableRuntime::new("test", Box::new(create_tokio_runtime), Box::new(|_| {})); - let handle = ResizableRuntimeHandle::new(threads); + let threads = ResizableRuntime::new( + cfg.num_threads, + "test", + Box::new(create_tokio_runtime), + Box::new(|_| {}), + ); + + let threads_clone = Arc::new(Mutex::new(threads)); - let mut cfg_mgr = ImportConfigManager::new(cfg, handle); + let mut cfg_mgr = ImportConfigManager::new(cfg, Arc::downgrade(&threads_clone)); let r = cfg_mgr.dispatch(change); assert!(r.is_err()); } #[test] fn test_update_import_num_threads() { - let mut threads = ResizableRuntime::new( + let cfg = Config::default(); + let threads = ResizableRuntime::new( + Config::default().num_threads, "test", Box::new(create_tokio_runtime), Box::new(|new_size: usize| { COUNTER.store(new_size, Ordering::SeqCst); }), ); - threads.adjust_with(Config::default().num_threads); - let handle = ResizableRuntimeHandle::new(threads); - let mut cfg_mgr = ImportConfigManager::new(Config::default(), handle); - assert_eq!(COUNTER.load(Ordering::SeqCst), cfg_mgr.rl().num_threads); + let threads_clone = Arc::new(Mutex::new(threads)); + let mut cfg_mgr = ImportConfigManager::new(cfg, Arc::downgrade(&threads_clone)); + assert_eq!(cfg_mgr.rl().num_threads, Config::default().num_threads); let cfg_new = Config { diff --git a/components/tikv_util/Cargo.toml b/components/tikv_util/Cargo.toml index e1ab04cc732..1ef223db4e9 100644 --- a/components/tikv_util/Cargo.toml +++ b/components/tikv_util/Cargo.toml @@ -60,9 +60,10 @@ sysinfo = "0.26" thiserror = "1.0" tikv_alloc = { workspace = true } time = { workspace = true } -tokio = { version = "1.5", features = ["rt-multi-thread"] } +tokio = { version = "1.5", features = ["rt-multi-thread","time", "rt", "macros", "sync", "full"] } tokio-executor = { workspace = true } tokio-timer = { workspace = true } +tokio-util = { version = "0.7", features = ["rt"] } tracker = { workspace = true } url = "2" yatp = { workspace = true } diff --git a/components/tikv_util/src/resizable_threadpool.rs b/components/tikv_util/src/resizable_threadpool.rs index ce3cf944115..a393dd9f8d2 100644 --- a/components/tikv_util/src/resizable_threadpool.rs +++ b/components/tikv_util/src/resizable_threadpool.rs @@ -1,161 +1,346 @@ // Copyright 2021 TiKV Project Authors. Licensed under Apache-2.0. -use std::sync::{Arc, RwLock}; +use std::sync::{Arc, Mutex, Weak}; use futures::Future; -use tokio::{io::Result as TokioResult, runtime::Runtime}; - -/// DaemonRuntime is a "background" runtime, which contains "daemon" tasks: -/// any task spawn into it would run until finish even the runtime isn't -/// referenced. -pub struct DaemonRuntime(Option); - -impl DaemonRuntime { - /// spawn a daemon task to the runtime. - pub fn spawn(self: &Arc, f: impl Future + Send + 'static) { - let wkr = self.clone(); - self.0.as_ref().unwrap().spawn(async move { - f.await; - drop(wkr) - }); - } +use tokio::{ + io::Result as TokioResult, + runtime::{Builder, Runtime}, +}; +use tokio_util::task::task_tracker::TaskTracker; + +struct DeamonRuntime { + inner: Option, + tracker: TaskTracker, +} - /// create a daemon runtime from some runtime. 
- pub fn from_runtime(rt: Runtime) -> Arc { - Arc::new(Self(Some(rt))) +impl DeamonRuntime { + pub fn spawn(&self, fut: Fut) + where + Fut: Future + Send + 'static, + { + self.inner + .as_ref() + .unwrap() + .spawn(self.tracker.track_future(fut)); } } -impl Drop for DaemonRuntime { +impl Drop for DeamonRuntime { fn drop(&mut self) { - if let Some(runtime) = self.0.take() { + if let Some(runtime) = self.inner.take() { runtime.shutdown_background(); } } } +#[derive(Clone)] +pub struct DeamonRuntimeHandle { + inner: Weak>, +} + +impl DeamonRuntimeHandle { + pub fn spawn(&self, fut: Fut) + where + Fut: Future + Send + 'static, + { + let runtime = match self.inner.upgrade() { + Some(runtime) => runtime, + None => { + error!("Daemon runtime has been dropped. Task will be ignored."); + return; + } + }; + + let (handle, tracker) = { + let lock_guard = runtime.lock().unwrap(); + let inner = lock_guard + .inner + .as_ref() + .expect("Runtime inner should exist"); + (inner.handle().clone(), lock_guard.tracker.clone()) + }; + + handle.spawn(tracker.track_future(fut)); + } + + pub fn block_on(&self, fut: Fut) + where + Fut: Future + Send + 'static, + { + let runtime = match self.inner.upgrade() { + Some(runtime) => runtime, + None => { + error!("Daemon runtime has been dropped. Task will be ignored."); + return; + } + }; + + let (handle, tracker) = { + let lock_guard = runtime.lock().unwrap(); + let inner = lock_guard + .inner + .as_ref() + .expect("Runtime inner should exist"); + (inner.handle().clone(), lock_guard.tracker.clone()) + }; + + handle.block_on(tracker.track_future(fut)); + } +} + pub struct ResizableRuntime { - pub size: usize, - thread_name: String, - pool: Option>, + size: usize, + version: usize, + thread_prefix: String, + gc_runtime: DeamonRuntime, + current_runtime: Arc>, replace_pool_rule: Box TokioResult + Send + Sync>, after_adjust: Box, } impl ResizableRuntime { pub fn new( - thread_name: &str, + thread_size: usize, + thread_prefix: &str, replace_pool_rule: Box TokioResult + Send + Sync>, after_adjust: Box, ) -> Self { + let init_name = format!("{}-v0", thread_prefix); + let keeper = Builder::new_multi_thread() + .worker_threads(1) + .thread_name("rtkp") + .enable_all() + .build() + .expect("Failed to create runtime-keeper"); + let new_runtime = (replace_pool_rule)(thread_size, &init_name) + .unwrap_or_else(|_| panic!("failed to create tokio runtime {}", thread_prefix)); + ResizableRuntime { - size: 0, - thread_name: thread_name.to_owned(), - pool: None, + size: thread_size, + version: 0, + thread_prefix: thread_prefix.to_owned(), + gc_runtime: DeamonRuntime { + inner: Some(keeper), + tracker: TaskTracker::new(), + }, + current_runtime: Arc::new(Mutex::new(DeamonRuntime { + inner: Some(new_runtime), + tracker: TaskTracker::new(), + })), replace_pool_rule, after_adjust, } } + pub fn size(&self) -> usize { + self.size + } + pub fn spawn(&self, fut: Fut) where Fut: Future + Send + 'static, { - self.pool - .as_ref() - .expect("ResizableRuntime: please call adjust_with() before spawn()") - .spawn(fut); + let handle = self.handle(); + handle.spawn(fut); } - /// Lazily adjust the thread pool's size + pub fn block_on(&self, fut: Fut) + where + Fut: Future + Send + 'static, + { + let handle = self.handle(); + handle.block_on(fut); + } + + pub fn handle(&self) -> DeamonRuntimeHandle { + DeamonRuntimeHandle { + inner: Arc::downgrade(&self.current_runtime), + } + } + + // TODO: after tokio supports adjusting thread pool size(https://github.com/tokio-rs/tokio/issues/3329), + // adapt it. 
pub fn adjust_with(&mut self, new_size: usize) { if self.size == new_size { return; } - // TODO: after tokio supports adjusting thread pool size(https://github.com/tokio-rs/tokio/issues/3329), - // adapt it. - let pool = (self.replace_pool_rule)(new_size, &self.thread_name) - .expect("failed to create tokio runtime for backup worker."); - self.pool = Some(DaemonRuntime::from_runtime(pool)); - self.size = new_size; - (self.after_adjust)(new_size); - } - pub fn block_on(&self, f: F) -> F::Output - where - F: Future, - { - self.pool - .as_ref() - .expect("ResizableRuntime: please call adjust_with() before block_on()") - .0 - .as_ref() - .unwrap() - .block_on(f) - } -} + self.version += 1; + let thread_name = format!("{}-v{}", self.thread_prefix, self.version); + let new_runtime = (self.replace_pool_rule)(new_size, &thread_name) + .unwrap_or_else(|_| panic!("failed to create tokio runtime {}", thread_name)); -#[derive(Clone)] -pub struct ResizableRuntimeHandle { - inner: Arc>, -} + let old_runtime: DeamonRuntime; + { + let mut runtime_guard = self.current_runtime.lock().unwrap(); -impl ResizableRuntimeHandle { - pub fn new(runtime: ResizableRuntime) -> Self { - ResizableRuntimeHandle { - inner: Arc::new(RwLock::new(runtime)), + old_runtime = std::mem::replace( + &mut *runtime_guard, + DeamonRuntime { + inner: Some(new_runtime), + tracker: TaskTracker::new(), + }, + ); } - } - pub fn spawn(&self, fut: Fut) - where - Fut: Future + Send + 'static, - { - let inner = self.inner.read().unwrap(); - inner.spawn(fut); - } - - pub fn adjust_with(&self, new_size: usize) { - let mut inner = self.inner.write().unwrap(); - inner.adjust_with(new_size); - } + self.size = new_size; - pub fn block_on(&self, f: F) -> F::Output - where - F: Future, - { - let inner = self.inner.read().unwrap(); - inner.block_on(f) + info!( + "Resizing thread pool"; + "thread_name" => &thread_name, + "new_size" => new_size + ); + self.gc_runtime.spawn(async move { + old_runtime.tracker.close(); + old_runtime.tracker.wait().await; + drop(old_runtime); + }); + (self.after_adjust)(new_size); } } #[cfg(test)] mod test { - use std::sync::atomic::{AtomicUsize, Ordering}; + use std::{ + future, + sync::atomic::{AtomicUsize, Ordering}, + thread::{self, sleep}, + time::Duration, + }; use super::*; + use crate::time::Instant; - static COUNTER: AtomicUsize = AtomicUsize::new(0); + fn replace_pool_rule(thread_count: usize, thread_name: &str) -> TokioResult { + let rt = tokio::runtime::Builder::new_multi_thread() + .worker_threads(thread_count) + .thread_name(thread_name) + .enable_all() + .build() + .unwrap(); + Ok(rt) + } #[test] fn test_adjust_thread_num() { - let replace_pool_rule = |thread_count: usize, thread_name: &str| { - let rt = tokio::runtime::Builder::new_multi_thread() - .worker_threads(thread_count) - .thread_name(thread_name) - .enable_all() - .build() - .unwrap(); - Ok(rt) + static COUNTER: AtomicUsize = AtomicUsize::new(4); + let after_adjust = |new_size: usize| { + COUNTER.store(new_size, Ordering::SeqCst); }; + let mut threads = ResizableRuntime::new( + COUNTER.load(Ordering::SeqCst), + "test", + Box::new(replace_pool_rule), + Box::new(after_adjust), + ); + assert_eq!(COUNTER.load(Ordering::SeqCst), threads.size()); + + let handle = threads.handle(); + handle.block_on(async { + COUNTER.fetch_add(1, Ordering::SeqCst); + }); + assert_eq!(COUNTER.load(Ordering::SeqCst), threads.size() + 1); + + threads.adjust_with(8); + assert!(!threads.gc_runtime.tracker.is_empty()); + assert_eq!(COUNTER.load(Ordering::SeqCst), 
threads.size()); + + sleep(Duration::from_secs(1)); + assert!(threads.gc_runtime.tracker.is_empty()); + + // New task should be scheduled to the new runtime + handle.block_on(async { + COUNTER.fetch_add(1, Ordering::SeqCst); + }); + assert_eq!(COUNTER.load(Ordering::SeqCst), threads.size() + 1); + } + + #[test] + fn test_infinite_loop() { + static COUNTER: AtomicUsize = AtomicUsize::new(4); let after_adjust = |new_size: usize| { COUNTER.store(new_size, Ordering::SeqCst); }; - let mut threads = - ResizableRuntime::new("test", Box::new(replace_pool_rule), Box::new(after_adjust)); - threads.adjust_with(4); - assert_eq!(COUNTER.load(Ordering::SeqCst), 4); + let mut threads = ResizableRuntime::new( + COUNTER.load(Ordering::SeqCst), + "test", + Box::new(replace_pool_rule), + Box::new(after_adjust), + ); + assert_eq!(COUNTER.load(Ordering::SeqCst), threads.size()); + + let handle = threads.handle(); + // infinite loop should not be cleaned + handle.spawn(async { + future::pending::<()>().await; + }); + threads.adjust_with(8); - assert_eq!(COUNTER.load(Ordering::SeqCst), 8); + sleep(Duration::from_secs(1)); + assert!(!threads.gc_runtime.tracker.is_empty()); + assert_eq!(COUNTER.load(Ordering::SeqCst), threads.size()); + } + + #[test] + fn test_drop() { + let start = Instant::now(); + let threads = + ResizableRuntime::new(4, "test", Box::new(replace_pool_rule), Box::new(|_| {})); + let handle = threads.handle(); + let handle_clone = handle.clone(); + handle.spawn(async { + future::pending::<()>().await; + }); + let thread = thread::spawn(move || { + handle_clone.block_on(async { + future::pending::<()>().await; + }); + }); + drop(threads); + handle.spawn(async { + future::pending::<()>().await; + }); + handle.block_on(async { + future::pending::<()>().await; + }); + thread.join().unwrap(); + + assert!(Instant::now() - start < Duration::from_secs(5)); + } + + #[test] + fn test_multi_tasks() { + let threads = Arc::new(ResizableRuntime::new( + 8, + "test", + Box::new(replace_pool_rule), + Box::new(|_| {}), + )); + let handle = threads.handle(); + + let handles: Vec<_> = (0..2000) + .map(|i| { + let runtime_handle = handle.clone(); + thread::spawn(move || { + if i % 2 == 0 { + runtime_handle.block_on(async move { + sleep(Duration::from_millis(500)); + println!("Thread {} finished", i); + }); + } else { + runtime_handle.spawn(async move { + sleep(Duration::from_millis(500)); + println!("Thread {} finished", i); + }) + } + }) + }) + .collect(); + + // Wait for all threads to complete + for handle in handles { + handle.join().expect("Thread panicked"); + } } } diff --git a/components/tikv_util/src/stream.rs b/components/tikv_util/src/stream.rs index 4f16a75ef57..84b8eede06d 100644 --- a/components/tikv_util/src/stream.rs +++ b/components/tikv_util/src/stream.rs @@ -76,8 +76,7 @@ pub fn error_stream(e: io::Error) -> impl Stream> + Unp pub fn block_on_external_io(f: F) -> F::Output { // we need a Tokio runtime, Tokio futures require Tokio executor. 
Builder::new_current_thread() - .enable_io() - .enable_time() + .enable_all() .build() .expect("failed to create Tokio runtime") .block_on(f) diff --git a/src/import/sst_service.rs b/src/import/sst_service.rs index 20fbb09294e..fae821b27aa 100644 --- a/src/import/sst_service.rs +++ b/src/import/sst_service.rs @@ -38,7 +38,7 @@ use tikv_kv::{Engine, LocalTablets, Modify, WriteData}; use tikv_util::{ config::ReadableSize, future::{create_stream_with_buffer, paired_future_callback}, - resizable_threadpool::{ResizableRuntime, ResizableRuntimeHandle}, + resizable_threadpool::{DeamonRuntimeHandle, ResizableRuntime}, sys::disk::{get_disk_status, DiskUsage}, time::{Instant, Limiter}, HandyRwLock, @@ -119,8 +119,10 @@ pub struct ImportSstService { cfg: ConfigManager, tablets: LocalTablets, engine: E, - // TODO: (Ris) change to ResizableRuntime - threads: ResizableRuntimeHandle, + threads: DeamonRuntimeHandle, + // threads_ref is kept for safe cleanup + #[allow(dead_code)] + threads_ref: Arc>, importer: Arc>, limiter: Limiter, ingest_latch: Arc, @@ -344,17 +346,20 @@ impl ImportSstService { .build() }; - let mut threads = - ResizableRuntime::new("import", Box::new(create_tokio_runtime), Box::new(|_| ())); - // There would be 4 initial threads running forever. - threads.adjust_with(4); - let handle = ResizableRuntimeHandle::new(threads); + let threads = ResizableRuntime::new( + 4, + "impwkr", + Box::new(create_tokio_runtime), + Box::new(|_| ()), + ); + + let handle = threads.handle(); + let threads_clone = Arc::new(Mutex::new(threads)); if let LocalTablets::Singleton(tablet) = &tablets { importer.start_switch_mode_check(&handle.clone(), Some(tablet.clone())); } else { importer.start_switch_mode_check(&handle.clone(), None); } - let writer = raft_writer::ThrottledTlsEngineWriter::default(); let gc_handle = writer.clone(); handle.spawn(async move { @@ -362,16 +367,17 @@ impl ImportSstService { tokio::time::sleep(WRITER_GC_INTERVAL).await; } }); - - let cfg_mgr = ConfigManager::new(cfg, handle.clone()); + let num_threads = cfg.num_threads; + let cfg_mgr = ConfigManager::new(cfg, Arc::downgrade(&threads_clone)); handle.spawn(Self::tick(importer.clone(), cfg_mgr.clone())); // Drop the initial pool to accept new tasks - handle.adjust_with(cfg_mgr.rl().num_threads); + threads_clone.lock().unwrap().adjust_with(num_threads); ImportSstService { cfg: cfg_mgr, tablets, threads: handle.clone(), + threads_ref: threads_clone, engine, importer, limiter: Limiter::new(f64::INFINITY), From 43e63b5614c96119e4126d8c2a29342e47b95d3d Mon Sep 17 00:00:00 2001 From: lucasliang Date: Fri, 29 Nov 2024 16:47:17 +0800 Subject: [PATCH 37/86] raftstore: calculate the slow score by considering individual disk performance factors (#17801) close tikv/tikv#17884 This PR introduces an extra, independent inspector to detect whether there are I/O hang issues on the kvdb disk when the kvdb is deployed on a separate mount path.
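To make the scoring rule concrete: the final slow score reported for a store is the worst (maximum) score among all inspected factors, so a healthy raft disk can no longer mask a hung kvdb disk. Below is a minimal, self-contained Rust sketch of that max-over-factors rule as used by UnifiedSlowScore::get_score; the unified_score function and the main driver are invented names for illustration only, not TiKV code.

// Minimal sketch: the unified slow score is the maximum of the per-factor
// scores (each in [1.0, 100.0]), e.g. raft disk I/O and kvdb disk I/O.
fn unified_score(factor_scores: &[f64]) -> f64 {
    // Fold with f64::max so the worst factor determines the store's score.
    factor_scores.iter().copied().fold(1.0, f64::max)
}

fn main() {
    // Raft disk healthy (1.0) but kvdb disk degraded (42.0): store is slow.
    assert_eq!(unified_score(&[1.0, 42.0]), 42.0);
    // No factor above the baseline: the score stays at 1.0.
    assert_eq!(unified_score(&[1.0, 1.0]), 1.0);
}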
Signed-off-by: lucasliang Co-authored-by: Bisheng Huang --- components/health_controller/src/lib.rs | 2 + components/health_controller/src/reporters.rs | 104 ++++++-- .../health_controller/src/slow_score.rs | 79 ++++++- components/health_controller/src/types.rs | 16 ++ components/raftstore-v2/src/worker/pd/mod.rs | 5 +- components/raftstore/src/store/config.rs | 61 ++++- components/raftstore/src/store/fsm/store.rs | 65 +++-- components/raftstore/src/store/metrics.rs | 7 +- components/raftstore/src/store/mod.rs | 17 +- components/raftstore/src/store/msg.rs | 3 +- .../raftstore/src/store/worker/disk_check.rs | 178 ++++++++++++++ components/raftstore/src/store/worker/mod.rs | 2 + components/raftstore/src/store/worker/pd.rs | 223 +++++++++++------- components/server/src/server.rs | 14 +- components/test_raftstore/src/node.rs | 1 + components/test_raftstore/src/server.rs | 6 +- metrics/grafana/tikv_details.dashboard.py | 1 + metrics/grafana/tikv_details.json | 6 +- metrics/grafana/tikv_details.json.sha256 | 2 +- src/server/raft_server.rs | 8 +- .../integrations/config/dynamic/raftstore.rs | 3 +- .../integrations/raftstore/test_bootstrap.rs | 6 +- .../raftstore/test_status_command.rs | 67 +++--- tests/integrations/server/kv_service.rs | 3 +- 24 files changed, 712 insertions(+), 167 deletions(-) create mode 100644 components/raftstore/src/store/worker/disk_check.rs diff --git a/components/health_controller/src/lib.rs b/components/health_controller/src/lib.rs index baf7f794b85..75427cd8e7c 100644 --- a/components/health_controller/src/lib.rs +++ b/components/health_controller/src/lib.rs @@ -30,6 +30,8 @@ //! that are specific to different modules, increasing the complexity and //! possibility to misuse of `HealthController`. +#![feature(div_duration)] + pub mod reporters; pub mod slow_score; pub mod trend; diff --git a/components/health_controller/src/reporters.rs b/components/health_controller/src/reporters.rs index 96514cf5414..56624c37d64 100644 --- a/components/health_controller/src/reporters.rs +++ b/components/health_controller/src/reporters.rs @@ -12,6 +12,7 @@ use prometheus::IntGauge; use crate::{ slow_score::{SlowScore, SlowScoreTickResult}, trend::{RequestPerSecRecorder, Trend}, + types::InspectFactor, HealthController, HealthControllerInner, RaftstoreDuration, }; @@ -27,6 +28,7 @@ pub struct RaftstoreReporterConfig { /// worker) is expected to tick it. But the interval is necessary in /// some internal calculations. pub inspect_interval: Duration, + pub inspect_kvdb_interval: Duration, pub unsensitive_cause: f64, pub unsensitive_result: f64, @@ -43,9 +45,72 @@ pub struct RaftstoreReporterConfig { pub result_l2_gap_gauges: IntGauge, } +/// A unified slow score that combines multiple slow scores. +/// +/// It calculates the final slow score of a store by picking the maximum +/// score among multiple factors. Each factor represents a different aspect of +/// the store's performance. Typically, we have two factors: Raft Disk I/O and +/// KvDB Disk I/O. If there are more factors in the future, we can add them +/// here. +#[derive(Default)] +pub struct UnifiedSlowScore { + factors: Vec, +} + +impl UnifiedSlowScore { + pub fn new(cfg: &RaftstoreReporterConfig) -> Self { + let mut unified_slow_score = UnifiedSlowScore::default(); + // The first factor is for Raft Disk I/O. + unified_slow_score + .factors + .push(SlowScore::new(cfg.inspect_interval)); + // The second factor is for KvDB Disk I/O. 
+ unified_slow_score + .factors + .push(SlowScore::new_with_extra_config( + cfg.inspect_kvdb_interval, + 0.6, + )); + unified_slow_score + } + + #[inline] + pub fn record( + &mut self, + id: u64, + factor: InspectFactor, + duration: &RaftstoreDuration, + not_busy: bool, + ) { + self.factors[factor as usize].record(id, duration.delays_on_disk_io(false), not_busy); + } + + #[inline] + pub fn get(&self, factor: InspectFactor) -> &SlowScore { + &self.factors[factor as usize] + } + + #[inline] + pub fn get_mut(&mut self, factor: InspectFactor) -> &mut SlowScore { + &mut self.factors[factor as usize] + } + + // Returns the maximum score of all factors. + pub fn get_score(&self) -> f64 { + self.factors + .iter() + .map(|factor| factor.get()) + .fold(1.0, f64::max) + } + + pub fn last_tick_finished(&self) -> bool { + self.factors.iter().all(SlowScore::last_tick_finished) + } +} + pub struct RaftstoreReporter { health_controller_inner: Arc, - slow_score: SlowScore, + slow_score: UnifiedSlowScore, slow_trend: SlowTrendStatistics, is_healthy: bool, } @@ -56,18 +121,14 @@ impl RaftstoreReporter { pub fn new(health_controller: &HealthController, cfg: RaftstoreReporterConfig) -> Self { Self { health_controller_inner: health_controller.inner.clone(), - slow_score: SlowScore::new(cfg.inspect_interval), + slow_score: UnifiedSlowScore::new(&cfg), slow_trend: SlowTrendStatistics::new(cfg), is_healthy: true, } } - pub fn get_tick_interval(&self) -> Duration { - self.slow_score.get_inspect_interval() - } - pub fn get_slow_score(&self) -> f64 { - self.slow_score.get() + self.slow_score.get_score() } pub fn get_slow_trend(&self) -> &SlowTrendStatistics { @@ -77,17 +138,18 @@ impl RaftstoreReporter { pub fn record_raftstore_duration( &mut self, id: u64, + factor: InspectFactor, duration: RaftstoreDuration, store_not_busy: bool, ) { // Fine-tuned, `SlowScore` only takes the I/O jitters on the disk into account. self.slow_score - .record(id, duration.delays_on_disk_io(false), store_not_busy); + .record(id, factor, &duration, store_not_busy); self.slow_trend.record(duration); // Publish slow score to health controller self.health_controller_inner - .update_raftstore_slow_score(self.slow_score.get()); + .update_raftstore_slow_score(self.slow_score.get_score()); } fn is_healthy(&self) -> bool { @@ -109,34 +171,42 @@ impl RaftstoreReporter { } } - pub fn tick(&mut self, store_maybe_busy: bool) -> SlowScoreTickResult { + pub fn tick(&mut self, store_maybe_busy: bool, factor: InspectFactor) -> SlowScoreTickResult { // Record a fairly great value when timeout self.slow_trend.slow_cause.record(500_000, Instant::now()); + // healthy: The health status of the current store. + // all_ticks_finished: The last tick of all factors is finished. + // factor_tick_finished: The last tick of the current factor is finished. + let (healthy, all_ticks_finished, factor_tick_finished) = ( + self.is_healthy(), + self.slow_score.last_tick_finished(), + self.slow_score.get(factor).last_tick_finished(), + ); // The health status is recovered to serving as long as any tick // does not timeout. - if !self.is_healthy() && self.slow_score.last_tick_finished() { + if !healthy && all_ticks_finished { self.set_is_healthy(true); } - if !self.slow_score.last_tick_finished() { + if !all_ticks_finished { // If the last tick is not finished, it means that the current store might // be busy on handling requests or delayed on I/O operations. And only when // the current store is not busy, it should record the last_tick as a timeout. 
- if !store_maybe_busy { - self.slow_score.record_timeout(); + if !store_maybe_busy && !factor_tick_finished { + self.slow_score.get_mut(factor).record_timeout(); } } - let slow_score_tick_result = self.slow_score.tick(); + let slow_score_tick_result = self.slow_score.get_mut(factor).tick(); if slow_score_tick_result.updated_score.is_some() && !slow_score_tick_result.has_new_record { self.set_is_healthy(false); } // Publish the slow score to health controller - if let Some(slow_score_value) = slow_score_tick_result.updated_score { + if slow_score_tick_result.updated_score.is_some() { self.health_controller_inner - .update_raftstore_slow_score(slow_score_value); + .update_raftstore_slow_score(self.slow_score.get_score()); } slow_score_tick_result diff --git a/components/health_controller/src/slow_score.rs b/components/health_controller/src/slow_score.rs index 12e043b5668..846e3f98517 100644 --- a/components/health_controller/src/slow_score.rs +++ b/components/health_controller/src/slow_score.rs @@ -7,6 +7,12 @@ use std::{ use ordered_float::OrderedFloat; +/// Interval for updating the slow score. +const UPDATE_INTERVALS: Duration = Duration::from_secs(10); +/// Recovery intervals for the slow score. +/// If the score has reached 100 and there is no timeout inspecting requests +/// during this interval, the score will go back to 1 after 5min. +const RECOVERY_INTERVALS: Duration = Duration::from_secs(60 * 5); // Slow score is a value that represents the speed of a store and ranges in [1, // 100]. It is maintained in the AIMD way. // If there are some inspecting requests timeout during a round, by default the @@ -45,7 +51,7 @@ impl SlowScore { inspect_interval, ratio_thresh: OrderedFloat(0.1), - min_ttr: Duration::from_secs(5 * 60), + min_ttr: RECOVERY_INTERVALS, last_record_time: Instant::now(), last_update_time: Instant::now(), round_ticks: 30, @@ -54,6 +60,29 @@ impl SlowScore { } } + // Only for kvdb. + pub fn new_with_extra_config(inspect_interval: Duration, timeout_ratio: f64) -> SlowScore { + SlowScore { + value: OrderedFloat(1.0), + + timeout_requests: 0, + total_requests: 0, + + inspect_interval, + ratio_thresh: OrderedFloat(timeout_ratio), + min_ttr: RECOVERY_INTERVALS, + last_record_time: Instant::now(), + last_update_time: Instant::now(), + // The minimal round ticks is 1 for kvdb. 
+ round_ticks: cmp::max( + UPDATE_INTERVALS.div_duration_f64(inspect_interval) as u64, + 1_u64, + ), + last_tick_id: 0, + last_tick_finished: true, + } + } + pub fn record(&mut self, id: u64, duration: Duration, not_busy: bool) { self.last_record_time = Instant::now(); if id != self.last_tick_id { @@ -207,4 +236,52 @@ mod tests { slow_score.update_impl(Duration::from_secs(57)) ); } + + #[test] + fn test_slow_score_extra() { + let mut slow_score = SlowScore::new_with_extra_config(Duration::from_millis(1000), 0.6); + slow_score.timeout_requests = 1; + slow_score.total_requests = 10; + let score = slow_score.update_impl(Duration::from_secs(10)); + assert!(score > OrderedFloat(1.16)); + assert!(score < OrderedFloat(1.17)); + + slow_score.timeout_requests = 2; + slow_score.total_requests = 10; + let score = slow_score.update_impl(Duration::from_secs(10)); + assert!(score > OrderedFloat(1.5)); + assert!(score < OrderedFloat(1.6)); + + slow_score.timeout_requests = 0; + slow_score.total_requests = 100; + assert_eq!( + OrderedFloat(1.0), + slow_score.update_impl(Duration::from_secs(57)) + ); + + slow_score.timeout_requests = 3; + slow_score.total_requests = 10; + assert_eq!( + OrderedFloat(1.5), + slow_score.update_impl(Duration::from_secs(10)) + ); + + slow_score.timeout_requests = 6; + slow_score.total_requests = 10; + assert_eq!( + OrderedFloat(3.0), + slow_score.update_impl(Duration::from_secs(10)) + ); + + slow_score.timeout_requests = 10; + slow_score.total_requests = 10; + assert_eq!( + OrderedFloat(6.0), + slow_score.update_impl(Duration::from_secs(10)) + ); + + // Test too large inspect interval. + let slow_score = SlowScore::new_with_extra_config(Duration::from_secs(11), 0.1); + assert_eq!(slow_score.round_ticks, 1); + } } diff --git a/components/health_controller/src/types.rs b/components/health_controller/src/types.rs index 5cbf5490511..7342273e972 100644 --- a/components/health_controller/src/types.rs +++ b/components/health_controller/src/types.rs @@ -50,6 +50,22 @@ impl RaftstoreDuration { } } +#[derive(Debug, Clone, Copy, PartialEq)] +pub enum InspectFactor { + RaftDisk = 0, + KvDisk, + // TODO: Add more factors, like network io. +} + +impl InspectFactor { + pub fn as_str(&self) -> &str { + match *self { + InspectFactor::RaftDisk => "raft", + InspectFactor::KvDisk => "kvdb", + } + } +} + /// Used to inspect the latency of all stages of raftstore. 
pub struct LatencyInspector { id: u64, diff --git a/components/raftstore-v2/src/worker/pd/mod.rs b/components/raftstore-v2/src/worker/pd/mod.rs index 3ae31083d9f..7917ed5cd73 100644 --- a/components/raftstore-v2/src/worker/pd/mod.rs +++ b/components/raftstore-v2/src/worker/pd/mod.rs @@ -9,7 +9,7 @@ use causal_ts::CausalTsProviderImpl; use collections::HashMap; use concurrency_manager::ConcurrencyManager; use engine_traits::{KvEngine, RaftEngine, TabletRegistry}; -use health_controller::types::{LatencyInspector, RaftstoreDuration}; +use health_controller::types::{InspectFactor, LatencyInspector, RaftstoreDuration}; use kvproto::{metapb, pdpb}; use pd_client::{BucketStat, PdClient}; use raftstore::store::{ @@ -254,6 +254,7 @@ where let mut stats_monitor = PdStatsMonitor::new( store_heartbeat_interval / NUM_COLLECT_STORE_INFOS_PER_HEARTBEAT, cfg.value().inspect_interval.0, + std::time::Duration::default(), PdReporter::new(pd_scheduler, logger.clone()), ); stats_monitor.start(auto_split_controller, collector_reg_handle)?; @@ -428,7 +429,7 @@ impl StoreStatsReporter for PdReporter { } } - fn update_latency_stats(&self, timer_tick: u64) { + fn update_latency_stats(&self, timer_tick: u64, _factor: InspectFactor) { // Tick slowness statistics. { if let Err(e) = self.scheduler.schedule(Task::TickSlownessStats) { diff --git a/components/raftstore/src/store/config.rs b/components/raftstore/src/store/config.rs index 005896ef6de..3832adac060 100644 --- a/components/raftstore/src/store/config.rs +++ b/components/raftstore/src/store/config.rs @@ -367,16 +367,30 @@ pub struct Config { #[deprecated = "The configuration has been removed. The time to clean stale peer safely can be decided based on RocksDB snapshot sequence number."] pub clean_stale_peer_delay: ReadableDuration, - // Interval to inspect the latency of raftstore for slow store detection. + #[online_config(hidden)] + // Interval to inspect the latency of flushing raft logs for slow store detection. pub inspect_interval: ReadableDuration, + // Interval to inspect the latency of flushes on kvdb for slow store detection. + // If the kvdb uses the same mount path with raftdb, the default value will be + // optimized to `0` to avoid duplicated inspection. + #[doc(hidden)] + #[online_config(hidden)] + pub inspect_kvdb_interval: ReadableDuration, /// Threshold of CPU utilization to inspect for slow store detection. #[doc(hidden)] + #[online_config(hidden)] pub inspect_cpu_util_thd: f64, + #[doc(hidden)] + #[online_config(hidden)] // The unsensitive(increase it to reduce sensitiveness) of the cause-trend detection pub slow_trend_unsensitive_cause: f64, + #[doc(hidden)] + #[online_config(hidden)] // The unsensitive(increase it to reduce sensitiveness) of the result-trend detection pub slow_trend_unsensitive_result: f64, + #[doc(hidden)] + #[online_config(hidden)] // The sensitiveness of slowness on network-io. pub slow_trend_network_io_factor: f64, @@ -552,6 +566,7 @@ impl Default for Config { region_split_size: ReadableSize(0), clean_stale_peer_delay: ReadableDuration::minutes(0), inspect_interval: ReadableDuration::millis(100), + inspect_kvdb_interval: ReadableDuration::secs(2), // The default value of `inspect_cpu_util_thd` is 0.4, which means // when the cpu utilization is greater than 40%, the store might be // regarded as a slow node if there exists delayed inspected messages. @@ -685,6 +700,29 @@ impl Config { } } + /// Optimize the interval of different inspectors according to the + /// configuration. 
+ pub fn optimize_inspector(&mut self, separated_raft_mount_path: bool) { + // If the kvdb uses the same mount path with raftdb, the health status + // of kvdb will be inspected by raftstore automatically. So it's not necessary + // to inspect kvdb. + if !separated_raft_mount_path { + self.inspect_kvdb_interval = ReadableDuration::ZERO; + } else { + // If the inspect_kvdb_interval is less than inspect_interval, it should + // use `inspect_interval` * 10 as an empirical inspect interval for KvDB Disk + // I/O. + let inspect_kvdb_interval = if self.inspect_kvdb_interval < self.inspect_interval + && self.inspect_kvdb_interval != ReadableDuration::ZERO + { + self.inspect_interval * 10 + } else { + self.inspect_kvdb_interval + }; + self.inspect_kvdb_interval = inspect_kvdb_interval; + } + } + pub fn validate( &mut self, region_split_size: ReadableSize, @@ -1629,5 +1667,26 @@ mod tests { cfg.raft_write_wait_duration = ReadableDuration::micros(1001); cfg.validate(split_size, true, split_size / 20, false) .unwrap_err(); + + cfg = Config::new(); + cfg.optimize_inspector(false); + assert_eq!(cfg.inspect_kvdb_interval, ReadableDuration::ZERO); + + cfg = Config::new(); + cfg.inspect_kvdb_interval = ReadableDuration::secs(1); + cfg.optimize_inspector(false); + assert_eq!(cfg.inspect_kvdb_interval, ReadableDuration::ZERO); + cfg.optimize_inspector(true); + assert_eq!(cfg.inspect_kvdb_interval, ReadableDuration::ZERO); + + cfg.inspect_kvdb_interval = ReadableDuration::secs(1); + cfg.optimize_inspector(true); + assert_eq!(cfg.inspect_kvdb_interval, ReadableDuration::secs(1)); + + cfg = Config::new(); + cfg.inspect_kvdb_interval = ReadableDuration::millis(1); + cfg.inspect_interval = ReadableDuration::millis(100); + cfg.optimize_inspector(true); + assert_eq!(cfg.inspect_kvdb_interval, ReadableDuration::secs(1)); } } diff --git a/components/raftstore/src/store/fsm/store.rs b/components/raftstore/src/store/fsm/store.rs index bb12e8c0ed7..25faa03ae72 100644 --- a/components/raftstore/src/store/fsm/store.rs +++ b/components/raftstore/src/store/fsm/store.rs @@ -33,7 +33,10 @@ use engine_traits::{ use fail::fail_point; use file_system::{IoType, WithIoType}; use futures::{compat::Future01CompatExt, FutureExt}; -use health_controller::{types::LatencyInspector, HealthController}; +use health_controller::{ + types::{InspectFactor, LatencyInspector}, + HealthController, +}; use itertools::Itertools; use keys::{self, data_end_key, data_key, enc_end_key, enc_start_key}; use kvproto::{ @@ -105,9 +108,10 @@ use crate::{ worker::{ AutoSplitController, CleanupRunner, CleanupSstRunner, CleanupSstTask, CleanupTask, CompactRunner, CompactTask, ConsistencyCheckRunner, ConsistencyCheckTask, - GcSnapshotRunner, GcSnapshotTask, PdRunner, RaftlogGcRunner, RaftlogGcTask, - ReadDelegate, RefreshConfigRunner, RefreshConfigTask, RegionRunner, RegionTask, - SnapGenRunner, SnapGenTask, SplitCheckTask, SNAP_GENERATOR_MAX_POOL_SIZE, + DiskCheckRunner, DiskCheckTask, GcSnapshotRunner, GcSnapshotTask, PdRunner, + RaftlogGcRunner, RaftlogGcTask, ReadDelegate, RefreshConfigRunner, RefreshConfigTask, + RegionRunner, RegionTask, SnapGenRunner, SnapGenTask, SplitCheckTask, + SNAP_GENERATOR_MAX_POOL_SIZE, }, worker_metrics::PROCESS_STAT_CPU_USAGE, Callback, CasualMessage, CompactThreshold, FullCompactController, GlobalReplicationState, @@ -564,6 +568,7 @@ where pub raftlog_gc_scheduler: Scheduler, pub raftlog_fetch_scheduler: Scheduler>, pub region_scheduler: Scheduler, + pub disk_check_scheduler: Scheduler, pub apply_router: ApplyRouter, pub 
router: RaftRouter, pub importer: Arc>, @@ -886,19 +891,38 @@ impl<'a, EK: KvEngine + 'static, ER: RaftEngine + 'static, T: Transport> #[cfg(any(test, feature = "testexport"))] StoreMsg::Validate(f) => f(&self.ctx.cfg), StoreMsg::LatencyInspect { + factor, send_time, mut inspector, } => { - inspector.record_store_wait(send_time.saturating_elapsed()); - inspector.record_store_commit( - self.ctx - .raft_metrics - .health_stats - .avg(InspectIoType::Network), - ); - // Reset the health_stats and wait it to be refreshed in the next tick. - self.ctx.raft_metrics.health_stats.reset(); - self.ctx.pending_latency_inspect.push(inspector); + match factor { + InspectFactor::RaftDisk => { + inspector.record_store_wait(send_time.saturating_elapsed()); + inspector.record_store_commit( + self.ctx + .raft_metrics + .health_stats + .avg(InspectIoType::Network), + ); + // Reset the health_stats and wait it to be refreshed in the next tick. + self.ctx.raft_metrics.health_stats.reset(); + self.ctx.pending_latency_inspect.push(inspector); + } + InspectFactor::KvDisk => { + // Send LatencyInspector to disk_check_scheduler to inspect latency. + if let Err(e) = self + .ctx + .disk_check_scheduler + .schedule(DiskCheckTask::InspectLatency { inspector }) + { + warn!( + "Failed to schedule disk check task"; + "error" => ?e, + "store_id" => self.fsm.store.id + ); + } + } + } } StoreMsg::UnsafeRecoveryReport(report) => self.store_heartbeat_pd(Some(report)), StoreMsg::UnsafeRecoveryCreatePeer { syncer, create } => { @@ -1258,6 +1282,7 @@ pub struct RaftPollerBuilder { raftlog_gc_scheduler: Scheduler, raftlog_fetch_scheduler: Scheduler>, pub snap_gen_scheduler: Scheduler>, + disk_check_scheduler: Scheduler, pub region_scheduler: Scheduler, apply_router: ApplyRouter, pub router: RaftRouter, @@ -1493,6 +1518,7 @@ where store: self.store.clone(), pd_scheduler: self.pd_scheduler.clone(), consistency_check_scheduler: self.consistency_check_scheduler.clone(), + disk_check_scheduler: self.disk_check_scheduler.clone(), split_check_scheduler: self.split_check_scheduler.clone(), region_scheduler: self.region_scheduler.clone(), apply_router: self.apply_router.clone(), @@ -1572,6 +1598,7 @@ where raftlog_gc_scheduler: self.raftlog_gc_scheduler.clone(), raftlog_fetch_scheduler: self.raftlog_fetch_scheduler.clone(), snap_gen_scheduler: self.snap_gen_scheduler.clone(), + disk_check_scheduler: self.disk_check_scheduler.clone(), region_scheduler: self.region_scheduler.clone(), apply_router: self.apply_router.clone(), router: self.router.clone(), @@ -1663,6 +1690,7 @@ impl RaftBatchSystem { collector_reg_handle: CollectorRegHandle, health_controller: HealthController, causal_ts_provider: Option>, // used for rawkv apiv2 + mut disk_check_runner: DiskCheckRunner, grpc_service_mgr: GrpcServiceManager, safe_point: Arc, ) -> Result<()> { @@ -1771,6 +1799,12 @@ impl RaftBatchSystem { let consistency_check_scheduler = workers .background_worker .start("consistency-check", consistency_check_runner); + // The scheduler dedicated to health checking the KvEngine disk when it's using + // a separate disk from RaftEngine. 
+ disk_check_runner.bind_background_worker(workers.background_worker.clone()); + let disk_check_scheduler = workers + .background_worker + .start("disk-check-worker", disk_check_runner); self.store_writers.spawn( meta.get_id(), @@ -1789,6 +1823,7 @@ impl RaftBatchSystem { split_check_scheduler, region_scheduler, snap_gen_scheduler, + disk_check_scheduler, pd_scheduler: workers.pd_worker.scheduler(), consistency_check_scheduler, cleanup_scheduler, @@ -1932,7 +1967,7 @@ impl RaftBatchSystem { causal_ts_provider, grpc_service_mgr, ); - assert!(workers.pd_worker.start_with_timer(pd_runner)); + assert!(workers.pd_worker.start(pd_runner)); if let Err(e) = sys_util::thread::set_priority(sys_util::HIGH_PRI) { warn!("set thread priority for raftstore failed"; "error" => ?e); diff --git a/components/raftstore/src/store/metrics.rs b/components/raftstore/src/store/metrics.rs index 762ce4d3001..9428c5025db 100644 --- a/components/raftstore/src/store/metrics.rs +++ b/components/raftstore/src/store/metrics.rs @@ -858,8 +858,11 @@ lazy_static! { exponential_buckets(0.00001, 2.0, 26).unwrap() ).unwrap(); - pub static ref STORE_SLOW_SCORE_GAUGE: Gauge = - register_gauge!("tikv_raftstore_slow_score", "Slow score of the store.").unwrap(); + pub static ref STORE_SLOW_SCORE_GAUGE: IntGaugeVec = register_int_gauge_vec!( + "tikv_raftstore_slow_score", + "Slow score of the store.", + &["type"] + ).unwrap(); pub static ref STORE_SLOW_TREND_GAUGE: Gauge = register_gauge!("tikv_raftstore_slow_trend", "Slow trend changing rate.").unwrap(); diff --git a/components/raftstore/src/store/mod.rs b/components/raftstore/src/store/mod.rs index 9e8e66b7522..2c9c92ebbe3 100644 --- a/components/raftstore/src/store/mod.rs +++ b/components/raftstore/src/store/mod.rs @@ -88,13 +88,14 @@ pub use self::{ worker::{ metrics as worker_metrics, need_compact, AutoSplitController, BatchComponent, Bucket, BucketRange, BucketStatsInfo, CachedReadDelegate, CheckLeaderRunner, CheckLeaderTask, - CompactThreshold, FlowStatistics, FlowStatsReporter, FullCompactController, KeyEntry, - LocalReadContext, LocalReader, LocalReaderCore, PdStatsMonitor, PdTask, ReadDelegate, - ReadExecutor, ReadExecutorProvider, ReadProgress, ReadStats, RefreshConfigTask, RegionTask, - SnapGenTask, SplitCheckRunner, SplitCheckTask, SplitConfig, SplitConfigManager, SplitInfo, - StoreMetaDelegate, StoreStatsReporter, TrackVer, WriteStats, WriterContoller, - BIG_REGION_CPU_OVERLOAD_THRESHOLD_RATIO, DEFAULT_BIG_REGION_BYTE_THRESHOLD, - DEFAULT_BIG_REGION_QPS_THRESHOLD, DEFAULT_BYTE_THRESHOLD, DEFAULT_QPS_THRESHOLD, - NUM_COLLECT_STORE_INFOS_PER_HEARTBEAT, REGION_CPU_OVERLOAD_THRESHOLD_RATIO, + CompactThreshold, DiskCheckRunner, FlowStatistics, FlowStatsReporter, + FullCompactController, KeyEntry, LocalReadContext, LocalReader, LocalReaderCore, + PdStatsMonitor, PdTask, ReadDelegate, ReadExecutor, ReadExecutorProvider, ReadProgress, + ReadStats, RefreshConfigTask, RegionTask, SnapGenTask, SplitCheckRunner, SplitCheckTask, + SplitConfig, SplitConfigManager, SplitInfo, StoreMetaDelegate, StoreStatsReporter, + TrackVer, WriteStats, WriterContoller, BIG_REGION_CPU_OVERLOAD_THRESHOLD_RATIO, + DEFAULT_BIG_REGION_BYTE_THRESHOLD, DEFAULT_BIG_REGION_QPS_THRESHOLD, + DEFAULT_BYTE_THRESHOLD, DEFAULT_QPS_THRESHOLD, NUM_COLLECT_STORE_INFOS_PER_HEARTBEAT, + REGION_CPU_OVERLOAD_THRESHOLD_RATIO, }, }; diff --git a/components/raftstore/src/store/msg.rs b/components/raftstore/src/store/msg.rs index 4a2229949f2..dc88e881b20 100644 --- a/components/raftstore/src/store/msg.rs +++ 
b/components/raftstore/src/store/msg.rs @@ -8,7 +8,7 @@ use std::{borrow::Cow, fmt}; use collections::HashSet; use engine_traits::{CompactedEvent, KvEngine, Snapshot}; use futures::channel::mpsc::UnboundedSender; -use health_controller::types::LatencyInspector; +use health_controller::types::{InspectFactor, LatencyInspector}; use kvproto::{ brpb::CheckAdminResponse, kvrpcpb::{DiskFullOpt, ExtraOp as TxnExtraOp}, @@ -961,6 +961,7 @@ where /// Inspect the latency of raftstore. LatencyInspect { + factor: InspectFactor, send_time: Instant, inspector: LatencyInspector, }, diff --git a/components/raftstore/src/store/worker/disk_check.rs b/components/raftstore/src/store/worker/disk_check.rs new file mode 100644 index 00000000000..44c66892041 --- /dev/null +++ b/components/raftstore/src/store/worker/disk_check.rs @@ -0,0 +1,178 @@ +// Copyright 2024 TiKV Project Authors. Licensed under Apache-2.0. + +use std::{ + fmt::{self, Display, Formatter}, + io::Write, + path::PathBuf, + time::Duration, +}; + +use crossbeam::channel::{bounded, Receiver, Sender}; +use health_controller::types::LatencyInspector; +use tikv_util::{ + time::Instant, + warn, + worker::{Runnable, Worker}, +}; + +#[derive(Debug)] +pub enum Task { + InspectLatency { inspector: LatencyInspector }, +} + +impl Display for Task { + fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { + match *self { + Task::InspectLatency { .. } => write!(f, "InspectLatency"), + } + } +} + +#[derive(Clone)] +/// A simple inspector to measure the latency of disk IO. +/// +/// This is used to measure the latency of disk IO, which is used to determine +/// the health status of the TiKV server. +/// The inspector writes a file to the disk and measures the time it takes to +/// complete the write operation. +pub struct Runner { + target: PathBuf, + notifier: Sender, + receiver: Receiver, + bg_worker: Option, +} + +impl Runner { + /// The filename to write to the disk to measure the latency. + const DISK_IO_LATENCY_INSPECT_FILENAME: &'static str = ".disk_latency_inspector.tmp"; + /// The content to write to the file to measure the latency. + const DISK_IO_LATENCY_INSPECT_FLUSH_STR: &'static [u8] = b"inspect disk io latency"; + + #[inline] + fn build(target: PathBuf) -> Self { + // The disk check mechanism only cares about the latency of the most + // recent request; older requests become stale and irrelevant. To avoid + // unnecessary accumulation of multiple requests, we set a small + // `capacity` for the disk check worker. + let (notifier, receiver) = bounded(3); + Self { + target, + notifier, + receiver, + bg_worker: None, + } + } + + #[inline] + pub fn new(inspect_dir: PathBuf) -> Self { + Self::build(inspect_dir.join(Self::DISK_IO_LATENCY_INSPECT_FILENAME)) + } + + #[inline] + /// Only for test. + /// Generate a dummy Runner. 
+ pub fn dummy() -> Self { + Self::build(PathBuf::from("./").join(Self::DISK_IO_LATENCY_INSPECT_FILENAME)) + } + + #[inline] + pub fn bind_background_worker(&mut self, bg_worker: Worker) { + self.bg_worker = Some(bg_worker); + } + + fn inspect(&self) -> Option { + let mut file = std::fs::OpenOptions::new() + .create(true) + .write(true) + .truncate(true) + .open(&self.target) + .ok()?; + + let start = Instant::now(); + // Ignore the error + file.write_all(Self::DISK_IO_LATENCY_INSPECT_FLUSH_STR) + .ok()?; + file.sync_all().ok()?; + Some(start.saturating_elapsed()) + } + + fn execute(&self) { + if let Ok(task) = self.receiver.try_recv() { + match task { + Task::InspectLatency { mut inspector } => { + if let Some(latency) = self.inspect() { + inspector.record_apply_process(latency); + inspector.finish(); + } else { + warn!("failed to inspect disk io latency"); + } + } + } + } + } +} + +impl Runnable for Runner { + type Task = Task; + + fn run(&mut self, task: Task) { + // Send the task to the limited capacity channel. + if let Err(e) = self.notifier.try_send(task) { + warn!("failed to send task to disk check bg_worker: {:?}", e); + } else { + let runner = self.clone(); + if let Some(bg_worker) = self.bg_worker.as_ref() { + bg_worker.spawn_async_task(async move { + runner.execute(); + }); + } + } + } +} + +#[cfg(test)] +mod tests { + use tikv_util::worker::Builder; + + use super::*; + + #[test] + fn test_disk_check_runner() { + let background_worker = Builder::new("disk-check-worker") + .pending_capacity(256) + .create(); + let (tx, rx) = std::sync::mpsc::sync_channel(1); + let mut runner = Runner::dummy(); + runner.bind_background_worker(background_worker); + // Validate the disk check runner. + { + let tx_1 = tx.clone(); + let inspector = LatencyInspector::new( + 1, + Box::new(move |_, duration| { + let dur = duration.sum(); + tx_1.send(dur).unwrap(); + }), + ); + runner.run(Task::InspectLatency { inspector }); + let latency = rx.recv().unwrap(); + assert!(latency > Duration::from_secs(0)); + } + // Invalid bg_worker and out of capacity + { + runner.bg_worker = None; + for i in 2..=10 { + let tx_2 = tx.clone(); + let inspector = LatencyInspector::new( + i as u64, + Box::new(move |_, duration| { + let dur = duration.sum(); + tx_2.send(dur).unwrap(); + }), + ); + runner.run(Task::InspectLatency { inspector }); + rx.recv_timeout(Duration::from_secs(1)).unwrap_err(); + } + } + } +} diff --git a/components/raftstore/src/store/worker/mod.rs b/components/raftstore/src/store/worker/mod.rs index af620bdef6e..3cd4534b8f0 100644 --- a/components/raftstore/src/store/worker/mod.rs +++ b/components/raftstore/src/store/worker/mod.rs @@ -6,6 +6,7 @@ mod cleanup_snapshot; mod cleanup_sst; mod compact; mod consistency_check; +mod disk_check; pub mod metrics; mod pd; mod raftlog_gc; @@ -27,6 +28,7 @@ pub use self::{ Task as CompactTask, }, consistency_check::{Runner as ConsistencyCheckRunner, Task as ConsistencyCheckTask}, + disk_check::{Runner as DiskCheckRunner, Task as DiskCheckTask}, pd::{ new_change_peer_v2_request, FlowStatistics, FlowStatsReporter, HeartbeatTask, Runner as PdRunner, StatsMonitor as PdStatsMonitor, StoreStatsReporter, Task as PdTask, diff --git a/components/raftstore/src/store/worker/pd.rs b/components/raftstore/src/store/worker/pd.rs index 4f4d6b85034..5665318c259 100644 --- a/components/raftstore/src/store/worker/pd.rs +++ b/components/raftstore/src/store/worker/pd.rs @@ -22,7 +22,7 @@ use fail::fail_point; use futures::{compat::Future01CompatExt, FutureExt}; use health_controller::{ 
reporters::{RaftstoreReporter, RaftstoreReporterConfig}, - types::{LatencyInspector, RaftstoreDuration}, + types::{InspectFactor, LatencyInspector, RaftstoreDuration}, HealthController, }; use kvproto::{ @@ -50,7 +50,7 @@ use tikv_util::{ timer::GLOBAL_TIMER_HANDLE, topn::TopN, warn, - worker::{Runnable, RunnableWithTimer, ScheduleError, Scheduler}, + worker::{Runnable, ScheduleError, Scheduler}, }; use txn_types::TimeStamp; use yatp::Remote; @@ -201,6 +201,7 @@ where }, UpdateSlowScore { id: u64, + factor: InspectFactor, duration: RaftstoreDuration, }, RegionCpuRecords(Arc), @@ -210,6 +211,9 @@ where }, ReportBuckets(BucketStat), ControlGrpcServer(pdpb::ControlGrpcEvent), + InspectLatency { + factor: InspectFactor, + }, } pub struct StoreStat { @@ -449,8 +453,16 @@ where Task::QueryRegionLeader { region_id } => { write!(f, "query the leader of region {}", region_id) } - Task::UpdateSlowScore { id, ref duration } => { - write!(f, "compute slow score: id {}, duration {:?}", id, duration) + Task::UpdateSlowScore { + id, + factor, + ref duration, + } => { + write!( + f, + "compute slow score: id {}, factor: {:?}, duration {:?}", + id, factor, duration + ) } Task::RegionCpuRecords(ref cpu_records) => { write!(f, "get region cpu records: {:?}", cpu_records) @@ -471,6 +483,9 @@ where Task::ControlGrpcServer(ref event) => { write!(f, "control grpc server: {:?}", event) } + Task::InspectLatency { factor } => { + write!(f, "inspect raftstore latency: {:?}", factor) + } } } } @@ -519,7 +534,7 @@ pub trait StoreStatsReporter: Send + Clone + Sync + 'static + Collector { ); fn report_min_resolved_ts(&self, store_id: u64, min_resolved_ts: u64); fn auto_split(&self, split_infos: Vec); - fn update_latency_stats(&self, timer_tick: u64); + fn update_latency_stats(&self, timer_tick: u64, factor: InspectFactor); } impl StoreStatsReporter for WrappedScheduler @@ -569,9 +584,16 @@ where } } - fn update_latency_stats(&self, timer_tick: u64) { - debug!("update latency statistics not implemented for raftstore-v1"; + fn update_latency_stats(&self, timer_tick: u64, factor: InspectFactor) { + debug!("update latency statistics for raftstore-v1"; "tick" => timer_tick); + let task = Task::InspectLatency { factor }; + if let Err(e) = self.0.schedule(task) { + error!( + "failed to send inspect raftstore latency task to pd worker"; + "err" => ?e, + ); + } } } @@ -588,13 +610,19 @@ where load_base_split_check_interval: Duration, collect_tick_interval: Duration, inspect_latency_interval: Duration, + inspect_kvdb_latency_interval: Duration, } impl StatsMonitor where T: StoreStatsReporter, { - pub fn new(interval: Duration, inspect_latency_interval: Duration, reporter: T) -> Self { + pub fn new( + interval: Duration, + inspect_latency_interval: Duration, + inspect_kvdb_latency_interval: Duration, + reporter: T, + ) -> Self { StatsMonitor { reporter, handle: None, @@ -612,6 +640,7 @@ where cmp::min(default_collect_tick_interval(), interval), ), inspect_latency_interval, + inspect_kvdb_latency_interval, } } @@ -641,9 +670,12 @@ where let load_base_split_check_interval = self .load_base_split_check_interval .div_duration_f64(tick_interval) as u64; - let update_latency_stats_interval = self - .inspect_latency_interval - .div_duration_f64(tick_interval) as u64; + let update_raftdisk_latency_stats_interval = + self.inspect_latency_interval + .div_duration_f64(tick_interval) as u64; + let update_kvdisk_latency_stats_interval = + self.inspect_kvdb_latency_interval + .div_duration_f64(tick_interval) as u64; let (timer_tx, timer_rx) = 
mpsc::channel(); self.timer = Some(timer_tx); @@ -704,8 +736,11 @@ where &mut region_cpu_records_collector, ); } - if is_enable_tick(timer_cnt, update_latency_stats_interval) { - reporter.update_latency_stats(timer_cnt); + if is_enable_tick(timer_cnt, update_raftdisk_latency_stats_interval) { + reporter.update_latency_stats(timer_cnt, InspectFactor::RaftDisk); + } + if is_enable_tick(timer_cnt, update_kvdisk_latency_stats_interval) { + reporter.update_latency_stats(timer_cnt, InspectFactor::KvDisk); } timer_cnt += 1; } @@ -895,6 +930,7 @@ where let mut stats_monitor = StatsMonitor::new( interval, cfg.inspect_interval.0, + cfg.inspect_kvdb_interval.0, WrappedScheduler(scheduler.clone()), ); if let Err(e) = stats_monitor.start(auto_split_controller, collector_reg_handle) { @@ -903,6 +939,7 @@ where let health_reporter_config = RaftstoreReporterConfig { inspect_interval: cfg.inspect_interval.0, + inspect_kvdb_interval: cfg.inspect_kvdb_interval.0, unsensitive_cause: cfg.slow_trend_unsensitive_cause, unsensitive_result: cfg.slow_trend_unsensitive_result, @@ -1890,6 +1927,89 @@ where } } } + + fn handle_inspect_latency(&mut self, factor: InspectFactor) { + let slow_score_tick_result = self + .health_reporter + .tick(self.store_stat.maybe_busy(), factor); + if let Some(score) = slow_score_tick_result.updated_score { + STORE_SLOW_SCORE_GAUGE + .with_label_values(&[factor.as_str()]) + .set(score as i64); + } + let id = slow_score_tick_result.tick_id; + let scheduler = self.scheduler.clone(); + let inspector = { + match factor { + InspectFactor::RaftDisk => { + // If the last slow_score already reached abnormal state and was delayed for + // reporting by `store-heartbeat` to PD, we should report it here manually as + // a FAKE `store-heartbeat`. + if slow_score_tick_result.should_force_report_slow_store + && self.is_store_heartbeat_delayed() + { + self.handle_fake_store_heartbeat(); + } + LatencyInspector::new( + id, + Box::new(move |id, duration| { + STORE_INSPECT_DURATION_HISTOGRAM + .with_label_values(&["store_wait"]) + .observe(tikv_util::time::duration_to_sec( + duration.store_wait_duration.unwrap_or_default(), + )); + STORE_INSPECT_DURATION_HISTOGRAM + .with_label_values(&["store_commit"]) + .observe(tikv_util::time::duration_to_sec( + duration.store_commit_duration.unwrap_or_default(), + )); + + STORE_INSPECT_DURATION_HISTOGRAM + .with_label_values(&["all"]) + .observe(tikv_util::time::duration_to_sec(duration.sum())); + if let Err(e) = scheduler.schedule(Task::UpdateSlowScore { + id, + factor, + duration, + }) { + warn!("schedule pd task failed"; "err" => ?e); + } + }), + ) + } + InspectFactor::KvDisk => LatencyInspector::new( + id, + Box::new(move |id, duration| { + STORE_INSPECT_DURATION_HISTOGRAM + .with_label_values(&["apply_wait"]) + .observe(tikv_util::time::duration_to_sec( + duration.apply_wait_duration.unwrap_or_default(), + )); + STORE_INSPECT_DURATION_HISTOGRAM + .with_label_values(&["apply_process"]) + .observe(tikv_util::time::duration_to_sec( + duration.apply_process_duration.unwrap_or_default(), + )); + if let Err(e) = scheduler.schedule(Task::UpdateSlowScore { + id, + factor, + duration, + }) { + warn!("schedule pd task failed"; "err" => ?e); + } + }), + ), + } + }; + let msg = StoreMsg::LatencyInspect { + factor, + send_time: TiInstant::now(), + inspector, + }; + if let Err(e) = self.router.send_control(msg) { + warn!("pd worker send latency inspecter failed"; "err" => ?e); + } + } } fn calculate_region_cpu_records( @@ -2140,9 +2260,14 @@ where txn_ext, } => 
self.handle_update_max_timestamp(region_id, initial_status, txn_ext), Task::QueryRegionLeader { region_id } => self.handle_query_region_leader(region_id), - Task::UpdateSlowScore { id, duration } => { + Task::UpdateSlowScore { + id, + factor, + duration, + } => { self.health_reporter.record_raftstore_duration( id, + factor, duration, !self.store_stat.maybe_busy(), ); @@ -2158,6 +2283,9 @@ where Task::ControlGrpcServer(event) => { self.handle_control_grpc_server(event); } + Task::InspectLatency { factor } => { + self.handle_inspect_latency(factor); + } }; } @@ -2166,71 +2294,6 @@ where } } -impl RunnableWithTimer for Runner -where - EK: KvEngine, - ER: RaftEngine, - T: PdClient + 'static, -{ - fn on_timeout(&mut self) { - let slow_score_tick_result = self.health_reporter.tick(self.store_stat.maybe_busy()); - if let Some(score) = slow_score_tick_result.updated_score { - STORE_SLOW_SCORE_GAUGE.set(score); - } - - // If the last slow_score already reached abnormal state and was delayed for - // reporting by `store-heartbeat` to PD, we should report it here manually as - // a FAKE `store-heartbeat`. - if slow_score_tick_result.should_force_report_slow_store - && self.is_store_heartbeat_delayed() - { - self.handle_fake_store_heartbeat(); - } - - let id = slow_score_tick_result.tick_id; - - let scheduler = self.scheduler.clone(); - let inspector = LatencyInspector::new( - id, - Box::new(move |id, duration| { - STORE_INSPECT_DURATION_HISTOGRAM - .with_label_values(&["store_process"]) - .observe(tikv_util::time::duration_to_sec( - duration.store_process_duration.unwrap_or_default(), - )); - STORE_INSPECT_DURATION_HISTOGRAM - .with_label_values(&["store_wait"]) - .observe(tikv_util::time::duration_to_sec( - duration.store_wait_duration.unwrap_or_default(), - )); - STORE_INSPECT_DURATION_HISTOGRAM - .with_label_values(&["store_commit"]) - .observe(tikv_util::time::duration_to_sec( - duration.store_commit_duration.unwrap_or_default(), - )); - - STORE_INSPECT_DURATION_HISTOGRAM - .with_label_values(&["all"]) - .observe(tikv_util::time::duration_to_sec(duration.sum())); - if let Err(e) = scheduler.schedule(Task::UpdateSlowScore { id, duration }) { - warn!("schedule pd task failed"; "err" => ?e); - } - }), - ); - let msg = StoreMsg::LatencyInspect { - send_time: TiInstant::now(), - inspector, - }; - if let Err(e) = self.router.send_control(msg) { - warn!("pd worker send latency inspecter failed"; "err" => ?e); - } - } - - fn get_interval(&self) -> Duration { - self.health_reporter.get_tick_interval() - } -} - fn new_change_peer_request(change_type: ConfChangeType, peer: metapb::Peer) -> AdminRequest { let mut req = AdminRequest::default(); req.set_cmd_type(AdminCmdType::ChangePeer); @@ -2519,6 +2582,7 @@ mod tests { let mut stats_monitor = StatsMonitor::new( Duration::from_secs(interval), Duration::from_secs(interval), + Duration::default(), WrappedScheduler(scheduler), ); if let Err(e) = stats_monitor.start( @@ -2767,6 +2831,7 @@ mod tests { let mut stats_monitor = StatsMonitor::new( Duration::from_secs(interval), Duration::from_secs(interval), + Duration::default(), WrappedScheduler(pd_worker.scheduler()), ); stats_monitor diff --git a/components/server/src/server.rs b/components/server/src/server.rs index da6a7a85b76..35f160de2fd 100644 --- a/components/server/src/server.rs +++ b/components/server/src/server.rs @@ -74,8 +74,8 @@ use raftstore::{ }, memory::MEMTRACE_ROOT as MEMTRACE_RAFTSTORE, snapshot_backup::PrepareDiskSnapObserver, - AutoSplitController, CheckLeaderRunner, LocalReader, SnapManager, 
SnapManagerBuilder, - SplitCheckRunner, SplitConfigManager, StoreMetaDelegate, + AutoSplitController, CheckLeaderRunner, DiskCheckRunner, LocalReader, SnapManager, + SnapManagerBuilder, SplitCheckRunner, SplitConfigManager, StoreMetaDelegate, }, RaftRouterCompactedEventSender, }; @@ -811,6 +811,13 @@ where let server_config = Arc::new(VersionTrack::new(self.core.config.server.clone())); self.core.config.raft_store.optimize_for(false); + self.core + .config + .raft_store + .optimize_inspector(path_in_diff_mount_point( + engines.engines.raft.get_engine_path().to_string().as_str(), + engines.engines.kv.path(), + )); self.core .config .raft_store @@ -1024,6 +1031,8 @@ where .registry .register_consistency_check_observer(100, observer); + let disk_check_runner = DiskCheckRunner::new(self.core.store_path.clone()); + raft_server .start( engines.engines.clone(), @@ -1038,6 +1047,7 @@ where self.concurrency_manager.clone(), collector_reg_handle, self.causal_ts_provider.clone(), + disk_check_runner, self.grpc_service_mgr.clone(), safe_point.clone(), ) diff --git a/components/test_raftstore/src/node.rs b/components/test_raftstore/src/node.rs index 26319d43e27..c87609a0c02 100644 --- a/components/test_raftstore/src/node.rs +++ b/components/test_raftstore/src/node.rs @@ -346,6 +346,7 @@ impl Simulator for NodeCluster { cm, CollectorRegHandle::new_for_test(), None, + DiskCheckRunner::dummy(), GrpcServiceManager::dummy(), Arc::new(AtomicU64::new(0)), )?; diff --git a/components/test_raftstore/src/server.rs b/components/test_raftstore/src/server.rs index d73157c51ac..a50e226f640 100644 --- a/components/test_raftstore/src/server.rs +++ b/components/test_raftstore/src/server.rs @@ -42,8 +42,9 @@ use raftstore::{ store::{ fsm::{store::StoreMeta, ApplyRouter, RaftBatchSystem, RaftRouter}, msg::RaftCmdExtraOpts, - AutoSplitController, Callback, CheckLeaderRunner, LocalReader, RegionSnapshot, SnapManager, - SnapManagerBuilder, SplitCheckRunner, SplitConfigManager, StoreMetaDelegate, + AutoSplitController, Callback, CheckLeaderRunner, DiskCheckRunner, LocalReader, + RegionSnapshot, SnapManager, SnapManagerBuilder, SplitCheckRunner, SplitConfigManager, + StoreMetaDelegate, }, Result, }; @@ -681,6 +682,7 @@ impl ServerCluster { concurrency_manager.clone(), collector_reg_handle, causal_ts_provider, + DiskCheckRunner::dummy(), GrpcServiceManager::dummy(), Arc::new(AtomicU64::new(0)), )?; diff --git a/metrics/grafana/tikv_details.dashboard.py b/metrics/grafana/tikv_details.dashboard.py index e75efd9b28b..95b915d6173 100644 --- a/metrics/grafana/tikv_details.dashboard.py +++ b/metrics/grafana/tikv_details.dashboard.py @@ -9865,6 +9865,7 @@ def SlowTrendStatistics() -> RowPanel: target( expr=expr_sum( "tikv_raftstore_slow_score", + by_labels=["instance", "type"], ), ), ], diff --git a/metrics/grafana/tikv_details.json b/metrics/grafana/tikv_details.json index 5ff6c19581f..b0f8311ac6e 100644 --- a/metrics/grafana/tikv_details.json +++ b/metrics/grafana/tikv_details.json @@ -57795,15 +57795,15 @@ "targets": [ { "datasource": "${DS_TEST-CLUSTER}", - "expr": "sum((\n tikv_raftstore_slow_score\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) by (instance) ", + "expr": "sum((\n tikv_raftstore_slow_score\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) by (instance, type) ", "format": "time_series", "hide": false, "instant": false, "interval": "", "intervalFactor": 1, - "legendFormat": "{{instance}}", + "legendFormat": 
"{{instance}}-{{type}}", "metric": "", - "query": "sum((\n tikv_raftstore_slow_score\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) by (instance) ", + "query": "sum((\n tikv_raftstore_slow_score\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) by (instance, type) ", "refId": "", "step": 10, "target": "" diff --git a/metrics/grafana/tikv_details.json.sha256 b/metrics/grafana/tikv_details.json.sha256 index 27eae77b44a..0c7f268dfa0 100644 --- a/metrics/grafana/tikv_details.json.sha256 +++ b/metrics/grafana/tikv_details.json.sha256 @@ -1 +1 @@ -f772c8b34f29ca1c929b316051740cbbcfb99e8bc4b2693bd739b87487bfde64 ./metrics/grafana/tikv_details.json +e93faab944914bbca21c74daee0223604dd57ba37115c7239d8b707468d5d8db ./metrics/grafana/tikv_details.json diff --git a/src/server/raft_server.rs b/src/server/raft_server.rs index 36c0cab22fc..15e98441583 100644 --- a/src/server/raft_server.rs +++ b/src/server/raft_server.rs @@ -20,8 +20,8 @@ use raftstore::{ store::{ self, fsm::{store::StoreMeta, ApplyRouter, RaftBatchSystem, RaftRouter}, - initial_region, AutoSplitController, Config as StoreConfig, GlobalReplicationState, PdTask, - RefreshConfigTask, SnapManager, SplitCheckTask, Transport, + initial_region, AutoSplitController, Config as StoreConfig, DiskCheckRunner, + GlobalReplicationState, PdTask, RefreshConfigTask, SnapManager, SplitCheckTask, Transport, }, }; use resource_metering::CollectorRegHandle; @@ -172,6 +172,7 @@ where concurrency_manager: ConcurrencyManager, collector_reg_handle: CollectorRegHandle, causal_ts_provider: Option>, // used for rawkv apiv2 + disk_check_runner: DiskCheckRunner, grpc_service_mgr: GrpcServiceManager, safe_point: Arc, ) -> Result<()> @@ -211,6 +212,7 @@ where concurrency_manager, collector_reg_handle, causal_ts_provider, + disk_check_runner, grpc_service_mgr, safe_point, )?; @@ -460,6 +462,7 @@ where concurrency_manager: ConcurrencyManager, collector_reg_handle: CollectorRegHandle, causal_ts_provider: Option>, // used for rawkv apiv2 + disk_check_runner: DiskCheckRunner, grpc_service_mgr: GrpcServiceManager, safe_point: Arc, ) -> Result<()> @@ -495,6 +498,7 @@ where collector_reg_handle, self.health_controller.clone(), causal_ts_provider, + disk_check_runner, grpc_service_mgr, safe_point, )?; diff --git a/tests/integrations/config/dynamic/raftstore.rs b/tests/integrations/config/dynamic/raftstore.rs index 003f9851642..7c39487d4dd 100644 --- a/tests/integrations/config/dynamic/raftstore.rs +++ b/tests/integrations/config/dynamic/raftstore.rs @@ -16,7 +16,7 @@ use raftstore::{ store::{ config::{Config, RaftstoreConfigManager}, fsm::{StoreMeta, *}, - AutoSplitController, SnapManager, StoreMsg, Transport, + AutoSplitController, DiskCheckRunner, SnapManager, StoreMsg, Transport, }, Result, }; @@ -114,6 +114,7 @@ fn start_raftstore( CollectorRegHandle::new_for_test(), HealthController::new(), None, + DiskCheckRunner::dummy(), GrpcServiceManager::dummy(), Arc::new(AtomicU64::new(0)), ) diff --git a/tests/integrations/raftstore/test_bootstrap.rs b/tests/integrations/raftstore/test_bootstrap.rs index 99ad19c21c1..e994b7cfc68 100644 --- a/tests/integrations/raftstore/test_bootstrap.rs +++ b/tests/integrations/raftstore/test_bootstrap.rs @@ -14,7 +14,10 @@ use health_controller::HealthController; use kvproto::{kvrpcpb::ApiVersion, metapb, raft_serverpb::RegionLocalState}; use raftstore::{ coprocessor::CoprocessorHost, - store::{bootstrap_store, fsm, fsm::store::StoreMeta, 
AutoSplitController, SnapManager}, + store::{ + bootstrap_store, fsm, fsm::store::StoreMeta, AutoSplitController, DiskCheckRunner, + SnapManager, + }, }; use raftstore_v2::router::PeerMsg; use resource_metering::CollectorRegHandle; @@ -122,6 +125,7 @@ fn test_node_bootstrap_with_prepared_data() { ConcurrencyManager::new(1.into()), CollectorRegHandle::new_for_test(), None, + DiskCheckRunner::dummy(), GrpcServiceManager::dummy(), Arc::new(AtomicU64::new(0)), ) diff --git a/tests/integrations/raftstore/test_status_command.rs b/tests/integrations/raftstore/test_status_command.rs index 37e78de3d50..0d42c1ec869 100644 --- a/tests/integrations/raftstore/test_status_command.rs +++ b/tests/integrations/raftstore/test_status_command.rs @@ -1,11 +1,11 @@ // Copyright 2016 TiKV Project Authors. Licensed under Apache-2.0. -use health_controller::types::LatencyInspector; +use health_controller::types::{InspectFactor, LatencyInspector}; use raftstore::store::msg::StoreMsg as StoreMsgV1; use raftstore_v2::router::StoreMsg as StoreMsgV2; use test_raftstore::Simulator as S1; use test_raftstore_v2::Simulator as S2; -use tikv_util::{time::Instant, HandyRwLock}; +use tikv_util::{config::ReadableDuration, time::Instant, HandyRwLock}; #[test] fn test_region_detail() { @@ -33,6 +33,7 @@ fn test_region_detail() { fn test_latency_inspect() { let mut cluster_v1 = test_raftstore::new_node_cluster(0, 1); cluster_v1.cfg.raft_store.store_io_pool_size = 2; + cluster_v1.cfg.raft_store.inspect_kvdb_interval = ReadableDuration::millis(500); cluster_v1.run(); let mut cluster_v2 = test_raftstore_v2::new_node_cluster(0, 1); cluster_v2.run(); @@ -43,19 +44,24 @@ fn test_latency_inspect() { { // Test send LatencyInspect to V1. let (tx, rx) = std::sync::mpsc::sync_channel(10); - let inspector = LatencyInspector::new( - 1, - Box::new(move |_, duration| { - let dur = duration.sum(); - tx.send(dur).unwrap(); - }), - ); - let msg = StoreMsgV1::LatencyInspect { - send_time: Instant::now(), - inspector, - }; - router_v1.send_control(msg).unwrap(); - rx.recv_timeout(std::time::Duration::from_secs(2)).unwrap(); + // Inspect different factors. + for factor in [InspectFactor::RaftDisk, InspectFactor::KvDisk].iter() { + let cloned_tx = tx.clone(); + let inspector = LatencyInspector::new( + 1, + Box::new(move |_, duration| { + let dur = duration.sum(); + cloned_tx.send(dur).unwrap(); + }), + ); + let msg = StoreMsgV1::LatencyInspect { + factor: *factor, + send_time: Instant::now(), + inspector, + }; + router_v1.send_control(msg).unwrap(); + rx.recv_timeout(std::time::Duration::from_secs(2)).unwrap(); + } } { // Test send LatencyInspect to V2. @@ -83,17 +89,22 @@ fn test_sync_latency_inspect() { cluster.run(); let router = cluster.sim.wl().get_router(1).unwrap(); let (tx, rx) = std::sync::mpsc::sync_channel(10); - let inspector = LatencyInspector::new( - 1, - Box::new(move |_, duration| { - let dur = duration.sum(); - tx.send(dur).unwrap(); - }), - ); - let msg = StoreMsgV1::LatencyInspect { - send_time: Instant::now(), - inspector, - }; - router.send_control(msg).unwrap(); - rx.recv_timeout(std::time::Duration::from_secs(2)).unwrap(); + // Inspect different factors. 
+ for factor in [InspectFactor::RaftDisk, InspectFactor::KvDisk].iter() { + let cloned_tx = tx.clone(); + let inspector = LatencyInspector::new( + 1, + Box::new(move |_, duration| { + let dur = duration.sum(); + cloned_tx.send(dur).unwrap(); + }), + ); + let msg = StoreMsgV1::LatencyInspect { + factor: *factor, + send_time: Instant::now(), + inspector, + }; + router.send_control(msg).unwrap(); + rx.recv_timeout(std::time::Duration::from_secs(2)).unwrap(); + } } diff --git a/tests/integrations/server/kv_service.rs b/tests/integrations/server/kv_service.rs index 3fc08306688..52eb3563dff 100644 --- a/tests/integrations/server/kv_service.rs +++ b/tests/integrations/server/kv_service.rs @@ -30,7 +30,7 @@ use pd_client::PdClient; use raft::eraftpb; use raftstore::{ coprocessor::CoprocessorHost, - store::{fsm::store::StoreMeta, AutoSplitController, SnapManager}, + store::{fsm::store::StoreMeta, AutoSplitController, DiskCheckRunner, SnapManager}, }; use resource_metering::CollectorRegHandle; use service::service_manager::GrpcServiceManager; @@ -1411,6 +1411,7 @@ fn test_double_run_node() { ConcurrencyManager::new(1.into()), CollectorRegHandle::new_for_test(), None, + DiskCheckRunner::dummy(), GrpcServiceManager::dummy(), Arc::new(AtomicU64::new(0)), ) From a6ec27d6b91a6bf9344e6da9e208a34f0b104664 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=B1=B1=E5=B2=9A?= <36239017+YuJuncen@users.noreply.github.com> Date: Tue, 3 Dec 2024 11:23:41 +0800 Subject: [PATCH 38/86] added `compact-log-bakcup` to `tikv-ctl` (#17845) close tikv/tikv#17844 Signed-off-by: hillium Co-authored-by: ti-chi-bot[bot] <108142056+ti-chi-bot[bot]@users.noreply.github.com> --- Cargo.lock | 3 + cmd/tikv-ctl/Cargo.toml | 3 + cmd/tikv-ctl/src/cmd.rs | 61 ++++- cmd/tikv-ctl/src/main.rs | 130 ++++++++- cmd/tikv-ctl/src/util.rs | 8 +- .../compact-log-backup/src/compaction/exec.rs | 4 +- .../compact-log-backup/src/compaction/meta.rs | 4 +- .../compact-log-backup/src/execute/mod.rs | 14 +- components/compact-log-backup/src/storage.rs | 29 +- src/server/status_server/lite.rs | 248 ++++++++++++++++++ src/server/status_server/mod.rs | 54 ++-- 11 files changed, 522 insertions(+), 36 deletions(-) create mode 100644 src/server/status_server/lite.rs diff --git a/Cargo.lock b/Cargo.lock index 806a892f76f..59324a44902 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -7114,9 +7114,11 @@ name = "tikv-ctl" version = "0.0.1" dependencies = [ "api_version", + "base64 0.13.0", "cc", "clap 2.33.0", "collections", + "compact-log-backup", "crypto", "encryption_export", "engine_rocks", @@ -7144,6 +7146,7 @@ dependencies = [ "slog", "slog-global", "structopt", + "tempfile", "tikv", "tikv_util", "time 0.1.43", diff --git a/cmd/tikv-ctl/Cargo.toml b/cmd/tikv-ctl/Cargo.toml index fc5865eece2..1896634d2b2 100644 --- a/cmd/tikv-ctl/Cargo.toml +++ b/cmd/tikv-ctl/Cargo.toml @@ -33,8 +33,10 @@ nortcheck = ["engine_rocks/nortcheck"] [dependencies] api_version = { workspace = true } +base64 = "0.13.0" clap = { workspace = true } collections = { workspace = true } +compact-log-backup = { workspace = true } crypto = { workspace = true } encryption_export = { workspace = true } engine_rocks = { workspace = true } @@ -62,6 +64,7 @@ server = { workspace = true } slog = { workspace = true } slog-global = { workspace = true } structopt = "0.3" +tempfile = "3.0" tikv = { workspace = true } tikv_util = { workspace = true } tokio = { version = "1.5", features = ["rt-multi-thread", "time"] } diff --git a/cmd/tikv-ctl/src/cmd.rs b/cmd/tikv-ctl/src/cmd.rs index 1fafa33f5a7..d11c76c4090 
100644 --- a/cmd/tikv-ctl/src/cmd.rs +++ b/cmd/tikv-ctl/src/cmd.rs @@ -3,7 +3,7 @@ use std::{borrow::ToOwned, str, string::ToString, sync::LazyLock, u64}; use clap::{crate_authors, AppSettings}; -use engine_traits::CF_DEFAULT; +use engine_traits::{SstCompressionType, CF_DEFAULT}; use structopt::StructOpt; const RAW_KEY_HINT: &str = "Raw key (generally starts with \"z\") in escaped form"; @@ -30,6 +30,9 @@ pub struct Opt { /// Set the log level pub log_level: String, + #[structopt(long, default_value = "text")] + pub log_format: String, + #[structopt(long)] /// Set the remote host pub host: Option, @@ -627,6 +630,62 @@ pub enum Cmd { /// hex end key end: String, }, + CompactLogBackup { + #[structopt( + short, + long, + default_value = "compaction", + help( + "name of the compaction, register this will help you find the compaction easier." + ) + )] + name: String, + #[structopt(long = "from", help("from when we need to compact."))] + from_ts: u64, + #[structopt( + long = "until", + help( + "until when we need to compact. \ + Also note that records out of the [--from, --until) range may also be compacted \ + if their neighbour in the same file needs to be compacted." + ) + )] + until_ts: u64, + #[structopt( + short = "N", + long = "concurrency", + default_value = "32", + help("how many compactions can be executed concurrently.") + )] + max_concurrent_compactions: u64, + #[structopt( + short = "s", + long = "storage-base64", + help( + "the base-64 encoded protocol buffer message `StorageBackend`. \ + `br` CLI should provide a subcommand that converts an URL to it." + ) + )] + storage_base64: String, + #[structopt( + long, + default_value = "lz4", + help( + "the compression method will use when generating SSTs. (hint: zstd | lz4 | snappy)" + ) + )] + compression: SstCompressionType, + #[structopt( + long, + help( + "the compression level. it definition and effect varies by the algorithm we choose." + ) + )] + compression_level: Option, + + #[structopt(long, help("Don't try to skip already finished compactions."))] + force_regenerate: bool, + }, /// Get the state of a region's RegionReadProgress. GetRegionReadProgress { #[structopt(short = "r", long)] diff --git a/cmd/tikv-ctl/src/main.rs b/cmd/tikv-ctl/src/main.rs index 6fb558e7601..eeb8bb63f0a 100644 --- a/cmd/tikv-ctl/src/main.rs +++ b/cmd/tikv-ctl/src/main.rs @@ -20,17 +20,19 @@ use std::{ }; use collections::HashMap; +use compact_log_backup::{exec_hooks as compact_log_hooks, execute as compact_log, TraceResultExt}; use crypto::fips; use encryption_export::{ create_backend, data_key_manager_from_config, DataKeyManager, DecrypterReader, Iv, }; -use engine_rocks::get_env; +use engine_rocks::{get_env, util::new_engine_opt, RocksEngine}; use engine_traits::Peekable; use file_system::calc_crc32; use futures::{executor::block_on, future::try_join_all}; use gag::BufferRedirect; use grpcio::{CallOption, ChannelBuilder, Environment}; use kvproto::{ + brpb, debugpb::{Db as DbType, *}, encryptionpb::EncryptionMethod, kvrpcpb::SplitRegionRequest, @@ -45,6 +47,7 @@ use raftstore::store::util::build_key_range; use regex::Regex; use security::{SecurityConfig, SecurityManager}; use structopt::{clap::ErrorKind, StructOpt}; +use tempfile::TempDir; use tikv::{ config::TikvConfig, server::{debug::BottommostLevelCompaction, KvEngineFactoryBuilder}, @@ -67,7 +70,7 @@ fn main() { let opt = Opt::from_args(); // Initialize logger. - init_ctl_logger(&opt.log_level); + init_ctl_logger(&opt.log_level, &opt.log_format); // Print OpenSSL FIPS mode status. 
fips::log_status(); @@ -385,6 +388,92 @@ fn main() { end_key, ); } + Cmd::CompactLogBackup { + from_ts, + until_ts, + max_concurrent_compactions: max_compaction_num, + storage_base64, + compression, + compression_level, + name, + force_regenerate, + } => { + let tmp_engine = + TemporaryRocks::new(&cfg).expect("failed to create temp engine for writing SSTs."); + let maybe_external_storage = base64::decode(storage_base64) + .map_err(|err| format!("cannot parse base64: {}", err)) + .and_then(|storage_bytes| { + let mut ext_storage = brpb::StorageBackend::new(); + ext_storage + .merge_from_bytes(&storage_bytes) + .map_err(|err| format!("cannot parse bytes as StorageBackend: {}", err))?; + Result::Ok(ext_storage) + }); + let external_storage = match maybe_external_storage { + Ok(s) => s, + Err(err) => { + clap::Error { + message: format!("(-s, --storage-base64) is invalid: {:?}", err), + kind: ErrorKind::InvalidValue, + info: None, + } + .exit(); + } + }; + let ccfg = compact_log::ExecutionConfig { + from_ts, + until_ts, + compression, + compression_level, + }; + let exec = compact_log::Execution { + out_prefix: ccfg.recommended_prefix(&name), + cfg: ccfg, + max_concurrent_subcompaction: max_compaction_num, + external_storage, + db: Some(tmp_engine.rocks), + }; + + use tikv::server::status_server::lite::Server as StatusServerLite; + struct ExportTiKVInfo { + cfg: TikvConfig, + } + impl compact_log::hooking::ExecHooks for ExportTiKVInfo { + async fn before_execution_started( + &mut self, + cx: compact_log::hooking::BeforeStartCtx<'_>, + ) -> compact_log_backup::Result<()> { + use compact_log_backup::OtherErrExt; + tikv_util::info!("Welcome to TiKV control: compact log backup."); + tikv_util::info!("TiKV version info."; "info_string" => tikv::tikv_version_info(None)); + + let srv = StatusServerLite::new(Arc::new(self.cfg.security.clone())); + let _enter = cx.async_rt.enter(); + let hnd = srv + .start(&self.cfg.server.status_addr) + .adapt_err() + .annotate("failed to start status server lite")?; + tikv_util::info!("Started status server lite."; "at" => %hnd.address()); + Ok(()) + } + } + + let log_to_term = compact_log_hooks::observability::Observability::default(); + let save_meta = compact_log_hooks::save_meta::SaveMeta::default(); + let with_lock = compact_log_hooks::consistency::StorageConsistencyGuard::default(); + let with_status_server = ExportTiKVInfo { cfg: cfg.clone() }; + let checkpoint = if force_regenerate { + None + } else { + Some(compact_log_hooks::checkpoint::Checkpoint::default()) + }; + let hooks = ( + ((log_to_term, checkpoint), with_status_server), + (save_meta, with_lock), + ); + exec.run(hooks) + .expect("failed to execute compact-log-backup") + } // Commands below requires either the data dir or the host. cmd => { let data_dir = opt.data_dir.as_deref(); @@ -1080,6 +1169,43 @@ fn read_fail_file(path: &str) -> Vec<(String, String)> { list } +/// A temporary RocksDB instance. +/// Its content will be saved at a temp dir, so don't put too many stuffs into +/// it. The configurations are loaded to this instance, so it can be used for +/// constructing / reading SST files. 
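For reference, the `-s/--storage-base64` value expected above is a serialized `brpb::StorageBackend` protobuf message, base64-encoded. The following is a minimal sketch of producing such a value outside of `br` (it assumes the same `kvproto`, `protobuf` 2.x, and `base64` 0.13 crates this change already depends on, and uses a local-filesystem backend purely as an illustration):

use kvproto::brpb;
use protobuf::Message;

fn storage_backend_base64(path: &str) -> protobuf::ProtobufResult<String> {
    // Build a StorageBackend that points at a local directory; br would
    // normally derive this from a storage URL such as local:///backup/dir.
    let mut local = brpb::Local::new();
    local.set_path(path.to_owned());
    let mut backend = brpb::StorageBackend::new();
    backend.set_local(local);
    // Serialize the message and base64-encode it for the CLI flag.
    Ok(base64::encode(backend.write_to_bytes()?))
}

fn main() {
    let encoded = storage_backend_base64("/data/backup").unwrap();
    println!("pass this via: tikv-ctl compact-log-backup -s {} --from <ts> --until <ts>", encoded);
}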
+struct TemporaryRocks { + rocks: RocksEngine, + #[allow(dead_code)] + tmp: TempDir, +} + +impl TemporaryRocks { + fn new(cfg: &TikvConfig) -> Result { + let tmp = TempDir::new().map_err(|v| format!("failed to create tmp dir: {}", v))?; + let opt = build_rocks_opts(cfg); + let cf_opts = cfg.rocksdb.build_cf_opts( + &cfg.rocksdb + .build_cf_resources(cfg.storage.block_cache.build_shared_cache()), + None, + cfg.storage.api_version(), + None, + cfg.storage.engine, + ); + let rocks = new_engine_opt( + tmp.path().to_str().ok_or_else(|| { + format!( + "temp path isn't valid utf-8 string: {}", + tmp.path().display() + ) + })?, + opt, + cf_opts, + ) + .map_err(|v| format!("failed to build engine: {}", v))?; + Ok(Self { rocks, tmp }) + } +} + fn build_rocks_opts(cfg: &TikvConfig) -> engine_rocks::RocksDbOptions { let key_manager = data_key_manager_from_config(&cfg.security.encryption, &cfg.storage.data_dir) .unwrap() diff --git a/cmd/tikv-ctl/src/util.rs b/cmd/tikv-ctl/src/util.rs index 6d17ba67652..90322b6000d 100644 --- a/cmd/tikv-ctl/src/util.rs +++ b/cmd/tikv-ctl/src/util.rs @@ -5,15 +5,21 @@ use std::{borrow::ToOwned, error::Error, str, str::FromStr, u64}; use kvproto::kvrpcpb::KeyRange; use server::setup::initial_logger; use tikv::config::TikvConfig; +use tikv_util::config::LogFormat; const LOG_DIR: &str = "./ctl-engine-info-log"; #[allow(clippy::field_reassign_with_default)] -pub fn init_ctl_logger(level: &str) { +pub fn init_ctl_logger(level: &str, format: &str) { let mut cfg = TikvConfig::default(); cfg.log.level = slog::Level::from_str(level).unwrap().into(); cfg.rocksdb.info_log_dir = LOG_DIR.to_owned(); cfg.raftdb.info_log_dir = LOG_DIR.to_owned(); + cfg.log.format = match format { + "json" => LogFormat::Json, + "text" => LogFormat::Text, + fmt => panic!("unknown log format {}", fmt), + }; initial_logger(&cfg); } diff --git a/components/compact-log-backup/src/compaction/exec.rs b/components/compact-log-backup/src/compaction/exec.rs index 3fd14e480f2..14d47f15f3b 100644 --- a/components/compact-log-backup/src/compaction/exec.rs +++ b/components/compact-log-backup/src/compaction/exec.rs @@ -23,7 +23,7 @@ use crate::{ errors::{OtherErrExt, Result, TraceResultExt}, source::{Record, Source}, statistic::{prom::*, LoadStatistic, SubcompactStatistic}, - storage::COMPACTION_OUT_PREFIX, + storage::DEFAULT_COMPACTION_OUT_PREFIX, util::{self, Cooperate, ExecuteAllExt}, }; @@ -80,7 +80,7 @@ impl From> for SubcompactionExec { output: value.storage, out_prefix: value .out_prefix - .unwrap_or_else(|| Path::new(COMPACTION_OUT_PREFIX).to_owned()), + .unwrap_or_else(|| Path::new(DEFAULT_COMPACTION_OUT_PREFIX).to_owned()), db: value.db, co: Default::default(), diff --git a/components/compact-log-backup/src/compaction/meta.rs b/components/compact-log-backup/src/compaction/meta.rs index 75e53f18750..62d500528a7 100644 --- a/components/compact-log-backup/src/compaction/meta.rs +++ b/components/compact-log-backup/src/compaction/meta.rs @@ -239,12 +239,12 @@ impl CompactionRunInfoBuilder { pub async fn write_migration(&self, s: &dyn ExternalStorage) -> Result<()> { let migration = self.migration_of(self.find_expiring_files(s).await?); let wrapped_storage = MigartionStorageWrapper::new(s); - wrapped_storage.write(migration).await?; + wrapped_storage.write(migration.into()).await?; Ok(()) } pub fn migration_of(&self, metas: Vec) -> brpb::Migration { - let mut migration = brpb::Migration::new(); + let mut migration = brpb::Migration::default(); for files in metas { let mut medit = brpb::MetaEdit::new(); 
medit.set_path(files.meta_path.to_string()); diff --git a/components/compact-log-backup/src/execute/mod.rs b/components/compact-log-backup/src/execute/mod.rs index d1eff289d44..612a6c78f7a 100644 --- a/components/compact-log-backup/src/execute/mod.rs +++ b/components/compact-log-backup/src/execute/mod.rs @@ -36,6 +36,8 @@ use crate::{ util, ErrorKind, }; +const COMPACTION_V1_PREFIX: &str = "v1/compactions"; + /// The config for an execution of a compaction. /// /// This structure itself fully defines what work the compaction need to do. @@ -99,7 +101,12 @@ impl ExecutionConfig { hasher.write(&util::compression_type_to_u8(self.compression).to_le_bytes()); hasher.write(&self.compression_level.unwrap_or(0).to_le_bytes()); - format!("{}_{}", name, util::aligned_u64(hasher.sum64())) + format!( + "{}/{}_{}", + COMPACTION_V1_PREFIX, + name, + util::aligned_u64(hasher.sum64()) + ) } } @@ -112,7 +119,10 @@ pub struct Execution { pub max_concurrent_subcompaction: u64, /// The external storage for input and output. pub external_storage: StorageBackend, - /// The RocksDB instance for generating SST. + /// The RocksDB instance for creating `SstWriter`. + /// By design little or no data will be written to the instance, for now + /// this is only used for loading the user collected properties + /// configuration. pub db: Option, /// The prefix of the artifices. pub out_prefix: String, diff --git a/components/compact-log-backup/src/storage.rs b/components/compact-log-backup/src/storage.rs index 24311d69b45..2e55a831e42 100644 --- a/components/compact-log-backup/src/storage.rs +++ b/components/compact-log-backup/src/storage.rs @@ -37,7 +37,7 @@ use super::{ use crate::{compaction::EpochHint, errors::ErrorKind, util}; pub const METADATA_PREFIX: &str = "v1/backupmeta"; -pub const COMPACTION_OUT_PREFIX: &str = "compaction_out"; +pub const DEFAULT_COMPACTION_OUT_PREFIX: &str = "v1/compaction_out"; pub const MIGRATION_PREFIX: &str = "v1/migrations"; pub const LOCK_PREFIX: &str = "v1/LOCK"; @@ -535,6 +535,29 @@ impl LogFile { } } +#[derive(derive_more::Deref, derive_more::DerefMut, Debug)] +/// A migration with version and creator info. +/// Preferring use this instead of directly create `Migration`. +pub struct VersionedMigration(Migration); + +impl From for VersionedMigration { + fn from(mut mig: Migration) -> Self { + mig.set_version(brpb::MigrationVersion::M1); + mig.set_creator(format!( + "tikv;commit={};branch={}", + option_env!("TIKV_BUILD_GIT_HASH").unwrap_or("UNKNOWN"), + option_env!("TIKV_BUILD_GIT_BRANCH").unwrap_or("UNKNOWN"), + )); + Self(mig) + } +} + +impl Default for VersionedMigration { + fn default() -> Self { + Self::from(Migration::default()) + } +} + pub struct MigartionStorageWrapper<'a> { storage: &'a dyn ExternalStorage, migartions_prefix: &'a str, @@ -548,8 +571,10 @@ impl<'a> MigartionStorageWrapper<'a> { } } - pub async fn write(&self, migration: Migration) -> Result<()> { + pub async fn write(&self, migration: VersionedMigration) -> Result<()> { use protobuf::Message; + + let migration = migration.0; let id = self.largest_id().await?; // Note: perhaps we need to verify that there isn't concurrency writing in the // future. diff --git a/src/server/status_server/lite.rs b/src/server/status_server/lite.rs new file mode 100644 index 00000000000..68ca68cf3c5 --- /dev/null +++ b/src/server/status_server/lite.rs @@ -0,0 +1,248 @@ +// Copyright 2024 TiKV Project Authors. Licensed under Apache-2.0. + +//! A striped version for the status server. It supports a subset of the status +//! server. 
Basically, it exports the mertrics and process information. But +//! won't provide TiKV server related stuffs, like reloading config or dump +//! region info. +//! +//! This will be used to improve the of obserbility some short-term tasks of +//! `tikv-ctl`. + +use std::{error::Error as StdError, net::SocketAddr, str::FromStr, sync::Arc}; + +use futures_util::future::TryFutureExt; +use http::{Method, Request, Response, StatusCode}; +use hyper::{ + server::{accept::Accept, conn::AddrIncoming}, + service::service_fn, + Body, Server as HyperSrv, +}; +use openssl::x509::X509; +use security::SecurityConfig; +use tokio::io::{AsyncRead, AsyncWrite}; + +use super::{make_response, tls_incoming, StatusServer}; +use crate::server::Result; + +/// Svc is a type alias that help us to call static methods in the full status +/// server. +type Svc = StatusServer<()>; + +/// Server manages how we accept the incoming requests and how we handle them. +/// +/// After creating and configurating this, you may use [`start`] to start +/// serving and detach this. You can control the server then by the [`Handle`]. +pub struct Server { + security_config: Arc, +} + +/// Handle is the controller for the sever. +pub struct Handle { + addr: SocketAddr, +} + +impl Handle { + /// Return the bound address of the server. + pub fn address(&self) -> &SocketAddr { + &self.addr + } +} + +impl Server { + pub fn new(sec: Arc) -> Self { + Server { + security_config: sec, + } + } + + /// Start the server. + /// + /// # Panic + /// + /// This must be run in a tokio context. Or this will panic. + pub fn start(self, status_addr: &str) -> Result { + let addr = SocketAddr::from_str(status_addr)?; + + let incoming = AddrIncoming::bind(&addr)?; + let hnd = Handle { + addr: incoming.local_addr(), + }; + if !self.security_config.cert_path.is_empty() + && !self.security_config.key_path.is_empty() + && !self.security_config.ca_path.is_empty() + { + let tls_incoming = tls_incoming(self.security_config.clone(), incoming)?; + let server = HyperSrv::builder(tls_incoming); + self.start_serve(server); + } else { + let server = HyperSrv::builder(incoming); + self.start_serve(server); + } + Ok(hnd) + } + + fn start_serve(self, builder: hyper::server::Builder) + where + I: Accept + Send + 'static, + I::Error: Into>, + I::Conn: AsyncRead + AsyncWrite + Unpin + Send + 'static, + C: super::ServerConnection, + { + let mut svc = LiteService; + + let server = builder.serve(super::make_service_fn(move |conn: &C| { + let client_cert = conn.get_x509(); + let security = self.security_config.clone(); + std::future::ready(hyper::Result::Ok(service_fn(move |req| { + let client_cert = client_cert.clone(); + let security = security.clone(); + async move { + svc.call(RequestCtx { + req, + client_cert, + security, + }) + .await + } + }))) + })); + + let svc = + server.map_err(|err| warn!("status server lite encountered error"; "err" => %err)); + tokio::spawn(svc); + } +} + +/// A light-weighted status server for batch tasks without full bootstraped +/// TiKV in `tikv-ctl`. 
+/// +/// This exports a subset of the status, including: +/// - metrics +/// - CPU / memory profiling +/// - async tasks tracing +#[derive(Copy, Clone)] +pub struct LiteService; + +struct RequestCtx { + req: Request, + client_cert: Option, + security: Arc, +} + +impl LiteService { + async fn call(&mut self, cx: RequestCtx) -> std::result::Result, hyper::Error> { + let path = cx.req.uri().path().to_owned(); + let method = cx.req.method().to_owned(); + + let should_check_cert = !matches!( + (&method, path.as_ref()), + (&Method::GET, "/metrics") | (&Method::GET, "/debug/pprof/profile") + ); + + if should_check_cert && !super::check_cert(cx.security, cx.client_cert) { + return Ok(make_response( + StatusCode::FORBIDDEN, + "certificate role error", + )); + } + + match (&method, path.as_str()) { + (&Method::GET, "/metrics") => Svc::metrics_to_resp(cx.req, true), + (&Method::GET, "/debug/pprof/profile") => Svc::dump_cpu_prof_to_resp(cx.req).await, + (&Method::GET, "/async_tasks") => Svc::dump_async_trace(), + (&Method::GET, "/debug/pprof/heap") => Svc::dump_heap_prof_to_resp(cx.req), + _ => Ok(make_response(StatusCode::NOT_FOUND, "path not found")), + } + } +} + +#[cfg(test)] +mod test { + use std::sync::Arc; + + use hyper::{Body, Request, StatusCode}; + use security::SecurityConfig; + + use super::*; + + impl super::Server { + fn insecure() -> Self { + Self::new(Arc::default()) + } + } + + #[tokio::test] + async fn test_server_start_insecure() { + let server = Server::insecure(); + let handle = server.start("127.0.0.1:0").unwrap(); + assert!(handle.address().is_ipv4()); + } + + #[tokio::test] + async fn test_lite_service_call_metrics() { + let test_series = prometheus::register_counter!( + "tikv_test_series", + "This is used for testing that the series was registered.", + ) + .unwrap(); + test_series.inc(); + + let mut service = LiteService; + let req = Request::builder() + .method("GET") + .uri("/metrics") + .body(Body::empty()) + .unwrap(); + let ctx = RequestCtx { + req, + client_cert: None, + security: Arc::new(SecurityConfig::default()), + }; + let resp = service.call(ctx).await.unwrap(); + + assert_eq!(resp.status(), StatusCode::OK); + let body = hyper::body::to_bytes(resp.into_body()).await.unwrap(); + let body = String::from_utf8(body.to_vec()).unwrap(); + assert!(body.contains("tikv_test_series 1"), "it is \n{}", body); + } + + #[tokio::test] + async fn test_lite_service_call_profile() { + let mut service = LiteService; + let req = Request::builder() + .method("GET") + .uri("/debug/pprof/profile?seconds=1") + .body(Body::empty()) + .unwrap(); + let ctx = RequestCtx { + req, + client_cert: None, + security: Arc::new(SecurityConfig::default()), + }; + let resp = service.call(ctx).await.unwrap(); + assert_eq!(resp.status(), StatusCode::OK); + assert_eq!( + resp.headers() + .get("content-type") + .map(|h| h.to_str().unwrap()), + Some(mime::IMAGE_SVG.as_ref()) + ); + } + + #[tokio::test] + async fn test_lite_service_call_not_found() { + let mut service = LiteService; + let req = Request::builder() + .method("GET") + .uri("/not-found") + .body(Body::empty()) + .unwrap(); + let ctx = RequestCtx { + req, + client_cert: None, + security: Arc::new(SecurityConfig::default()), + }; + let resp = service.call(ctx).await.unwrap(); + assert_eq!(resp.status(), StatusCode::NOT_FOUND); + } +} diff --git a/src/server/status_server/mod.rs b/src/server/status_server/mod.rs index 5c20befb2fe..ccd763803d2 100644 --- a/src/server/status_server/mod.rs +++ b/src/server/status_server/mod.rs @@ -4,6 +4,8 @@ mod 
metrics; /// Provides profilers for TiKV. mod profile; +pub mod lite; + use std::{ env::args, error::Error as StdError, @@ -457,13 +459,8 @@ where pub fn listening_addr(&self) -> SocketAddr { self.addr.unwrap() } -} -impl StatusServer -where - R: 'static + Send + RaftExtension + Clone, -{ - fn dump_async_trace() -> hyper::Result> { + pub fn dump_async_trace() -> hyper::Result> { Ok(make_response( StatusCode::OK, tracing_active_tree::layer::global().fmt_bytes_with(|t, buf| { @@ -474,6 +471,32 @@ where )) } + fn metrics_to_resp(req: Request, should_simplify: bool) -> hyper::Result> { + let gz_encoding = client_accept_gzip(&req); + let metrics = if gz_encoding { + // gzip can reduce the body size to less than 1/10. + let mut encoder = GzEncoder::new(vec![], Compression::default()); + dump_to(&mut encoder, should_simplify); + encoder.finish().unwrap() + } else { + dump(should_simplify).into_bytes() + }; + let mut resp = Response::new(metrics.into()); + resp.headers_mut() + .insert(CONTENT_TYPE, HeaderValue::from_static(TEXT_FORMAT)); + if gz_encoding { + resp.headers_mut() + .insert(CONTENT_ENCODING, HeaderValue::from_static("gzip")); + } + + Ok(resp) + } +} + +impl StatusServer +where + R: 'static + Send + RaftExtension + Clone, +{ fn handle_pause_grpc( mut grpc_service_mgr: GrpcServiceManager, ) -> hyper::Result> { @@ -585,24 +608,7 @@ where mgr: &ConfigController, ) -> hyper::Result> { let should_simplify = mgr.get_current().server.simplify_metrics; - let gz_encoding = client_accept_gzip(&req); - let metrics = if gz_encoding { - // gzip can reduce the body size to less than 1/10. - let mut encoder = GzEncoder::new(vec![], Compression::default()); - dump_to(&mut encoder, should_simplify); - encoder.finish().unwrap() - } else { - dump(should_simplify).into_bytes() - }; - let mut resp = Response::new(metrics.into()); - resp.headers_mut() - .insert(CONTENT_TYPE, HeaderValue::from_static(TEXT_FORMAT)); - if gz_encoding { - resp.headers_mut() - .insert(CONTENT_ENCODING, HeaderValue::from_static("gzip")); - } - - Ok(resp) + Self::metrics_to_resp(req, should_simplify) } fn start_serve(&mut self, builder: HyperBuilder) From 81941fc5a0c33423c39781d2bf9e15206de8ce0e Mon Sep 17 00:00:00 2001 From: Ling Jin <7138436+3AceShowHand@users.noreply.github.com> Date: Tue, 3 Dec 2024 12:40:05 +0800 Subject: [PATCH 39/86] cdc: Lightning physical import ignore by cdc (#17895) close tikv/tikv#17897 * TiKV-CDC skip the row if the 17th bit of the txn_source is set, which indicate it comes from the Lightning physical import * sst-service initialize the txn_sst_writer also set the txn_source Signed-off-by: 3AceShowHand Signed-off-by: Ling Jin <7138436+3AceShowHand@users.noreply.github.com> Co-authored-by: ti-chi-bot[bot] <108142056+ti-chi-bot[bot]@users.noreply.github.com> Co-authored-by: Neil Shen --- components/cdc/src/delegate.rs | 13 ++++++-- components/cdc/src/initializer.rs | 7 +++++ components/cdc/src/txn_source.rs | 33 +++++++++++++++++---- components/sst_importer/src/sst_importer.rs | 12 ++++++-- components/sst_importer/src/sst_writer.rs | 25 ++++++++++++++-- src/import/sst_service.rs | 7 +++-- 6 files changed, 82 insertions(+), 15 deletions(-) diff --git a/components/cdc/src/delegate.rs b/components/cdc/src/delegate.rs index 1fe747978cd..6672e6e6137 100644 --- a/components/cdc/src/delegate.rs +++ b/components/cdc/src/delegate.rs @@ -814,7 +814,8 @@ impl Delegate { row_size = 0; } } - if TxnSource::is_lossy_ddl_reorg_source_set(row.txn_source) + if TxnSource::is_lightning_physical_import(row.txn_source) + || 
TxnSource::is_lossy_ddl_reorg_source_set(row.txn_source) || filter_loop && TxnSource::is_cdc_write_source_set(row.txn_source) { continue; @@ -970,7 +971,8 @@ impl Delegate { } } - if TxnSource::is_lossy_ddl_reorg_source_set(v.txn_source) + if TxnSource::is_lightning_physical_import(v.txn_source) + || TxnSource::is_lossy_ddl_reorg_source_set(v.txn_source) || downstream.filter_loop && TxnSource::is_cdc_write_source_set(v.txn_source) { continue; @@ -1822,6 +1824,13 @@ mod tests { test_downstream_txn_source_filter(txn_source, true); } + #[test] + fn test_downstream_filter_lightning_physical_import() { + let mut txn_source = TxnSource::default(); + txn_source.set_lightning_physical_import(); + test_downstream_txn_source_filter(txn_source, false); + } + #[test] fn test_downstream_filter_lossy_ddl_entires() { let mut txn_source = TxnSource::default(); diff --git a/components/cdc/src/initializer.rs b/components/cdc/src/initializer.rs index c38cb850f48..7017cf36ba3 100644 --- a/components/cdc/src/initializer.rs +++ b/components/cdc/src/initializer.rs @@ -909,6 +909,13 @@ mod tests { test_initializer_txn_source_filter(txn_source, true); } + #[test] + fn test_initializer_lightning_physical_import_filter() { + let mut txn_source = TxnSource::default(); + txn_source.set_lightning_physical_import(); + test_initializer_txn_source_filter(txn_source, false); + } + #[test] fn test_initializer_lossy_ddl_filter() { let mut txn_source = TxnSource::default(); diff --git a/components/cdc/src/txn_source.rs b/components/cdc/src/txn_source.rs index 81dc9f95096..5cec5acf75d 100644 --- a/components/cdc/src/txn_source.rs +++ b/components/cdc/src/txn_source.rs @@ -1,8 +1,8 @@ // Copyright 2023 TiKV Project Authors. Licensed under Apache-2.0. // The bitmap: -// |RESERVED|LOSSY_DDL_REORG_SOURCE_BITS|CDC_WRITE_SOURCE_BITS| -// | 48 | 8 | 4(RESERVED) | 4 | +// |RESERVED|LIGHTNING_PHYSICAL_IMPORT_BIT|LOSSY_DDL_REORG_SOURCE_BITS|CDC_WRITE_SOURCE_BITS| +// | 47 | 1 | 8 | 4(RESERVED) | 4 | // // TiCDC uses 1 - 255 to indicate the source of TiDB. // For now, 1 - 15 are reserved for TiCDC to implement BDR synchronization. @@ -20,11 +20,15 @@ const LOSSY_DDL_COLUMN_REORG_SOURCE: u64 = 1; const LOSSY_DDL_REORG_SOURCE_MAX: u64 = (1 << LOSSY_DDL_REORG_SOURCE_BITS) - 1; const LOSSY_DDL_REORG_SOURCE_SHIFT: u64 = CDC_WRITE_SOURCE_BITS; +// The 17th bit is reserved for the lightning physical import mode. +const LIGHTNING_PHYSICAL_IMPORT_SHIFT: u64 = 16; + /// For kv.TxnSource /// We use an uint64 to represent the source of a transaction. -/// The first 8 bits are reserved for TiCDC, and the next 8 bits are reserved -/// for Lossy DDL reorg Backfill job. The remaining 48 bits are reserved for -/// extendability. +/// The first 8 bits are reserved for TiCDC. +/// The second 8 bits are reserved for Lossy DDL reorg Backfill job. +/// The 17th bit is reserved for the lightning physical import mode. +/// The remaining 47 bits are reserved for extendability. 
#[derive(Clone, Copy, Debug, PartialEq, Eq, Default)] pub(crate) struct TxnSource(u64); @@ -62,6 +66,15 @@ impl TxnSource { pub(crate) fn is_lossy_ddl_reorg_source_set(txn_source: u64) -> bool { (txn_source >> LOSSY_DDL_REORG_SOURCE_SHIFT) != 0 } + + #[cfg(test)] + pub(crate) fn set_lightning_physical_import(&mut self) { + self.0 |= 1 << LIGHTNING_PHYSICAL_IMPORT_SHIFT; + } + + pub(crate) fn is_lightning_physical_import(txn_source: u64) -> bool { + (txn_source & (1 << LIGHTNING_PHYSICAL_IMPORT_SHIFT)) != 0 + } } impl From for u64 { @@ -113,4 +126,14 @@ mod tests { false ); } + + #[test] + fn test_is_lightning_physical_import() { + let mut txn_source = TxnSource::default(); + + assert_eq!(TxnSource::is_lightning_physical_import(txn_source.0), false); + + txn_source.set_lightning_physical_import(); + assert_eq!(TxnSource::is_lightning_physical_import(txn_source.0), true); + } } diff --git a/components/sst_importer/src/sst_importer.rs b/components/sst_importer/src/sst_importer.rs index 70206bba6d2..b40bf442aea 100644 --- a/components/sst_importer/src/sst_importer.rs +++ b/components/sst_importer/src/sst_importer.rs @@ -1427,7 +1427,12 @@ impl SstImporter { self.dir.list_ssts() } - pub fn new_txn_writer(&self, db: &E, meta: SstMeta) -> Result> { + pub fn new_txn_writer( + &self, + db: &E, + meta: SstMeta, + txn_source: u64, + ) -> Result> { let mut default_meta = meta.clone(); default_meta.set_cf_name(CF_DEFAULT.to_owned()); let default_path = self.dir.join_for_write(&default_meta)?; @@ -1457,10 +1462,11 @@ impl SstImporter { write_meta, self.key_manager.clone(), self.api_version, + txn_source, )) } - pub fn new_raw_writer(&self, db: &E, mut meta: SstMeta) -> Result> { + pub fn new_raw_writer(&self, db: &E, mut meta: SstMeta, _: u64) -> Result> { meta.set_cf_name(CF_DEFAULT.to_owned()); let default_path = self.dir.join_for_write(&meta)?; let default = E::SstWriterBuilder::new() @@ -3393,7 +3399,7 @@ mod tests { let db_path = importer_dir.path().join("db"); let db = new_test_engine(db_path.to_str().unwrap(), DATA_CFS); - let mut w = importer.new_txn_writer(&db, meta).unwrap(); + let mut w = importer.new_txn_writer(&db, meta, 0).unwrap(); let mut batch = WriteBatch::default(); let mut pairs = vec![]; diff --git a/components/sst_importer/src/sst_writer.rs b/components/sst_importer/src/sst_writer.rs index 1c6b06902a4..aa1064ae577 100644 --- a/components/sst_importer/src/sst_writer.rs +++ b/components/sst_importer/src/sst_writer.rs @@ -30,6 +30,7 @@ pub struct TxnSstWriter { write_meta: SstMeta, key_manager: Option>, api_version: ApiVersion, + txn_source: u64, } impl TxnSstWriter { @@ -42,6 +43,7 @@ impl TxnSstWriter { write_meta: SstMeta, key_manager: Option>, api_version: ApiVersion, + txn_source: u64, ) -> Self { TxnSstWriter { default, @@ -56,6 +58,7 @@ impl TxnSstWriter { write_meta, key_manager, api_version, + txn_source, } } @@ -102,6 +105,7 @@ impl TxnSstWriter { KvWrite::new(WriteType::Put, commit_ts, None) } }; + let w = w.set_txn_source(self.txn_source); let write = w.as_ref().to_bytes(); self.write.put(&k, &write)?; self.write_entries += 1; @@ -301,7 +305,7 @@ mod tests { use crate::{Config, SstImporter}; // Return the temp dir path to avoid it drop out of the scope. 
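The 17th-bit convention documented above is what an importer is expected to set on the request `Context` so that CDC later drops these rows. A small sketch of the client-side tagging (the `get_txn_source` getter on `kvproto::kvrpcpb::Context` is the one this patch reads in `sst_service.rs`; the matching setter is assumed to be the generated counterpart):

use kvproto::kvrpcpb::Context;

// Mirrors LIGHTNING_PHYSICAL_IMPORT_SHIFT in components/cdc/src/txn_source.rs.
const LIGHTNING_PHYSICAL_IMPORT_SHIFT: u64 = 16;

fn tag_physical_import(ctx: &mut Context) {
    // Set the 17th bit and leave the CDC / lossy-DDL bits in the low 16 bits untouched.
    let source = ctx.get_txn_source() | (1 << LIGHTNING_PHYSICAL_IMPORT_SHIFT);
    ctx.set_txn_source(source);
}

fn main() {
    let mut ctx = Context::default();
    tag_physical_import(&mut ctx);
    // A TxnSstWriter built with this txn_source stamps every write record,
    // and the is_lightning_physical_import check filters the rows out of CDC.
    assert_ne!(ctx.get_txn_source() & (1 << 16), 0);
}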
- fn new_writer, &RocksEngine, SstMeta) -> Result>( + fn new_writer, &RocksEngine, SstMeta, u64) -> Result>( f: F, api_version: ApiVersion, ) -> (W, TempDir) { @@ -314,7 +318,24 @@ mod tests { SstImporter::::new(&cfg, &importer_dir, None, api_version, false).unwrap(); let db_path = importer_dir.path().join("db"); let db = new_test_engine(db_path.to_str().unwrap(), DATA_CFS); - (f(&importer, &db, meta).unwrap(), importer_dir) + (f(&importer, &db, meta, 0).unwrap(), importer_dir) + } + + #[test] + fn test_new_txn_writer_with_lightning_txn_source() { + let importer_dir = tempfile::tempdir().unwrap(); + let cfg = Config::default(); + let importer = + SstImporter::::new(&cfg, &importer_dir, None, ApiVersion::V1, false) + .unwrap(); + let db_path = importer_dir.path().join("db"); + let db = new_test_engine(db_path.to_str().unwrap(), DATA_CFS); + + let mut meta = SstMeta::default(); + meta.set_uuid(Uuid::new_v4().as_bytes().to_vec()); + + let writer = SstImporter::new_txn_writer(&importer, &db, meta, 1 << 16); + assert_eq!(writer.unwrap().txn_source, 1 << 16); } #[test] diff --git a/src/import/sst_service.rs b/src/import/sst_service.rs index fae821b27aa..7ceeeb6339c 100644 --- a/src/import/sst_service.rs +++ b/src/import/sst_service.rs @@ -551,7 +551,7 @@ macro_rules! impl_write { Ok(r) => r, Err(e) => return (Err(e), Some(rx)), }; - let (meta, resource_limiter) = match first_req { + let (meta, resource_limiter, txn_source) = match first_req { Some(r) => { let limiter = resource_manager.as_ref().and_then(|m| { m.get_background_resource_limiter( @@ -561,8 +561,9 @@ macro_rules! impl_write { r.get_context().get_request_source(), ) }); + let txn_source = r.get_context().get_txn_source(); match r.chunk { - Some($chunk_ty::Meta(m)) => (m, limiter), + Some($chunk_ty::Meta(m)) => (m, limiter, txn_source), _ => return (Err(Error::InvalidChunk), Some(rx)), } } @@ -609,7 +610,7 @@ macro_rules! impl_write { } }; - let writer = match import.$writer_fn(&*tablet, meta) { + let writer = match import.$writer_fn(&*tablet, meta, txn_source) { Ok(w) => w, Err(e) => { error!("build writer failed {:?}", e); From e7a810b8947ba95117d42f45bf14248eea06a5b5 Mon Sep 17 00:00:00 2001 From: glorv Date: Tue, 3 Dec 2024 21:28:17 +0800 Subject: [PATCH 40/86] in_memory_engine: disable in-memory-engine snapshot for replica read request (#17927) ref tikv/tikv#17018 Temporarily disable ime snapshot for replica read request to workaround the potential bugs that related to ime snapshot. Signed-off-by: glorv Co-authored-by: ti-chi-bot[bot] <108142056+ti-chi-bot[bot]@users.noreply.github.com> --- src/server/raftkv/mod.rs | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/src/server/raftkv/mod.rs b/src/server/raftkv/mod.rs index 1ff79691846..697ffbe3228 100644 --- a/src/server/raftkv/mod.rs +++ b/src/server/raftkv/mod.rs @@ -606,13 +606,19 @@ where type IMSnap = RegionSnapshot>; type IMSnapshotRes = impl Future> + Send; fn async_in_memory_snapshot(&mut self, ctx: SnapContext<'_>) -> Self::IMSnapshotRes { - async_snapshot(&mut self.router, ctx).map_ok(|region_snap| { + let replica_read = ctx.pb_ctx.get_replica_read(); + async_snapshot(&mut self.router, ctx).map_ok(move |region_snap| { // TODO: Remove replace_snapshot. Taking a snapshot and replacing it // with a new one is a bit confusing. // A better way to build an in-memory snapshot is to return // `HybridEngineSnapshot, RegionCacheMemoryEngine>>;` // so the `replace_snapshot` can be removed. 
- region_snap.replace_snapshot(move |disk_snap, pinned| { + region_snap.replace_snapshot(move |disk_snap, mut pinned| { + // Disable in-memory-engine snapshot for now as there may be some bugs. + // TODO: we may remove this restriction once we fix the related bug. + if replica_read { + pinned = None; + } HybridEngineSnapshot::from_observed_snapshot(disk_snap, pinned) }) }) From 0659fcc574901dbce94b5ddf6d03d04b539f40f0 Mon Sep 17 00:00:00 2001 From: Connor Date: Tue, 3 Dec 2024 21:51:35 +0800 Subject: [PATCH 41/86] clippy: Fix cargo-deny install for make clippy (#17929) close tikv/tikv#17928 Fix cargo-deny install for make clippy Signed-off-by: Connor1996 Co-authored-by: ti-chi-bot[bot] <108142056+ti-chi-bot[bot]@users.noreply.github.com> --- scripts/deny | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/deny b/scripts/deny index 593f4ce32bd..f1e48771e57 100755 --- a/scripts/deny +++ b/scripts/deny @@ -2,7 +2,7 @@ set -euo pipefail -cargo install cargo-deny@0.14.3 2> /dev/null || echo "Install cargo-deny failed" +cargo install --locked cargo-deny@0.14.3 2> /dev/null || echo "Install cargo-deny failed" cargo deny -V cargo deny fetch all cargo deny check --show-stats From 004490cba39a63036917bdd964fb6f929bf2a1e0 Mon Sep 17 00:00:00 2001 From: lucasliang Date: Thu, 5 Dec 2024 18:21:29 +0800 Subject: [PATCH 42/86] *: upgrade pprof to 0.14 to fix https://github.com/rusticata/pcap-parser/issues/46 (#17942) close rusticata/pcap-parser#46, fix rusticata/pcap-parser#46, ref tikv/tikv#15990 upgrade pprof to 0.14 to fix https://github.com/rusticata/pcap-parser/issues/46. Signed-off-by: lucasliang --- Cargo.lock | 48 +++++++++++++++++------- Cargo.toml | 5 ++- components/compact-log-backup/Cargo.toml | 24 +++++++++--- 3 files changed, 55 insertions(+), 22 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 59324a44902..4745e6838b8 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -73,6 +73,15 @@ dependencies = [ "memchr", ] +[[package]] +name = "aligned-vec" +version = "0.6.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7e0966165eaf052580bd70eb1b32cb3d6245774c0104d1b2793e9650bf83b52a" +dependencies = [ + "equator", +] + [[package]] name = "allocator-api2" version = "0.2.14" @@ -2188,6 +2197,26 @@ dependencies = [ "termcolor", ] +[[package]] +name = "equator" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c35da53b5a021d2484a7cc49b2ac7f2d840f8236a286f84202369bd338d761ea" +dependencies = [ + "equator-macro", +] + +[[package]] +name = "equator-macro" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3bf679796c0322556351f287a51b49e48f7c4986e727b5dd78c972d30e2e16cc" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.79", +] + [[package]] name = "equivalent" version = "1.0.1" @@ -3145,7 +3174,7 @@ dependencies = [ "httpdate", "itoa 1.0.1", "pin-project-lite", - "socket2 0.4.7", + "socket2", "tokio", "tower-service", "tracing", @@ -4604,10 +4633,11 @@ dependencies = [ [[package]] name = "pprof" -version = "0.13.0" +version = "0.14.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ef5c97c51bd34c7e742402e216abdeb44d415fbe6ae41d56b114723e953711cb" +checksum = "ebbe2f8898beba44815fdc9e5a4ae9c929e21c5dc29b0c774a15555f7f58d6d0" dependencies = [ + "aligned-vec", "backtrace", "cfg-if 1.0.0", "findshlibs", @@ -6118,16 +6148,6 @@ dependencies = [ "cmake", ] -[[package]] -name = "socket2" -version = "0.4.7" -source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "02e2d2db9033d13a1567121ddd7a095ee144db4e1ca1b1bda3419bc0da294ebd" -dependencies = [ - "libc 0.2.151", - "winapi 0.3.9", -] - [[package]] name = "socket2" version = "0.5.7" @@ -7416,7 +7436,7 @@ dependencies = [ "parking_lot 0.12.1", "pin-project-lite", "signal-hook-registry", - "socket2 0.5.7", + "socket2", "tokio-macros", "windows-sys 0.52.0", ] diff --git a/Cargo.toml b/Cargo.toml index aecf767227a..2bcdc7d9af1 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -131,7 +131,7 @@ paste = "1.0" pd_client = { workspace = true } pin-project = "1.0" pnet_datalink = "0.23" -pprof = { version = "0.13", default-features = false, features = [ +pprof = { version = "0.14", default-features = false, features = [ "flamegraph", "protobuf-codec", ] } @@ -244,7 +244,8 @@ members = [ "components/cloud/azure", "components/cloud/gcp", "components/codec", - "components/collections", "components/compact-log-backup", + "components/collections", + "components/compact-log-backup", "components/concurrency_manager", "components/coprocessor_plugin_api", "components/crossbeam-skiplist", diff --git a/components/compact-log-backup/Cargo.toml b/components/compact-log-backup/Cargo.toml index 25f630a5008..65de6a80fff 100644 --- a/components/compact-log-backup/Cargo.toml +++ b/components/compact-log-backup/Cargo.toml @@ -8,7 +8,11 @@ edition = "2021" failpoints = ["fail/failpoints"] [dependencies] -async-compression = { version = "0.4.12", features = ["tokio", "futures-io", "zstd"] } +async-compression = { version = "0.4.12", features = [ + "tokio", + "futures-io", + "zstd", +] } bytes = "1" chrono = { workspace = true } codec = { workspace = true } @@ -28,17 +32,25 @@ keys = { workspace = true } kvproto = { workspace = true } lazy_static = "1.4" pin-project = "1.0" -prometheus = { version = "0.13", default-features = false, features = ["nightly"] } +prometheus = { version = "0.13", default-features = false, features = [ + "nightly", +] } protobuf = { version = "2.8", features = ["bytes"] } serde = "1.0" serde_json = "1.0" slog = { workspace = true } -slog-global ={ workspace = true } +slog-global = { workspace = true } thiserror = "1" tidb_query_datatype = { workspace = true } tikv_alloc = { workspace = true } tikv_util = { workspace = true } -tokio = { version = "1.5", features = ["rt-multi-thread", "macros", "time", "sync", "signal"] } +tokio = { version = "1.5", features = [ + "rt-multi-thread", + "macros", + "time", + "sync", + "signal", +] } tokio-stream = "0.1" tokio-util = { version = "0.7", features = ["compat"] } tracing = { workspace = true } @@ -49,9 +61,9 @@ txn_types = { workspace = true } zstd = "0.11" [dev-dependencies] -pprof = { version = "0.13", default-features = false, features = [ +pprof = { version = "0.14", default-features = false, features = [ "flamegraph", "protobuf-codec", -] } +] } tempdir = "0.3" test_util = { workspace = true } From 5f60963f26dcfedc9973c236122dc54ad03c8893 Mon Sep 17 00:00:00 2001 From: cfzjywxk Date: Thu, 5 Dec 2024 18:44:26 +0800 Subject: [PATCH 43/86] raftstore: add more duplicate entry check before proposing write command and batching commands (#17899) ref tikv/tikv#16818 Signed-off-by: cfzjywxk Co-authored-by: ti-chi-bot[bot] <108142056+ti-chi-bot[bot]@users.noreply.github.com> --- components/raftstore/src/store/fsm/peer.rs | 72 ++++++++++++++++++++++ src/server/raftkv/mod.rs | 17 ++++- 2 files changed, 88 insertions(+), 1 deletion(-) diff --git a/components/raftstore/src/store/fsm/peer.rs 
b/components/raftstore/src/store/fsm/peer.rs index 4d5a9a36b2d..11238f3c698 100644 --- a/components/raftstore/src/store/fsm/peer.rs +++ b/components/raftstore/src/store/fsm/peer.rs @@ -201,6 +201,11 @@ where propose_checked: Option, request: Option, callbacks: Vec>, + + // Ref: https://github.com/tikv/tikv/issues/16818. + // Check for duplicate key entries batching proposed commands. + // TODO: remove this field when the cause of issue 16818 is located. + lock_cf_keys: HashSet>, } impl Drop for PeerFsm @@ -439,6 +444,7 @@ where propose_checked: None, request: None, callbacks: vec![], + lock_cf_keys: HashSet::default(), } } @@ -479,6 +485,21 @@ where mut callback, .. } = cmd; + // Ref: https://github.com/tikv/tikv/issues/16818. + // Check for duplicate key entries batching proposed commands. + // TODO: remove this check when the cause of issue 16818 is located. + for req in request.get_requests() { + if req.has_put() && req.get_put().get_cf() == CF_LOCK { + let key = req.get_put().get_key(); + if !self.lock_cf_keys.insert(key.to_vec()) { + panic!( + "found duplicate key in Lock CF PUT request between batched requests. \ + key: {:?}, existing batch request: {:?}, new request to add: {:?}", + key, self.request, request + ); + } + } + } if let Some(batch_req) = self.request.as_mut() { let requests: Vec<_> = request.take_requests().into(); for q in requests { @@ -521,6 +542,7 @@ where self.batch_req_size = 0; self.has_proposed_cb = false; self.propose_checked = None; + self.lock_cf_keys = HashSet::default(); if self.callbacks.len() == 1 { let cb = self.callbacks.pop().unwrap(); return Some((req, cb)); @@ -691,6 +713,22 @@ where continue; } + // Ref: https://github.com/tikv/tikv/issues/16818. + // Check for duplicate key entries within the to be proposed raft cmd. + // TODO: remove this check when the cause of issue 16818 is located. + let mut keys_set = std::collections::HashSet::new(); + for req in cmd.request.get_requests() { + if req.has_put() && req.get_put().get_cf() == CF_LOCK { + let key = req.get_put().get_key(); + if !keys_set.insert(key.to_vec()) { + panic!( + "found duplicate key in Lock CF PUT request, key: {:?}, cmd: {:?}", + key, cmd + ); + } + } + } + let req_size = cmd.request.compute_size(); if self.ctx.cfg.cmd_batch && self.fsm.batch_req_builder.can_batch(&self.ctx.cfg, &cmd.request, req_size) @@ -7659,4 +7697,38 @@ mod tests { let req_size = req.compute_size(); assert!(!builder.can_batch(&cfg, &req, req_size)); } + + #[test] + #[should_panic] + fn test_batch_build_with_duplicate_lock_cf_keys() { + let mut builder = BatchRaftCmdRequestBuilder::::new(); + + // Create first request. + let mut req1 = RaftCmdRequest::default(); + let mut put1 = Request::default(); + let mut put_req1 = PutRequest::default(); + put_req1.set_cf(CF_LOCK.to_string()); + put_req1.set_key(b"key1".to_vec()); + put_req1.set_value(b"value1".to_vec()); + put1.set_cmd_type(CmdType::Put); + put1.set_put(put_req1); + req1.mut_requests().push(put1); + + // Create second request with same key in Lock CF. + let mut req2 = RaftCmdRequest::default(); + let mut put2 = Request::default(); + let mut put_req2 = PutRequest::default(); + put_req2.set_cf(CF_LOCK.to_string()); + put_req2.set_key(b"key1".to_vec()); + put_req2.set_value(b"value2".to_vec()); + put2.set_cmd_type(CmdType::Put); + put2.set_put(put_req2); + req2.mut_requests().push(put2); + + // Add both requests to batch builder, should cause panic. 
+ let size = req1.compute_size(); + builder.add(RaftCommand::new(req1, Callback::None), size); + let size = req2.compute_size(); + builder.add(RaftCommand::new(req2, Callback::None), size); + } } diff --git a/src/server/raftkv/mod.rs b/src/server/raftkv/mod.rs index 697ffbe3228..3adb5d1381f 100644 --- a/src/server/raftkv/mod.rs +++ b/src/server/raftkv/mod.rs @@ -22,7 +22,7 @@ use std::{ use collections::{HashMap, HashSet}; use concurrency_manager::ConcurrencyManager; -use engine_traits::{CfName, KvEngine, MvccProperties, Snapshot}; +use engine_traits::{CfName, KvEngine, MvccProperties, Snapshot, CF_LOCK}; use futures::{future::BoxFuture, task::AtomicWaker, Future, Stream, StreamExt, TryFutureExt}; use hybrid_engine::HybridEngineSnapshot; use in_memory_engine::RegionCacheMemoryEngine; @@ -506,6 +506,21 @@ where } let reqs: Vec = batch.modifies.into_iter().map(Into::into).collect(); + // Ref: https://github.com/tikv/tikv/issues/16818. + // Check for duplicate key entries before proposing commands. + // TODO: remove this check when the cause of issue 16818 is located. + let mut keys_set = std::collections::HashSet::new(); + for req in &reqs { + if req.has_put() && req.get_put().get_cf() == CF_LOCK { + let key = req.get_put().get_key(); + if !keys_set.insert(key.to_vec()) { + panic!( + "found duplicate key in Lock CF PUT request, key: {:?}, extra: {:?}, ctx: {:?}, reqs: {:?}, avoid_batch:{:?}", + key, batch.extra, ctx, reqs, batch.avoid_batch + ); + } + } + } let txn_extra = batch.extra; let mut header = new_request_header(ctx); if batch.avoid_batch { From f3c0995eba8179ea5a2666a80e388fa9461066ad Mon Sep 17 00:00:00 2001 From: glorv Date: Mon, 9 Dec 2024 18:24:33 +0800 Subject: [PATCH 44/86] raftstore: do not try to fold replica reads' read index (#17950) close tikv/tikv#17018 Remove the logic that tries to adjust replica reads' read index in `advance_replica_reads`. This optimization is no correct anymore after 1pc/async-commit is introduced because an earlier readIndex request can rely on a higher committed index due to async-commit lock, thus decrease the read index can cause follower read request reading out-dated data. Signed-off-by: glorv --- components/raftstore/src/store/read_queue.rs | 85 +------------------- 1 file changed, 4 insertions(+), 81 deletions(-) diff --git a/components/raftstore/src/store/read_queue.rs b/components/raftstore/src/store/read_queue.rs index bde49b4ed30..161a2357ab4 100644 --- a/components/raftstore/src/store/read_queue.rs +++ b/components/raftstore/src/store/read_queue.rs @@ -289,28 +289,10 @@ impl ReadIndexQueue { if min_changed_offset != usize::MAX { self.ready_cnt = cmp::max(self.ready_cnt, max_changed_offset + 1); } - if max_changed_offset > 0 { - self.fold(min_changed_offset, max_changed_offset); - } - } - - fn fold(&mut self, min_changed_offset: usize, max_changed_offset: usize) { - let mut r_idx = self.reads[max_changed_offset].read_index.unwrap(); - let mut check_offset = max_changed_offset - 1; - loop { - let l_idx = self.reads[check_offset].read_index.unwrap_or(u64::MAX); - if l_idx > r_idx { - self.reads[check_offset].read_index = Some(r_idx); - } else if check_offset < min_changed_offset { - break; - } else { - r_idx = l_idx; - } - if check_offset == 0 { - break; - } - check_offset -= 1; - } + // NOTE: We should not try to fold these read index requests anymore, + // an earlier request can rely a higher committed index due to txn + // lock when 1pc/async-commit is used. + // See https://github.com/tikv/tikv/issues/17018 for more details. 
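To make the hazard concrete, the snippet below is a self-contained sketch (not TiKV code) of the removed right-to-left folding. With async commit, an earlier read-index request can legitimately be answered with a higher index than a later one, and folding pulls the earlier one back down, which permits a stale follower read:

// Simplified form of the removed fold: propagate the minimum read index from
// the newest request back towards older ones.
fn fold(read_indexes: &mut [u64]) {
    for i in (0..read_indexes.len().saturating_sub(1)).rev() {
        if read_indexes[i] > read_indexes[i + 1] {
            read_indexes[i] = read_indexes[i + 1];
        }
    }
}

fn main() {
    // Request 0 was answered with index 100 because an async-commit lock pins
    // its visibility there; request 1 only needed index 80.
    let mut indexes = vec![100, 80];
    fold(&mut indexes);
    // The older request is folded down to 80 and may now observe data that
    // does not yet reflect the async-commit transaction.
    assert_eq!(indexes, vec![80, 80]);
}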
} pub fn gc(&mut self) { @@ -507,65 +489,6 @@ mod tests { use super::*; use crate::store::Callback; - #[test] - fn test_read_queue_fold() { - let mut queue = ReadIndexQueue::> { - handled_cnt: 125, - ..Default::default() - }; - for _ in 0..100 { - let id = Uuid::new_v4(); - queue.reads.push_back(ReadIndexRequest::with_command( - id, - RaftCmdRequest::default(), - Callback::None, - Timespec::new(0, 0), - )); - - let offset = queue.handled_cnt + queue.reads.len() - 1; - queue.contexts.insert(id, offset); - } - - queue.advance_replica_reads(Vec::new()); - assert_eq!(queue.ready_cnt, 0); - - queue.advance_replica_reads(vec![(queue.reads[0].id, None, 100)]); - assert_eq!(queue.ready_cnt, 1); - - queue.advance_replica_reads(vec![(queue.reads[1].id, None, 100)]); - assert_eq!(queue.ready_cnt, 2); - - queue.advance_replica_reads(vec![ - (queue.reads[80].id, None, 80), - (queue.reads[84].id, None, 100), - (queue.reads[82].id, None, 70), - (queue.reads[78].id, None, 120), - (queue.reads[77].id, None, 40), - ]); - assert_eq!(queue.ready_cnt, 85); - - queue.advance_replica_reads(vec![ - (queue.reads[20].id, None, 80), - (queue.reads[24].id, None, 100), - (queue.reads[22].id, None, 70), - (queue.reads[18].id, None, 120), - (queue.reads[17].id, None, 40), - ]); - assert_eq!(queue.ready_cnt, 85); - - for i in 0..78 { - assert_eq!(queue.reads[i].read_index.unwrap(), 40, "#{} failed", i); - } - for i in 78..83 { - assert_eq!(queue.reads[i].read_index.unwrap(), 70, "#{} failed", i); - } - for i in 84..85 { - assert_eq!(queue.reads[i].read_index.unwrap(), 100, "#{} failed", i); - } - - queue.clear_all(None); - } - #[test] fn test_become_leader_then_become_follower() { let mut queue = ReadIndexQueue::> { From c1e506eff743ea7236e76b2af64a1257233d2771 Mon Sep 17 00:00:00 2001 From: lucasliang Date: Mon, 9 Dec 2024 19:25:57 +0800 Subject: [PATCH 45/86] raftstore: offloading I/O Operations from pd_worker to bg_worker. (#17947) close tikv/tikv#17679, close tikv/tikv#17939 This PR is used to shift I/O operations from the `pd_worker` to mitigate the risk of it becoming blocked on I/O tasks. 
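The idea can be sketched outside of TiKV as follows (a minimal illustration, not the patch code itself: the statics mirror the new setters/getters added to `tikv_util::sys::disk`, while `collect_disk_stats` and the one-second refresh interval are hypothetical stand-ins for the statfs/engine-size collection that the background worker performs). The heartbeat path only loads cached atomics, so it can no longer block on filesystem calls:

    use std::sync::atomic::{AtomicU64, Ordering};
    use std::{thread, time::Duration};

    // Cached disk statistics, published by a background task.
    static DISK_CAPACITY: AtomicU64 = AtomicU64::new(0);
    static DISK_AVAILABLE: AtomicU64 = AtomicU64::new(0);

    // Hypothetical stand-in for the expensive statfs/engine-size collection.
    fn collect_disk_stats() -> (u64, u64) {
        (512 << 30, 256 << 30)
    }

    fn spawn_disk_stats_refresher() {
        thread::spawn(|| loop {
            let (capacity, available) = collect_disk_stats();
            DISK_CAPACITY.store(capacity, Ordering::Release);
            DISK_AVAILABLE.store(available, Ordering::Release);
            thread::sleep(Duration::from_secs(1));
        });
    }

    // The heartbeat path only reads the cached values and never touches the disk.
    fn cached_disk_stats() -> (u64, u64) {
        (
            DISK_CAPACITY.load(Ordering::Acquire),
            DISK_AVAILABLE.load(Ordering::Acquire),
        )
    }

    fn main() {
        spawn_disk_stats_refresher();
        thread::sleep(Duration::from_millis(50));
        let (capacity, available) = cached_disk_stats();
        println!("capacity = {capacity}, available = {available}");
    }

In the patch itself, the cached values are refreshed by the existing disk-usage check task in `components/server` and read back in the PD workers of both raftstore and raftstore-v2.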
Signed-off-by: lucasliang --- components/raftstore-v2/src/worker/pd/mod.rs | 3 + .../raftstore-v2/src/worker/pd/store.rs | 68 +++----- components/raftstore/src/store/fsm/mod.rs | 5 +- components/raftstore/src/store/fsm/store.rs | 27 +-- components/raftstore/src/store/mod.rs | 2 +- components/raftstore/src/store/peer.rs | 2 +- components/raftstore/src/store/worker/pd.rs | 159 +++++------------- components/server/src/server.rs | 14 ++ components/server/src/server2.rs | 14 ++ components/test_raftstore/src/node.rs | 22 ++- components/tikv_util/src/sys/disk.rs | 40 +++++ src/server/raft_server.rs | 4 +- 12 files changed, 169 insertions(+), 191 deletions(-) diff --git a/components/raftstore-v2/src/worker/pd/mod.rs b/components/raftstore-v2/src/worker/pd/mod.rs index 7917ed5cd73..133598a10f6 100644 --- a/components/raftstore-v2/src/worker/pd/mod.rs +++ b/components/raftstore-v2/src/worker/pd/mod.rs @@ -191,7 +191,9 @@ where { store_id: u64, pd_client: Arc, + #[allow(dead_code)] raft_engine: ER, + #[allow(dead_code)] tablet_registry: TabletRegistry, snap_mgr: TabletSnapManager, router: StoreRouter, @@ -223,6 +225,7 @@ where logger: Logger, shutdown: Arc, + #[allow(dead_code)] cfg: Arc>, } diff --git a/components/raftstore-v2/src/worker/pd/store.rs b/components/raftstore-v2/src/worker/pd/store.rs index 226fef08d11..3502e0ffb85 100644 --- a/components/raftstore-v2/src/worker/pd/store.rs +++ b/components/raftstore-v2/src/worker/pd/store.rs @@ -10,7 +10,7 @@ use kvproto::pdpb; use pd_client::{ metrics::{ REGION_READ_BYTES_HISTOGRAM, REGION_READ_KEYS_HISTOGRAM, REGION_WRITTEN_BYTES_HISTOGRAM, - REGION_WRITTEN_KEYS_HISTOGRAM, STORE_SIZE_EVENT_INT_VEC, + REGION_WRITTEN_KEYS_HISTOGRAM, }, PdClient, }; @@ -23,7 +23,7 @@ use slog::{error, info, warn}; use tikv_util::{ metrics::RecordPairVec, store::QueryStats, - sys::disk::get_disk_space_stats, + sys::disk, time::{Duration, Instant as TiInstant, UnixSecs}, topn::TopN, }; @@ -213,7 +213,7 @@ where } stats = collect_report_read_peer_stats(HOTSPOT_REPORT_CAPACITY, report_peers, stats); - let (capacity, used_size, available) = self.collect_engine_size().unwrap_or_default(); + let (capacity, used_size, available) = self.collect_engine_size(); if available == 0 { warn!(self.logger, "no available space"); } @@ -265,10 +265,6 @@ where self.store_stat.region_bytes_read.flush(); self.store_stat.region_keys_read.flush(); - STORE_SIZE_EVENT_INT_VEC.capacity.set(capacity as i64); - STORE_SIZE_EVENT_INT_VEC.available.set(available as i64); - STORE_SIZE_EVENT_INT_VEC.used.set(used_size as i64); - // Update slowness statistics self.update_slowness_in_store_stats(&mut stats, last_query_sum); @@ -442,47 +438,23 @@ where } /// Returns (capacity, used, available). - fn collect_engine_size(&self) -> Option<(u64, u64, u64)> { - let (disk_cap, disk_avail) = match get_disk_space_stats(self.tablet_registry.tablet_root()) + fn collect_engine_size(&self) -> (u64, u64, u64) { + // For test purpose, directly set the disk capacity, used size and available + // size manually. 
+ #[cfg(any(test, feature = "testexport"))] { - Err(e) => { - error!( - self.logger, - "get disk stat for rocksdb failed"; - "engine_path" => self.tablet_registry.tablet_root().display(), - "err" => ?e - ); - return None; - } - Ok((total_size, available_size)) => (total_size, available_size), - }; - let capacity = if self.cfg.value().capacity.0 == 0 { - disk_cap - } else { - std::cmp::min(disk_cap, self.cfg.value().capacity.0) - }; - let mut kv_size = 0; - self.tablet_registry.for_each_opened_tablet(|_, cached| { - if let Some(tablet) = cached.latest() { - kv_size += tablet.get_engine_used_size().unwrap_or(0); - } - true - }); - let snap_size = self.snap_mgr.total_snap_size().unwrap(); - let raft_size = self - .raft_engine - .get_engine_size() - .expect("engine used size"); - - STORE_SIZE_EVENT_INT_VEC.kv_size.set(kv_size as i64); - STORE_SIZE_EVENT_INT_VEC.raft_size.set(raft_size as i64); - STORE_SIZE_EVENT_INT_VEC.snap_size.set(snap_size as i64); - - let used_size = snap_size + kv_size + raft_size; - let mut available = capacity.checked_sub(used_size).unwrap_or_default(); - // We only care about rocksdb SST file size, so we should check disk available - // here. - available = cmp::min(available, disk_avail); - Some((capacity, used_size, available)) + let (capacity, available) = disk::get_disk_space_stats("./").unwrap(); + + disk::set_disk_capacity(capacity); + disk::set_disk_used_size(capacity - available); + disk::set_disk_available_size(available); + return (capacity, capacity - available, available); + } + #[allow(unreachable_code)] + ( + disk::get_disk_capacity(), + disk::get_disk_used_size(), + disk::get_disk_available_size(), + ) } } diff --git a/components/raftstore/src/store/fsm/mod.rs b/components/raftstore/src/store/fsm/mod.rs index f342c1ec733..e0cd7235d97 100644 --- a/components/raftstore/src/store/fsm/mod.rs +++ b/components/raftstore/src/store/fsm/mod.rs @@ -21,8 +21,5 @@ pub use self::{ peer::{ new_admin_request, new_read_index_request, DestroyPeerJob, PeerFsm, MAX_PROPOSAL_SIZE_RATIO, }, - store::{ - create_raft_batch_system, RaftBatchSystem, RaftPollerBuilder, RaftRouter, StoreInfo, - StoreMeta, - }, + store::{create_raft_batch_system, RaftBatchSystem, RaftPollerBuilder, RaftRouter, StoreMeta}, }; diff --git a/components/raftstore/src/store/fsm/store.rs b/components/raftstore/src/store/fsm/store.rs index 25faa03ae72..729d9339e42 100644 --- a/components/raftstore/src/store/fsm/store.rs +++ b/components/raftstore/src/store/fsm/store.rs @@ -142,12 +142,6 @@ const STORE_CHECK_PENDING_APPLY_DURATION: Duration = Duration::from_secs(5 * 60) // the threshold, can the raftstore supply service. const STORE_CHECK_COMPLETE_APPLY_REGIONS_PERCENT: u64 = 99; -pub struct StoreInfo { - pub kv_engine: EK, - pub raft_engine: ER, - pub capacity: u64, -} - /// A trait that provide the meta information that can be accessed outside /// of raftstore. 
pub trait StoreRegionMeta: Send { @@ -560,7 +554,7 @@ where { pub cfg: Config, pub store: metapb::Store, - pub pd_scheduler: Scheduler>, + pub pd_scheduler: Scheduler>, pub consistency_check_scheduler: Scheduler>, pub split_check_scheduler: Scheduler, // handle Compact, CleanupSst task @@ -1275,7 +1269,7 @@ impl PollHandler, St pub struct RaftPollerBuilder { pub cfg: Arc>, pub store: metapb::Store, - pd_scheduler: Scheduler>, + pd_scheduler: Scheduler>, consistency_check_scheduler: Scheduler>, split_check_scheduler: Scheduler, cleanup_scheduler: Scheduler, @@ -1619,8 +1613,8 @@ where } } -struct Workers { - pd_worker: LazyWorker>, +struct Workers { + pd_worker: LazyWorker>, background_worker: Worker, // Both of cleanup tasks and region tasks get their own workers, instead of reusing @@ -1646,7 +1640,7 @@ pub struct RaftBatchSystem { apply_router: ApplyRouter, apply_system: ApplyBatchSystem, router: RaftRouter, - workers: Option>, + workers: Option>, store_writers: StoreWriters, node_start_time: Timespec, // monotonic_raw_now } @@ -1678,7 +1672,7 @@ impl RaftBatchSystem { trans: T, pd_client: Arc, mgr: SnapManager, - pd_worker: LazyWorker>, + pd_worker: LazyWorker>, store_meta: Arc>, coprocessor_host: CoprocessorHost, importer: Arc>, @@ -1863,7 +1857,7 @@ impl RaftBatchSystem { fn start_system( &mut self, - mut workers: Workers, + mut workers: Workers, region_peers: Vec>, builder: RaftPollerBuilder, auto_split_controller: AutoSplitController, @@ -2980,15 +2974,8 @@ impl<'a, EK: KvEngine, ER: RaftEngine, T: Transport> StoreFsmDelegate<'a, EK, ER ); stats.set_query_stats(query_stats); - let store_info = Some(StoreInfo { - kv_engine: self.ctx.engines.kv.clone(), - raft_engine: self.ctx.engines.raft.clone(), - capacity: self.ctx.cfg.capacity.0, - }); - let task = PdTask::StoreHeartbeat { stats, - store_info, report, dr_autosync_status: self .ctx diff --git a/components/raftstore/src/store/mod.rs b/components/raftstore/src/store/mod.rs index 2c9c92ebbe3..bb985f01909 100644 --- a/components/raftstore/src/store/mod.rs +++ b/components/raftstore/src/store/mod.rs @@ -47,7 +47,7 @@ pub use self::{ compaction_guard::CompactionGuardGeneratorFactory, config::Config, entry_storage::{EntryStorage, RaftlogFetchResult, MAX_INIT_ENTRY_COUNT}, - fsm::{check_sst_for_ingestion, DestroyPeerJob, RaftRouter, StoreInfo}, + fsm::{check_sst_for_ingestion, DestroyPeerJob, RaftRouter}, hibernate_state::{GroupState, HibernateState}, memory::*, metrics::RAFT_ENTRY_FETCHES_VEC, diff --git a/components/raftstore/src/store/peer.rs b/components/raftstore/src/store/peer.rs index f066c41d6c6..57ed792de85 100644 --- a/components/raftstore/src/store/peer.rs +++ b/components/raftstore/src/store/peer.rs @@ -5888,7 +5888,7 @@ where self.send_extra_message(extra_msg, &mut ctx.trans, &to_peer); } - pub fn require_updating_max_ts(&self, pd_scheduler: &Scheduler>) { + pub fn require_updating_max_ts(&self, pd_scheduler: &Scheduler>) { let epoch = self.region().get_region_epoch(); let term_low_bits = self.term() & ((1 << 32) - 1); // 32 bits let version_lot_bits = epoch.get_version() & ((1 << 31) - 1); // 31 bits diff --git a/components/raftstore/src/store/worker/pd.rs b/components/raftstore/src/store/worker/pd.rs index 5665318c259..16c4fec66f0 100644 --- a/components/raftstore/src/store/worker/pd.rs +++ b/components/raftstore/src/store/worker/pd.rs @@ -44,7 +44,7 @@ use tikv_util::{ box_err, debug, error, info, metrics::ThreadInfoStatistics, store::QueryStats, - sys::{disk::get_disk_space_stats, thread::StdThreadBuildWrapper, SysQuota}, 
+ sys::{disk, thread::StdThreadBuildWrapper, SysQuota}, thd_name, time::{Instant as TiInstant, UnixSecs}, timer::GLOBAL_TIMER_HANDLE, @@ -71,7 +71,7 @@ use crate::{ AutoSplitController, ReadStats, SplitConfigChange, WriteStats, }, Callback, CasualMessage, Config, PeerMsg, RaftCmdExtraOpts, RaftCommand, RaftRouter, - SnapManager, StoreInfo, StoreMsg, TxnExt, + SnapManager, StoreMsg, TxnExt, }, }; @@ -104,10 +104,9 @@ pub trait FlowStatsReporter: Send + Clone + Sync + 'static { fn report_write_stats(&self, write_stats: WriteStats); } -impl FlowStatsReporter for Scheduler> +impl FlowStatsReporter for Scheduler> where EK: KvEngine, - ER: RaftEngine, { fn report_read_stats(&self, read_stats: ReadStats) { if let Err(e) = self.schedule(Task::ReadStats { read_stats }) { @@ -137,10 +136,9 @@ pub struct HeartbeatTask { } /// Uses an asynchronous thread to tell PD something. -pub enum Task +pub enum Task where EK: KvEngine, - ER: RaftEngine, { AskSplit { region: metapb::Region, @@ -166,7 +164,6 @@ where Heartbeat(HeartbeatTask), StoreHeartbeat { stats: pdpb::StoreStats, - store_info: Option>, report: Option, dr_autosync_status: Option, }, @@ -382,10 +379,9 @@ impl PartialOrd for PeerCmpReadStat { } } -impl Display for Task +impl Display for Task where EK: KvEngine, - ER: RaftEngine, { fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { match *self { @@ -513,12 +509,11 @@ fn convert_record_pairs(m: HashMap) -> RecordPairVec { } #[derive(Clone)] -pub struct WrappedScheduler(Scheduler>); +pub struct WrappedScheduler(Scheduler>); -impl Collector for WrappedScheduler +impl Collector for WrappedScheduler where EK: KvEngine, - ER: RaftEngine, { fn collect(&self, records: Arc) { self.0.schedule(Task::RegionCpuRecords(records)).ok(); @@ -537,10 +532,9 @@ pub trait StoreStatsReporter: Send + Clone + Sync + 'static + Collector { fn update_latency_stats(&self, timer_tick: u64, factor: InspectFactor); } -impl StoreStatsReporter for WrappedScheduler +impl StoreStatsReporter for WrappedScheduler where EK: KvEngine, - ER: RaftEngine, { fn report_store_infos( &self, @@ -880,8 +874,8 @@ where // use for Runner inner handle function to send Task to itself // actually it is the sender connected to Runner's Worker which // calls Runner's run() on Task received. - scheduler: Scheduler>, - stats_monitor: StatsMonitor>, + scheduler: Scheduler>, + stats_monitor: StatsMonitor>, store_heartbeat_interval: Duration, // region_id -> total_cpu_time_ms (since last region heartbeat) @@ -912,7 +906,7 @@ where store_id: u64, pd_client: Arc, router: RaftRouter, - scheduler: Scheduler>, + scheduler: Scheduler>, auto_split_controller: AutoSplitController, concurrency_manager: ConcurrencyManager, snap_mgr: SnapManager, @@ -1047,7 +1041,7 @@ where // be called in an asynchronous context. 
fn handle_ask_batch_split( router: RaftRouter, - scheduler: Scheduler>, + scheduler: Scheduler>, pd_client: Arc, mut region: metapb::Region, mut split_keys: Vec>, @@ -1184,7 +1178,7 @@ where fn handle_store_heartbeat( &mut self, mut stats: pdpb::StoreStats, - store_info: Option>, + is_fake_heartbeat: bool, store_report: Option, dr_autosync_status: Option, ) { @@ -1215,35 +1209,17 @@ where } stats = collect_report_read_peer_stats(HOTSPOT_REPORT_CAPACITY, report_peers, stats); - let (capacity, used_size, available) = if store_info.is_some() { - match collect_engine_size( - &self.coprocessor_host, - store_info.as_ref(), - self.snap_mgr.get_total_snap_size().unwrap(), - ) { - Some((capacity, used_size, available)) => { - // Update last reported infos on engine_size. - self.store_stat.engine_last_capacity_size = capacity; - self.store_stat.engine_last_used_size = used_size; - self.store_stat.engine_last_available_size = available; - (capacity, used_size, available) - } - None => return, - } - } else { - ( - self.store_stat.engine_last_capacity_size, - self.store_stat.engine_last_used_size, - self.store_stat.engine_last_available_size, - ) - }; - - stats.set_capacity(capacity); - stats.set_used_size(used_size); - + // Fetch all size infos and update last reported infos on engine_size. + let (capacity, used_size, available) = collect_engine_size(&self.coprocessor_host); if available == 0 { warn!("no available space"); } + self.store_stat.engine_last_capacity_size = capacity; + self.store_stat.engine_last_used_size = used_size; + self.store_stat.engine_last_available_size = available; + + stats.set_capacity(capacity); + stats.set_used_size(used_size); stats.set_available(available); stats.set_bytes_read( self.store_stat.engine_total_bytes_read - self.store_stat.engine_last_total_bytes_read, @@ -1274,12 +1250,12 @@ where self.store_stat .engine_last_query_num .fill_query_stats(&self.store_stat.engine_total_query_num); - self.store_stat.last_report_ts = if store_info.is_some() { + self.store_stat.last_report_ts = if !is_fake_heartbeat { UnixSecs::now() } else { - // If `store_info` is None, the given Task::StoreHeartbeat should be a fake - // heartbeat to PD, we won't update the last_report_ts to avoid incorrectly - // marking current TiKV node in normal state. + // If `is_fake_heartbeat == true`, the given Task::StoreHeartbeat should be a + // fake heartbeat to PD, we won't update the last_report_ts to avoid + // incorrectly marking current TiKV node in normal state. self.store_stat.last_report_ts }; self.store_stat.region_bytes_written.flush(); @@ -1287,10 +1263,6 @@ where self.store_stat.region_bytes_read.flush(); self.store_stat.region_keys_read.flush(); - STORE_SIZE_EVENT_INT_VEC.capacity.set(capacity as i64); - STORE_SIZE_EVENT_INT_VEC.available.set(available as i64); - STORE_SIZE_EVENT_INT_VEC.used.set(used_size as i64); - let slow_score = self.health_reporter.get_slow_score(); stats.set_slow_score(slow_score as u64); let (rps, slow_trend_pb) = self @@ -1886,7 +1858,7 @@ where stats.set_is_busy(true); // We do not need to report store_info, so we just set `None` here. 
- self.handle_store_heartbeat(stats, None, None, None); + self.handle_store_heartbeat(stats, true, None, None); warn!("scheduling store_heartbeat timeout, force report store slow score to pd."; "store_id" => self.store_id, ); @@ -2033,9 +2005,9 @@ where ER: RaftEngine, T: PdClient, { - type Task = Task; + type Task = Task; - fn run(&mut self, task: Task) { + fn run(&mut self, task: Task) { debug!("executing task"; "task" => %task); if !self.is_hb_receiver_scheduled { @@ -2240,10 +2212,9 @@ where } Task::StoreHeartbeat { stats, - store_info, report, dr_autosync_status, - } => self.handle_store_heartbeat(stats, store_info, report, dr_autosync_status), + } => self.handle_store_heartbeat(stats, false, report, dr_autosync_status), Task::ReportBatchSplit { regions } => self.handle_report_batch_split(regions), Task::ValidatePeer { region, peer } => self.handle_validate_peer(region, peer), Task::ReadStats { read_stats } => self.handle_read_stats(read_stats), @@ -2497,51 +2468,16 @@ fn collect_report_read_peer_stats( stats } -fn collect_engine_size( - coprocessor_host: &CoprocessorHost, - store_info: Option<&StoreInfo>, - snap_mgr_size: u64, -) -> Option<(u64, u64, u64)> { +fn collect_engine_size(coprocessor_host: &CoprocessorHost) -> (u64, u64, u64) { if let Some(engine_size) = coprocessor_host.on_compute_engine_size() { - return Some((engine_size.capacity, engine_size.used, engine_size.avail)); - } - let store_info = store_info.unwrap(); - let (disk_cap, disk_avail) = match get_disk_space_stats(store_info.kv_engine.path()) { - Err(e) => { - error!( - "get disk stat for rocksdb failed"; - "engine_path" => store_info.kv_engine.path(), - "err" => ?e - ); - return None; - } - Ok((total_size, available_size)) => (total_size, available_size), - }; - let capacity = if store_info.capacity == 0 || disk_cap < store_info.capacity { - disk_cap + (engine_size.capacity, engine_size.used, engine_size.avail) } else { - store_info.capacity - }; - let raft_size = store_info - .raft_engine - .get_engine_size() - .expect("raft engine used size"); - - let kv_size = store_info - .kv_engine - .get_engine_used_size() - .expect("kv engine used size"); - - STORE_SIZE_EVENT_INT_VEC.raft_size.set(raft_size as i64); - STORE_SIZE_EVENT_INT_VEC.snap_size.set(snap_mgr_size as i64); - STORE_SIZE_EVENT_INT_VEC.kv_size.set(kv_size as i64); - - let used_size = snap_mgr_size + kv_size + raft_size; - let mut available = capacity.checked_sub(used_size).unwrap_or_default(); - // We only care about rocksdb SST file size, so we should check disk available - // here. 
- available = cmp::min(available, disk_avail); - Some((capacity, used_size, available)) + ( + disk::get_disk_capacity(), + disk::get_disk_used_size(), + disk::get_disk_available_size(), + ) + } } fn get_read_query_num(stat: &pdpb::QueryStats) -> u64 { @@ -2566,17 +2502,17 @@ mod tests { fn test_collect_stats() { use std::{sync::Mutex, time::Instant}; - use engine_test::{kv::KvTestEngine, raft::RaftTestEngine}; + use engine_test::kv::KvTestEngine; struct RunnerTest { store_stat: Arc>, - stats_monitor: StatsMonitor>, + stats_monitor: StatsMonitor>, } impl RunnerTest { fn new( interval: u64, - scheduler: Scheduler>, + scheduler: Scheduler>, store_stat: Arc>, ) -> RunnerTest { let mut stats_monitor = StatsMonitor::new( @@ -2612,9 +2548,9 @@ mod tests { } impl Runnable for RunnerTest { - type Task = Task; + type Task = Task; - fn run(&mut self, task: Task) { + fn run(&mut self, task: Task) { if let Task::StoreInfos { cpu_usages, read_io_rates, @@ -2680,7 +2616,7 @@ mod tests { assert_eq!(store_stats.peer_stats.len(), 3) } - use engine_test::{kv::KvTestEngine, raft::RaftTestEngine}; + use engine_test::kv::KvTestEngine; use metapb::Peer; use resource_metering::{RawRecord, TagInfos}; @@ -2811,12 +2747,7 @@ mod tests { let obs = PdObserver::default(); host.registry .register_pd_task_observer(1, BoxPdTaskObserver::new(obs)); - let store_size = collect_engine_size::(&host, None, 0); - let (cap, used, avail) = if let Some((cap, used, avail)) = store_size { - (cap, used, avail) - } else { - panic!("store_size should not be none"); - }; + let (cap, used, avail) = collect_engine_size::(&host); assert_eq!(cap, 444); assert_eq!(used, 111); assert_eq!(avail, 333); @@ -2824,7 +2755,7 @@ mod tests { #[test] fn test_pd_worker_send_stats_on_read_and_cpu() { - let mut pd_worker: LazyWorker> = + let mut pd_worker: LazyWorker> = LazyWorker::new("test-pd-worker-collect-stats"); // Set the interval long enough for mocking the channel full state. let interval = 600_u64; diff --git a/components/server/src/server.rs b/components/server/src/server.rs index 35f160de2fd..eeeeb20e126 100644 --- a/components/server/src/server.rs +++ b/components/server/src/server.rs @@ -57,6 +57,7 @@ use kvproto::{ }; use pd_client::{ meta_storage::{Checked, Sourced}, + metrics::STORE_SIZE_EVENT_INT_VEC, PdClient, RpcClient, }; use raft_log_engine::RaftLogEngine; @@ -1464,7 +1465,20 @@ where capacity ); } + // Update disk status. disk::set_disk_status(cur_disk_status); + disk::set_disk_capacity(capacity); + disk::set_disk_used_size(used_size); + disk::set_disk_available_size(available); + + // Update metrics. + STORE_SIZE_EVENT_INT_VEC.raft_size.set(raft_size as i64); + STORE_SIZE_EVENT_INT_VEC.snap_size.set(snap_size as i64); + STORE_SIZE_EVENT_INT_VEC.kv_size.set(kv_size as i64); + + STORE_SIZE_EVENT_INT_VEC.capacity.set(capacity as i64); + STORE_SIZE_EVENT_INT_VEC.available.set(available as i64); + STORE_SIZE_EVENT_INT_VEC.used.set(used_size as i64); }) } diff --git a/components/server/src/server2.rs b/components/server/src/server2.rs index 9e76ff6dba7..9ac701053c3 100644 --- a/components/server/src/server2.rs +++ b/components/server/src/server2.rs @@ -49,6 +49,7 @@ use kvproto::{ }; use pd_client::{ meta_storage::{Checked, Sourced}, + metrics::STORE_SIZE_EVENT_INT_VEC, PdClient, RpcClient, }; use raft_log_engine::RaftLogEngine; @@ -1254,7 +1255,20 @@ where capacity ); } + // Update disk status. 
disk::set_disk_status(cur_disk_status); + disk::set_disk_capacity(capacity); + disk::set_disk_used_size(used_size); + disk::set_disk_available_size(available); + + // Update metrics. + STORE_SIZE_EVENT_INT_VEC.raft_size.set(raft_size as i64); + STORE_SIZE_EVENT_INT_VEC.snap_size.set(snap_size as i64); + STORE_SIZE_EVENT_INT_VEC.kv_size.set(kv_size as i64); + + STORE_SIZE_EVENT_INT_VEC.capacity.set(capacity as i64); + STORE_SIZE_EVENT_INT_VEC.available.set(available as i64); + STORE_SIZE_EVENT_INT_VEC.used.set(used_size as i64); }) } diff --git a/components/test_raftstore/src/node.rs b/components/test_raftstore/src/node.rs index c87609a0c02..f512a1be228 100644 --- a/components/test_raftstore/src/node.rs +++ b/components/test_raftstore/src/node.rs @@ -1,7 +1,7 @@ // Copyright 2016 TiKV Project Authors. Licensed under Apache-2.0. use std::{ - path::Path, + path::{Path, PathBuf}, sync::{atomic::AtomicU64, Arc, Mutex, RwLock}, }; @@ -43,6 +43,7 @@ use tikv::{ }; use tikv_util::{ config::VersionTrack, + sys::disk, time::ThreadReadId, worker::{Builder as WorkerBuilder, LazyWorker}, }; @@ -331,6 +332,25 @@ impl Simulator for NodeCluster { Module::Coprocessor, Box::new(SplitCheckConfigManager(split_scheduler.clone())), ); + // Spawn a task to update the disk status periodically. + { + let engines = engines.clone(); + let data_dir = PathBuf::from(engines.kv.path()); + let snap_mgr = snap_mgr.clone(); + bg_worker.spawn_interval_task(std::time::Duration::from_millis(1000), move || { + let snap_size = snap_mgr.get_total_snap_size().unwrap(); + let kv_size = engines + .kv + .get_engine_used_size() + .expect("get kv engine size"); + let used_size = snap_size + kv_size; + let (capacity, available) = disk::get_disk_space_stats(&data_dir).unwrap(); + + disk::set_disk_capacity(capacity); + disk::set_disk_used_size(used_size); + disk::set_disk_available_size(std::cmp::min(available, capacity - used_size)); + }); + } node.try_bootstrap_store(engines.clone())?; node.start( diff --git a/components/tikv_util/src/sys/disk.rs b/components/tikv_util/src/sys/disk.rs index 5918bdd8e3b..d1f3353b134 100644 --- a/components/tikv_util/src/sys/disk.rs +++ b/components/tikv_util/src/sys/disk.rs @@ -7,6 +7,12 @@ use std::{ use fail::fail_point; pub use kvproto::disk_usage::DiskUsage; +// The following variables are used to store the disk capacity, used size, and +// available size. +static DISK_CAPACITY: AtomicU64 = AtomicU64::new(0); +static DISK_USED_SIZE: AtomicU64 = AtomicU64::new(0); +static DISK_AVAILABLE_SIZE: AtomicU64 = AtomicU64::new(0); + // DISK_RESERVED_SPACE means if left space is less than this, tikv will // turn to maintenance mode. There are another 2 value derived from this, // 50% for a migration only mode and 20% for disk space holder size. 
@@ -16,18 +22,52 @@ static DISK_RESERVED_SPACE: AtomicU64 = AtomicU64::new(0); static RAFT_DISK_RESERVED_SPACE: AtomicU64 = AtomicU64::new(0); static DISK_STATUS: AtomicI32 = AtomicI32::new(0); +#[inline] +pub fn set_disk_capacity(v: u64) { + DISK_CAPACITY.store(v, Ordering::Release) +} + +#[inline] +pub fn get_disk_capacity() -> u64 { + DISK_CAPACITY.load(Ordering::Acquire) +} + +#[inline] +pub fn set_disk_used_size(v: u64) { + DISK_USED_SIZE.store(v, Ordering::Release) +} + +#[inline] +pub fn get_disk_used_size() -> u64 { + DISK_USED_SIZE.load(Ordering::Acquire) +} + +#[inline] +pub fn set_disk_available_size(v: u64) { + DISK_AVAILABLE_SIZE.store(v, Ordering::Release) +} + +#[inline] +pub fn get_disk_available_size() -> u64 { + DISK_AVAILABLE_SIZE.load(Ordering::Acquire) +} + +#[inline] pub fn set_disk_reserved_space(v: u64) { DISK_RESERVED_SPACE.store(v, Ordering::Release) } +#[inline] pub fn get_disk_reserved_space() -> u64 { DISK_RESERVED_SPACE.load(Ordering::Acquire) } +#[inline] pub fn set_raft_disk_reserved_space(v: u64) { RAFT_DISK_RESERVED_SPACE.store(v, Ordering::Release) } +#[inline] pub fn get_raft_disk_reserved_space() -> u64 { RAFT_DISK_RESERVED_SPACE.load(Ordering::Acquire) } diff --git a/src/server/raft_server.rs b/src/server/raft_server.rs index 15e98441583..03af1b0995f 100644 --- a/src/server/raft_server.rs +++ b/src/server/raft_server.rs @@ -163,7 +163,7 @@ where engines: Engines, trans: T, snap_mgr: SnapManager, - pd_worker: LazyWorker>, + pd_worker: LazyWorker>, store_meta: Arc>, coprocessor_host: CoprocessorHost, importer: Arc>, @@ -453,7 +453,7 @@ where engines: Engines, trans: T, snap_mgr: SnapManager, - pd_worker: LazyWorker>, + pd_worker: LazyWorker>, store_meta: Arc>, coprocessor_host: CoprocessorHost, importer: Arc>, From 6aa21c600f534b44cd0f0de623ac8d1961d5445c Mon Sep 17 00:00:00 2001 From: lucasliang Date: Tue, 10 Dec 2024 14:59:03 +0800 Subject: [PATCH 46/86] *: upgrade url to 2.5.4 to fix RUSTSEC-2024-0421. 
(#17963) ref tikv/tikv#15990 Signed-off-by: lucasliang --- Cargo.lock | 285 +++++++++++++++++++++++++++++++++--- cmd/tikv-server/src/main.rs | 4 +- deny.toml | 2 +- 3 files changed, 267 insertions(+), 24 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 4745e6838b8..be3befe64f6 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1969,6 +1969,17 @@ dependencies = [ "winapi 0.3.9", ] +[[package]] +name = "displaydoc" +version = "0.2.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "97369cbbc041bc366949bc74d34658d6cda5621039731c6310521892a3a20ae0" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.79", +] + [[package]] name = "dyn-clone" version = "1.0.4" @@ -3235,6 +3246,124 @@ dependencies = [ "cc", ] +[[package]] +name = "icu_collections" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "db2fa452206ebee18c4b5c2274dbf1de17008e874b4dc4f0aea9d01ca79e4526" +dependencies = [ + "displaydoc", + "yoke", + "zerofrom", + "zerovec", +] + +[[package]] +name = "icu_locid" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "13acbb8371917fc971be86fc8057c41a64b521c184808a698c02acc242dbf637" +dependencies = [ + "displaydoc", + "litemap", + "tinystr", + "writeable", + "zerovec", +] + +[[package]] +name = "icu_locid_transform" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "01d11ac35de8e40fdeda00d9e1e9d92525f3f9d887cdd7aa81d727596788b54e" +dependencies = [ + "displaydoc", + "icu_locid", + "icu_locid_transform_data", + "icu_provider", + "tinystr", + "zerovec", +] + +[[package]] +name = "icu_locid_transform_data" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fdc8ff3388f852bede6b579ad4e978ab004f139284d7b28715f773507b946f6e" + +[[package]] +name = "icu_normalizer" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "19ce3e0da2ec68599d193c93d088142efd7f9c5d6fc9b803774855747dc6a84f" +dependencies = [ + "displaydoc", + "icu_collections", + "icu_normalizer_data", + "icu_properties", + "icu_provider", + "smallvec", + "utf16_iter", + "utf8_iter", + "write16", + "zerovec", +] + +[[package]] +name = "icu_normalizer_data" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f8cafbf7aa791e9b22bec55a167906f9e1215fd475cd22adfcf660e03e989516" + +[[package]] +name = "icu_properties" +version = "1.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "93d6020766cfc6302c15dbbc9c8778c37e62c14427cb7f6e601d849e092aeef5" +dependencies = [ + "displaydoc", + "icu_collections", + "icu_locid_transform", + "icu_properties_data", + "icu_provider", + "tinystr", + "zerovec", +] + +[[package]] +name = "icu_properties_data" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "67a8effbc3dd3e4ba1afa8ad918d5684b8868b3b26500753effea8d2eed19569" + +[[package]] +name = "icu_provider" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6ed421c8a8ef78d3e2dbc98a973be2f3770cb42b606e3ab18d6237c4dfde68d9" +dependencies = [ + "displaydoc", + "icu_locid", + "icu_provider_macros", + "stable_deref_trait", + "tinystr", + "writeable", + "yoke", + "zerofrom", + "zerovec", +] + +[[package]] +name = "icu_provider_macros" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"1ec89e9337638ecdc08744df490b221a7399bf8d164eb52a665454e60e075ad6" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.79", +] + [[package]] name = "ident_case" version = "1.0.1" @@ -3243,12 +3372,23 @@ checksum = "b9e0384b61958566e926dc50660321d12159025e767c18e043daf26b70104c39" [[package]] name = "idna" -version = "0.5.0" +version = "1.0.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "686f825264d630750a544639377bae737628043f20d38bbc029e8f29ea968a7e" +dependencies = [ + "idna_adapter", + "smallvec", + "utf8_iter", +] + +[[package]] +name = "idna_adapter" +version = "1.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "634d9b1461af396cad843f47fdba5597a4f9e6ddd4bfb6ff5d85028c25cb12f6" +checksum = "daca1df1c957320b2cf139ac61e7bd64fed304c5040df000a745aa1de3b4ef71" dependencies = [ - "unicode-bidi", - "unicode-normalization", + "icu_normalizer", + "icu_properties", ] [[package]] @@ -3655,6 +3795,12 @@ version = "0.4.12" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c4cd1a83af159aa67994778be9070f0ae1bd732942279cabb14f86f986a21456" +[[package]] +name = "litemap" +version = "0.7.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4ee93343901ab17bd981295f2cf0026d4ad018c7c31ba84549a4ddbb47a45104" + [[package]] name = "lock_api" version = "0.4.6" @@ -6362,6 +6508,17 @@ version = "0.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "20518fe4a4c9acf048008599e464deb21beeae3d3578418951a189c235a7a9a8" +[[package]] +name = "synstructure" +version = "0.13.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c8af7666ab7b6390ab78131fb5b0fce11d6b7a6951602017c35fa82800708971" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.79", +] + [[package]] name = "sysinfo" version = "0.26.9" @@ -7378,6 +7535,16 @@ dependencies = [ "time-core", ] +[[package]] +name = "tinystr" +version = "0.7.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9117f5d4db391c1cf6927e7bea3db74b9a1c1add8f7eda9ffd5364f40f57b82f" +dependencies = [ + "displaydoc", + "zerovec", +] + [[package]] name = "tinytemplate" version = "1.2.0" @@ -7714,27 +7881,12 @@ version = "0.2.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "eeba86d422ce181a719445e51872fa30f1f7413b62becb52e95ec91aa262d85c" -[[package]] -name = "unicode-bidi" -version = "0.3.15" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "08f95100a766bf4f8f28f90d77e0a5461bbdb219042e7679bebe79004fed8d75" - [[package]] name = "unicode-ident" version = "1.0.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6ceab39d59e4c9499d4e5a8ee0e2735b891bb7308ac83dfb4e80cad195c9f6f3" -[[package]] -name = "unicode-normalization" -version = "0.1.23" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a56d1686db2308d901306f92a263857ef59ea39678a5458e7cb17f01415101f5" -dependencies = [ - "tinyvec", -] - [[package]] name = "unicode-segmentation" version = "1.3.0" @@ -7755,9 +7907,9 @@ checksum = "f962df74c8c05a667b5ee8bcf162993134c104e96440b663c8daa176dc772d8c" [[package]] name = "url" -version = "2.5.0" +version = "2.5.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "31e6302e3bb753d46e83516cae55ae196fc0c309407cf11ab35cc51a4c2a4633" +checksum = "32f8b686cadd1473f4bd0117a5d28d36b1ade384ea9b5069a1c40aefed7fda60" dependencies = [ "form_urlencoded", 
"idna", @@ -7771,12 +7923,24 @@ version = "2.1.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "daf8dba3b7eb870caf1ddeed7bc9d2a049f3cfdfae7cb521b087cc33ae4c49da" +[[package]] +name = "utf16_iter" +version = "1.0.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c8232dd3cdaed5356e0f716d285e4b40b932ac434100fe9b7e0e8e935b9e6246" + [[package]] name = "utf8-ranges" version = "1.0.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7fcfc827f90e53a02eaef5e535ee14266c1d569214c6aa70133a624d8a3164ba" +[[package]] +name = "utf8_iter" +version = "1.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b6c140620e7ffbb22c2dee59cafe6084a59b5ffc27a8859a5f0d494b5d52b6be" + [[package]] name = "uuid" version = "0.8.2" @@ -8268,6 +8432,18 @@ dependencies = [ "winapi 0.3.9", ] +[[package]] +name = "write16" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d1890f4022759daae28ed4fe62859b1236caebfc61ede2f63ed4e695f3f6d936" + +[[package]] +name = "writeable" +version = "0.5.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1e9df38ee2d2c3c5948ea468a8406ff0db0b29ae1ffde1bcf20ef305bcc95c51" + [[package]] name = "ws2_32-sys" version = "0.2.1" @@ -8328,6 +8504,30 @@ dependencies = [ "rand 0.8.5", ] +[[package]] +name = "yoke" +version = "0.7.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "120e6aef9aa629e3d4f52dc8cc43a015c7724194c97dfaf45180d2daf2b77f40" +dependencies = [ + "serde", + "stable_deref_trait", + "yoke-derive", + "zerofrom", +] + +[[package]] +name = "yoke-derive" +version = "0.7.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2380878cad4ac9aac1e2435f3eb4020e8374b5f13c296cb75b4620ff8e229154" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.79", + "synstructure", +] + [[package]] name = "zerocopy" version = "0.7.32" @@ -8348,12 +8548,55 @@ dependencies = [ "syn 2.0.79", ] +[[package]] +name = "zerofrom" +version = "0.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cff3ee08c995dee1859d998dea82f7374f2826091dd9cd47def953cae446cd2e" +dependencies = [ + "zerofrom-derive", +] + +[[package]] +name = "zerofrom-derive" +version = "0.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "595eed982f7d355beb85837f651fa22e90b3c044842dc7f2c2842c086f295808" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.79", + "synstructure", +] + [[package]] name = "zeroize" version = "1.7.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "525b4ec142c6b68a2d10f01f7bbf6755599ca3f81ea53b8431b7dd348f5fdb2d" +[[package]] +name = "zerovec" +version = "0.10.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "aa2b893d79df23bfb12d5461018d408ea19dfafe76c2c7ef6d4eba614f8ff079" +dependencies = [ + "yoke", + "zerofrom", + "zerovec-derive", +] + +[[package]] +name = "zerovec-derive" +version = "0.10.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6eafa6dfb17584ea3e2bd6e76e0cc15ad7af12b09abdd1ca55961bed9b1063c6" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.79", +] + [[package]] name = "zipf" version = "6.1.0" diff --git a/cmd/tikv-server/src/main.rs b/cmd/tikv-server/src/main.rs index c049fd848b4..9fe7f6986d0 100644 --- a/cmd/tikv-server/src/main.rs +++ b/cmd/tikv-server/src/main.rs @@ -23,8 +23,8 @@ fn 
main() { let matches = App::new("TiKV") .about("A distributed transactional key-value database powered by Rust and Raft") .author(crate_authors!()) - .version(version_info.as_ref()) - .long_version(version_info.as_ref()) + .version::<&str>(version_info.as_ref()) + .long_version::<&str>(version_info.as_ref()) .arg( Arg::with_name("config") .short("C") diff --git a/deny.toml b/deny.toml index b6bfe29f2f2..24b03f42c29 100644 --- a/deny.toml +++ b/deny.toml @@ -84,7 +84,7 @@ unlicensed = "deny" copyleft = "deny" private = { ignore = false } # Allow licenses in Category A -allow = ["0BSD", "Apache-2.0", "BSD-3-Clause", "CC0-1.0", "ISC", "MIT", "Zlib"] +allow = ["0BSD", "Apache-2.0", "BSD-3-Clause", "CC0-1.0", "ISC", "MIT", "Zlib", "Unicode-3.0"] exceptions = [ # unicode-ident includes data generated from Unicode Character Database # which is licensed under Unicode-DFS-2016. From d00d04b0d73dce7202da976b760f1ccb1a321a4a Mon Sep 17 00:00:00 2001 From: lucasliang Date: Tue, 10 Dec 2024 15:30:15 +0800 Subject: [PATCH 47/86] storage: fix the check if `reserve-space` == 0. (#17964) ref tikv/tikv#17939 This PR addresses and resolves the issue that if `reserver-space` is set to 0, the disk stats could not be updated as expected. Signed-off-by: lucasliang --- components/server/src/server.rs | 13 +++++++------ components/server/src/server2.rs | 13 +++++++------ tests/integrations/raftstore/test_stats.rs | 1 + 3 files changed, 15 insertions(+), 12 deletions(-) diff --git a/components/server/src/server.rs b/components/server/src/server.rs index eeeeb20e126..23b5fbc2376 100644 --- a/components/server/src/server.rs +++ b/components/server/src/server.rs @@ -1385,10 +1385,6 @@ where let snap_mgr = self.snap_mgr.clone().unwrap(); let reserve_space = disk::get_disk_reserved_space(); let reserve_raft_space = disk::get_raft_disk_reserved_space(); - if reserve_space == 0 && reserve_raft_space == 0 { - info!("disk space checker not enabled"); - return; - } let raft_path = engines.raft.get_engine_path().to_string(); let separated_raft_mount_path = path_in_diff_mount_point(raft_path.as_str(), engines.kv.path()); @@ -1465,8 +1461,13 @@ where capacity ); } - // Update disk status. - disk::set_disk_status(cur_disk_status); + // Update disk status if disk space checker is enabled. + if reserve_space == 0 && reserve_raft_space == 0 { + info!("ignore updating disk status as no reserve space is set"); + } else { + disk::set_disk_status(cur_disk_status); + } + // Update disk capacity, used size and available size. disk::set_disk_capacity(capacity); disk::set_disk_used_size(used_size); disk::set_disk_available_size(available); diff --git a/components/server/src/server2.rs b/components/server/src/server2.rs index 9ac701053c3..996146f8a2a 100644 --- a/components/server/src/server2.rs +++ b/components/server/src/server2.rs @@ -1171,10 +1171,6 @@ where let snap_mgr = self.snap_mgr.clone().unwrap(); let reserve_space = disk::get_disk_reserved_space(); let reserve_raft_space = disk::get_raft_disk_reserved_space(); - if reserve_space == 0 && reserve_raft_space == 0 { - info!("disk space checker not enabled"); - return; - } let raft_engine = self.engines.as_ref().unwrap().raft_engine.clone(); let tablet_registry = self.tablet_registry.clone().unwrap(); let raft_path = raft_engine.get_engine_path().to_string(); @@ -1255,8 +1251,13 @@ where capacity ); } - // Update disk status. - disk::set_disk_status(cur_disk_status); + // Update disk status if disk space checker is enabled. 
+ if reserve_space == 0 && reserve_raft_space == 0 { + info!("ignore updating disk status as no reserve space is set"); + } else { + disk::set_disk_status(cur_disk_status); + } + // Update disk capacity, used size and available size. disk::set_disk_capacity(capacity); disk::set_disk_used_size(used_size); disk::set_disk_available_size(available); diff --git a/tests/integrations/raftstore/test_stats.rs b/tests/integrations/raftstore/test_stats.rs index 34b894ed08b..e41bc8ebd86 100644 --- a/tests/integrations/raftstore/test_stats.rs +++ b/tests/integrations/raftstore/test_stats.rs @@ -47,6 +47,7 @@ fn test_simple_store_stats(cluster: &mut Cluster) { let pd_client = Arc::clone(&cluster.pd_client); cluster.cfg.raft_store.pd_store_heartbeat_tick_interval = ReadableDuration::millis(20); + cluster.cfg.storage.reserve_space = ReadableSize(0); cluster.run(); // wait store reports stats. From 4c68641ddf3e1fac7a7c556af63cf67a00ddc3bc Mon Sep 17 00:00:00 2001 From: Bisheng Huang Date: Tue, 10 Dec 2024 18:02:06 +0800 Subject: [PATCH 48/86] raftstore: Fix flaky test_split_region_with_no_valid_split_keys (#17953) close tikv/tikv#17557 The test expects a split to occur but sometimes failed because the split check was triggered too early (before the DB had enough keys) and found no split key. Once the first split check finds nothing, subsequent checks are delayed until `size_diff_hint` (an approximate measure of region size change) reaches the `region_split_check_diff` threshold. By lowering this threshold, the second split check is triggered in time to meet the test's expectation. In prod, `region_split_check_diff` is typically set to 1/16 of the `region_split_size`. For this test, setting it equal to `region_split_size` was sufficient to fix the test flakiness. Signed-off-by: Bisheng Huang Co-authored-by: ti-chi-bot[bot] <108142056+ti-chi-bot[bot]@users.noreply.github.com> --- tests/failpoints/cases/test_split_region.rs | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/tests/failpoints/cases/test_split_region.rs b/tests/failpoints/cases/test_split_region.rs index e9b23df27f1..916258a3545 100644 --- a/tests/failpoints/cases/test_split_region.rs +++ b/tests/failpoints/cases/test_split_region.rs @@ -1614,6 +1614,10 @@ fn test_not_reset_has_dirty_data_due_to_slow_split() { fn test_split_region_with_no_valid_split_keys() { let mut cluster = test_raftstore::new_node_cluster(0, 3); cluster.cfg.coprocessor.region_split_size = Some(ReadableSize::kb(1)); + // `region_split_check_diff` must be set as well after adjusting + // `region_split_size`. Otherwise, split checks may be skipped and a split + // may not be triggered as expected. + cluster.cfg.raft_store.region_split_check_diff = Some(ReadableSize::kb(1)); cluster.cfg.raft_store.split_region_check_tick_interval = ReadableDuration::millis(500); cluster.run(); From 261458672ab66e6acef5017535783628e62bb76c Mon Sep 17 00:00:00 2001 From: Connor Date: Wed, 11 Dec 2024 18:03:12 +0800 Subject: [PATCH 49/86] *: Fix missing workspace members (#17979) ref tikv/tikv#15990 Some components are missing in workspace members, so `cargo-sort` won't cover them. Using wildcard for workspace members so that none can't be easily forgot anymore. 
Signed-off-by: Connor1996 --- Cargo.lock | 12 +++++ Cargo.toml | 75 ++++---------------------- components/engine_panic/Cargo.toml | 2 +- components/engine_rocks/Cargo.toml | 2 +- components/engine_traits/Cargo.toml | 2 +- components/hybrid_engine/Cargo.toml | 26 ++++----- components/in_memory_engine/Cargo.toml | 42 +++++++-------- components/raft_log_engine/Cargo.toml | 2 +- 8 files changed, 59 insertions(+), 104 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index be3befe64f6..713ac614a87 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2170,6 +2170,18 @@ dependencies = [ "txn_types", ] +[[package]] +name = "engine_traits_tests" +version = "0.0.1" +dependencies = [ + "encryption_export", + "engine_test", + "engine_traits", + "panic_hook", + "tempfile", + "test_util", +] + [[package]] name = "enum_dispatch" version = "0.3.8" diff --git a/Cargo.toml b/Cargo.toml index 2bcdc7d9af1..61a577cbe92 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -233,79 +233,22 @@ resolver = "2" members = [ "cmd/tikv-ctl", "cmd/tikv-server", - "components/api_version", - "components/backup", - "components/backup-stream", - "components/batch-system", - "components/causal_ts", - "components/cdc", - "components/cloud", - "components/cloud/aws", - "components/cloud/azure", - "components/cloud/gcp", - "components/codec", - "components/collections", - "components/compact-log-backup", - "components/concurrency_manager", - "components/coprocessor_plugin_api", - "components/crossbeam-skiplist", - "components/crypto", - "components/encryption", - "components/encryption/export", - "components/engine_rocks_helper", - # Only enable tirocks in local development, otherwise it can slow down compilation. - # TODO: always enable tirocks and remove engine_rocks. - # "components/engine_tirocks", - "components/error_code", - "components/external_storage", - "components/file_system", - "components/health_controller", - "components/into_other", - "components/keys", - "components/log_wrappers", - "components/online_config", - "components/panic_hook", - "components/pd_client", - "components/profiler", - "components/raftstore", - "components/raftstore-v2", - "components/resolved_ts", - "components/resource_control", - "components/resource_metering", - "components/security", - "components/server", - "components/service", - "components/snap_recovery", - "components/sst_importer", - "components/test_backup", - "components/test_coprocessor", + "components/*", "components/test_coprocessor_plugin/example_plugin", - "components/test_pd", - "components/test_pd_client", - "components/test_raftstore", - "components/test_raftstore-v2", - "components/test_raftstore_macro", - "components/test_sst_importer", - "components/test_storage", - "components/test_util", - "components/tidb_query_aggr", - "components/tidb_query_codegen", - "components/tidb_query_common", - "components/tidb_query_datatype", - "components/tidb_query_executors", - "components/tidb_query_expr", - "components/tikv_alloc", - "components/tikv_kv", - "components/tikv_util", - "components/tipb_helper", - "components/tracker", - "components/txn_types", "fuzz", "fuzz/fuzzer-afl", "fuzz/fuzzer-honggfuzz", "fuzz/fuzzer-libfuzzer", "tests", ] +exclude = [ + # Only enable tirocks in local development, otherwise it can slow down compilation. + # TODO: always enable tirocks and remove engine_rocks. + "components/engine_tirocks", + # `test_coprocessor_plugin` itself isn't a valid crate, so we exclude it to avoiding match it by `components/*`. 
+ # What is a valid crate is the subdir in `test_coprocessor_plugin`, like `example_plugin`. + "components/test_coprocessor_plugin", +] default-members = ["cmd/tikv-server", "cmd/tikv-ctl"] [workspace.metadata.cargo-machete] diff --git a/components/engine_panic/Cargo.toml b/components/engine_panic/Cargo.toml index 3ee5d82ad14..c125ef7ccf2 100644 --- a/components/engine_panic/Cargo.toml +++ b/components/engine_panic/Cargo.toml @@ -10,9 +10,9 @@ license = "Apache-2.0" testexport = [] [dependencies] +encryption = { workspace = true } engine_traits = { workspace = true } kvproto = { workspace = true } -encryption = { workspace = true } raft = { workspace = true } tracker = { workspace = true } txn_types = { workspace = true } diff --git a/components/engine_rocks/Cargo.toml b/components/engine_rocks/Cargo.toml index d3893da020a..26ca70e1606 100644 --- a/components/engine_rocks/Cargo.toml +++ b/components/engine_rocks/Cargo.toml @@ -61,6 +61,6 @@ package = "rocksdb" features = ["encryption"] [dev-dependencies] +proptest = "1.0.0" rand = "0.8" toml = "0.5" -proptest = "1.0.0" diff --git a/components/engine_traits/Cargo.toml b/components/engine_traits/Cargo.toml index 2e86822ceac..85b8337ef0d 100644 --- a/components/engine_traits/Cargo.toml +++ b/components/engine_traits/Cargo.toml @@ -11,6 +11,7 @@ testexport = [] [dependencies] collections = { workspace = true } +encryption = { workspace = true } error_code = { workspace = true } fail = "0.5" file_system = { workspace = true } @@ -19,7 +20,6 @@ kvproto = { workspace = true } log_wrappers = { workspace = true } protobuf = "2" raft = { workspace = true } -encryption = { workspace = true } serde = "1.0" slog = { workspace = true } slog-global = { workspace = true } diff --git a/components/hybrid_engine/Cargo.toml b/components/hybrid_engine/Cargo.toml index 3a23f9927da..2938a329f97 100644 --- a/components/hybrid_engine/Cargo.toml +++ b/components/hybrid_engine/Cargo.toml @@ -6,27 +6,27 @@ publish = false license = "Apache-2.0" [dependencies] -engine_traits = { workspace = true } -txn_types = { workspace = true } -tikv_util = { workspace = true } +crossbeam = { workspace = true } engine_rocks = { workspace = true } -online_config = { workspace = true } +engine_traits = { workspace = true } in_memory_engine = { workspace = true } -slog = { workspace = true } -slog-global = { workspace = true } -tempfile = "3.0" +keys = { workspace = true } +kvproto = { workspace = true } +lazy_static = "1.4.0" +online_config = { workspace = true } prometheus = { version = "0.13", default-features = false, features = [ "nightly", ] } prometheus-static-metric = "0.5" -lazy_static = "1.4.0" -crossbeam = { workspace = true } -raftstore = { workspace = true } raft = { workspace = true } -kvproto = { workspace = true } -keys = { workspace = true } +raftstore = { workspace = true } +slog = { workspace = true } +slog-global = { workspace = true } +tempfile = "3.0" +tikv_util = { workspace = true } +txn_types = { workspace = true } [dev-dependencies] +fail = { version = "0.5", features = ["failpoints"] } tempfile = "3.0" test_util = { workspace = true } -fail = { version = "0.5", features = ["failpoints"] } diff --git a/components/in_memory_engine/Cargo.toml b/components/in_memory_engine/Cargo.toml index 966281d04a7..cf94c9de148 100644 --- a/components/in_memory_engine/Cargo.toml +++ b/components/in_memory_engine/Cargo.toml @@ -20,45 +20,45 @@ path = "benches/load_region.rs" harness = false [dependencies] -engine_traits = { workspace = true } -collections = { workspace = true 
} -crossbeam-skiplist = { workspace = true } bytes = "1.0" +collections = { workspace = true } crossbeam = { workspace = true } +crossbeam-skiplist = { workspace = true } +dashmap = "5.1" +engine_rocks = { workspace = true } +engine_traits = { workspace = true } +fail = "0.5" futures = { version = "0.3", features = ["compat"] } -tikv_util = { workspace = true } -txn_types = { workspace = true } +hex = "0.4" +keys = { workspace = true } kvproto = { workspace = true } +lazy_static = "1.4.0" +libc = "0.2" log_wrappers = { workspace = true } +online_config = { workspace = true } +parking_lot = "0.12" pd_client = { workspace = true } +prometheus = { version = "0.13", default-features = false, features = ["nightly"] } +prometheus-static-metric = "0.5" raftstore = { workspace = true } -dashmap = "5.1" +rand = "0.8" security = { workspace = true } serde = "1.0" serde_json = "1.0" -slog-global = { workspace = true } slog = { workspace = true } +slog-global = { workspace = true } +smallvec = "1.4" strum = { version = "0.20", features = ["derive"] } -engine_rocks = { workspace = true } -fail = "0.5" -yatp = { workspace = true } -parking_lot = "0.12" -keys = { workspace = true } -prometheus = { version = "0.13", default-features = false, features = ["nightly"] } -prometheus-static-metric = "0.5" -lazy_static = "1.4.0" -hex = "0.4" thiserror = "1.0" -online_config = { workspace = true } -libc = "0.2" -rand = "0.8" +tikv_util = { workspace = true } tokio = { version = "1.5", features = ["rt-multi-thread"] } -smallvec = "1.4" +txn_types = { workspace = true } +yatp = { workspace = true } [dev-dependencies] criterion = "0.3" +proptest = "1.0.0" tempfile = "3.0" test_pd = { workspace = true } test_util = { workspace = true } -proptest = "1.0.0" tikv_alloc = { workspace = true, features = ["jemalloc"] } diff --git a/components/raft_log_engine/Cargo.toml b/components/raft_log_engine/Cargo.toml index c1c48988f44..15f2388b460 100644 --- a/components/raft_log_engine/Cargo.toml +++ b/components/raft_log_engine/Cargo.toml @@ -9,9 +9,9 @@ license = "Apache-2.0" failpoints = ["raft-engine/failpoints"] [dependencies] +codec = { workspace = true } encryption = { workspace = true } engine_traits = { workspace = true } -codec = { workspace = true } file_system = { workspace = true } kvproto = { workspace = true } raft = { workspace = true } From 601e8a7337049ea541f2f8ffb4179e28b9efc9e1 Mon Sep 17 00:00:00 2001 From: hhwyt Date: Thu, 12 Dec 2024 21:14:12 +0800 Subject: [PATCH 50/86] raftstore: limit snapshot generation based on actual I/O usage (#17958) close tikv/tikv#17973 This PR enhances the snapshot generation mechanism to limit speed based on actual I/O usage instead of the logical size of scanned key-value pairs. 
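A rough sketch of the throttling loop follows (not the TiKV implementation: `Limiter` below is a simplified blocking token bucket standing in for `tikv_util`'s limiter, `thread_read_bytes` fakes the per-thread I/O counter behind `get_thread_io_bytes_total`, and the constants are illustrative). Every time enough logical bytes have been scanned, the loop charges the limiter with the physical bytes read since the last check, acquiring quota in fixed-size chunks so the scan blocks once the configured rate is exceeded:

    use std::thread;
    use std::time::{Duration, Instant};

    const IO_LIMITER_CHUNK_SIZE: usize = 4 * 1024; // illustrative chunk size
    const SCAN_BYTES_PER_IO_LIMIT_CHECK: usize = 8 * 1024;

    // Simplified blocking rate limiter: sleeps as needed so that the total
    // consumed bytes never exceed `bytes_per_sec` on average.
    struct Limiter {
        bytes_per_sec: f64,
        start: Instant,
        consumed: f64,
    }

    impl Limiter {
        fn new(bytes_per_sec: f64) -> Self {
            Limiter { bytes_per_sec, start: Instant::now(), consumed: 0.0 }
        }

        fn blocking_consume(&mut self, bytes: usize) {
            self.consumed += bytes as f64;
            let ready_at =
                self.start + Duration::from_secs_f64(self.consumed / self.bytes_per_sec);
            if let Some(wait) = ready_at.checked_duration_since(Instant::now()) {
                thread::sleep(wait);
            }
        }
    }

    // Hypothetical per-thread read counter; pretend compression halves physical reads.
    fn thread_read_bytes(logical_scanned: usize) -> u64 {
        (logical_scanned / 2) as u64
    }

    fn main() {
        let mut limiter = Limiter::new(8_000.0);
        let (mut prev_read, mut quota, mut scanned) = (0u64, 0usize, 0usize);
        let mut next_check = SCAN_BYTES_PER_IO_LIMIT_CHECK;

        for _ in 0..1_000 {
            scanned += 12; // each fake key/value pair contributes 12 logical bytes
            if scanned >= next_check {
                // Charge the limiter for the actual read I/O since the last check,
                // acquiring quota in fixed chunks so a large delta still blocks.
                let cur_read = thread_read_bytes(scanned);
                let delta = (cur_read - prev_read) as usize;
                while delta > quota {
                    limiter.blocking_consume(IO_LIMITER_CHUNK_SIZE);
                    quota += IO_LIMITER_CHUNK_SIZE;
                }
                quota -= delta;
                prev_read = cur_read;
                next_check = scanned + SCAN_BYTES_PER_IO_LIMIT_CHECK;
            }
        }
        println!("scanned {scanned} logical bytes, observed {prev_read} bytes of reads");
    }

In the real code the scan additionally tags the thread with an `IoType` (load balance vs. replication) and charges the limiter one final time after the scan finishes, so the bytes read since the last periodic check are not forgotten.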
Signed-off-by: hhwyt --- components/file_system/src/io_stats/mod.rs | 2 +- components/raftstore/src/store/snap.rs | 15 ++- components/raftstore/src/store/snap/io.rs | 112 +++++++++++++++++++-- tests/integrations/storage/test_titan.rs | 2 + 4 files changed, 121 insertions(+), 10 deletions(-) diff --git a/components/file_system/src/io_stats/mod.rs b/components/file_system/src/io_stats/mod.rs index 9a2bc242ed9..7a5551e5044 100644 --- a/components/file_system/src/io_stats/mod.rs +++ b/components/file_system/src/io_stats/mod.rs @@ -31,7 +31,7 @@ mod stub { } pub fn get_thread_io_bytes_total() -> Result { - Err("unimplemented".into()) + Ok(IoBytes::default()) } } #[cfg(not(any(target_os = "linux", feature = "bcc-iosnoop")))] diff --git a/components/raftstore/src/store/snap.rs b/components/raftstore/src/store/snap.rs index 8516703de57..20b9fba22a1 100644 --- a/components/raftstore/src/store/snap.rs +++ b/components/raftstore/src/store/snap.rs @@ -899,6 +899,7 @@ impl Snapshot { .get_actual_max_per_file_size(allow_multi_files_snapshot), &self.mgr.limiter, self.mgr.encryption_key_manager.clone(), + for_balance, )? }; SNAPSHOT_LIMIT_GENERATE_BYTES.inc_by(cf_stat.total_size as u64); @@ -2593,6 +2594,18 @@ pub mod tests { db_opt: Option, cf_opts: Option>, ) -> Result + where + E: KvEngine + KvEngineConstructorExt, + { + open_test_db_with_nkeys(path, db_opt, cf_opts, 100) + } + + pub fn open_test_db_with_nkeys( + path: &Path, + db_opt: Option, + cf_opts: Option>, + nkeys: u64, + ) -> Result where E: KvEngine + KvEngineConstructorExt, { @@ -2602,7 +2615,7 @@ pub mod tests { let mut p = Peer::default(); p.set_store_id(TEST_STORE_ID); p.set_id((i + 1) as u64); - for k in 0..100 { + for k in 0..nkeys { let key = keys::data_key(format!("akey{}", k).as_bytes()); db.put_msg_cf(cf, &key[..], &p)?; } diff --git a/components/raftstore/src/store/snap/io.rs b/components/raftstore/src/store/snap/io.rs index 2962dd903e6..1d1597e95e2 100644 --- a/components/raftstore/src/store/snap/io.rs +++ b/components/raftstore/src/store/snap/io.rs @@ -13,7 +13,9 @@ use engine_traits::{ SstCompressionType, SstReader, SstWriter, SstWriterBuilder, WriteBatch, }; use fail::fail_point; -use file_system::{File, OpenOptions}; +#[cfg(not(test))] +use file_system::get_thread_io_bytes_total; +use file_system::{File, IoBytes, IoType, OpenOptions, WithIoType}; use kvproto::encryptionpb::EncryptionMethod; use tikv_util::{ box_try, @@ -24,6 +26,11 @@ use tikv_util::{ use super::{CfFile, Error, IO_LIMITER_CHUNK_SIZE}; +// This defines the number of bytes scanned before trigger an I/O limiter check. +// It is used instead of checking the I/O limiter for each scan to reduce cpu +// overhead. +const SCAN_BYTES_PER_IO_LIMIT_CHECK: usize = 8 * 1024; + /// Used to check a procedure is stale or not. pub trait StaleDetector { fn is_stale(&self) -> bool; @@ -104,6 +111,25 @@ where Ok(stats) } +#[cfg(not(test))] +fn get_thread_io_bytes_stats() -> Result { + get_thread_io_bytes_total() +} + +#[cfg(test)] +fn get_thread_io_bytes_stats() -> Result { + use std::cell::Cell; + thread_local! { + static TOTAL_BYTES: Cell = Cell::new(IoBytes::default()); + } + let mut new_bytes = TOTAL_BYTES.get(); + // We use 2 as the factor because the compression ratio of SST files with the + // zstd algorithm is empirically around 2x. + new_bytes.read += SCAN_BYTES_PER_IO_LIMIT_CHECK as u64 / 2; + TOTAL_BYTES.set(new_bytes); + Ok(new_bytes) +} + /// Build a snapshot file for the given column family in sst format. 
/// If there are no key-value pairs fetched, no files will be created at `path`, /// otherwise the file will be created and synchronized. @@ -116,6 +142,7 @@ pub fn build_sst_cf_file_list( raw_size_per_file: u64, io_limiter: &Limiter, key_mgr: Option>, + for_balance: bool, ) -> Result where E: KvEngine, @@ -176,6 +203,25 @@ where }; let instant = Instant::now(); + let _io_type_guard = WithIoType::new(if for_balance { + IoType::LoadBalance + } else { + IoType::Replication + }); + let mut prev_io_bytes = get_thread_io_bytes_stats().unwrap(); + let mut next_io_check_size = stats.total_size + SCAN_BYTES_PER_IO_LIMIT_CHECK; + let handle_read_io_usage = |prev_io_bytes: &mut IoBytes, remained_quota: &mut usize| { + let cur_io_bytes = get_thread_io_bytes_stats().unwrap(); + let read_delta = (cur_io_bytes.read - prev_io_bytes.read) as usize; + + while read_delta > *remained_quota { + io_limiter.blocking_consume(IO_LIMITER_CHUNK_SIZE); + *remained_quota += IO_LIMITER_CHUNK_SIZE; + } + *remained_quota -= read_delta; + *prev_io_bytes = cur_io_bytes; + }; + box_try!(snap.scan(cf, start_key, end_key, false, |key, value| { let entry_len = key.len() + value.len(); if file_length + entry_len > raw_size_per_file as usize { @@ -202,15 +248,16 @@ where } } - while entry_len > remained_quota { - // It's possible to acquire more than necessary, but let it be. - io_limiter.blocking_consume(IO_LIMITER_CHUNK_SIZE); - remained_quota += IO_LIMITER_CHUNK_SIZE; - } - remained_quota -= entry_len; - stats.key_count += 1; stats.total_size += entry_len; + + if stats.total_size >= next_io_check_size { + // TODO(@hhwyt): Consider incorporating snapshot file write I/O into the + // limiting mechanism. + handle_read_io_usage(&mut prev_io_bytes, &mut remained_quota); + next_io_check_size = stats.total_size + SCAN_BYTES_PER_IO_LIMIT_CHECK; + } + if let Err(e) = sst_writer.borrow_mut().put(key, value) { let io_error = io::Error::new(io::ErrorKind::Other, e); return Err(io_error.into()); @@ -218,6 +265,10 @@ where file_length += entry_len; Ok(true) })); + // Handle the IO generated by the remaining key-value pairs less than + // SCAN_BYTES_PER_IO_LIMIT_CHECK. + handle_read_io_usage(&mut prev_io_bytes, &mut remained_quota); + if stats.key_count > 0 { box_try!(finish_sst_writer(sst_writer.into_inner(), path, key_mgr)); cf_file.add_file(file_id); @@ -566,6 +617,7 @@ mod tests { *max_file_size, &limiter, db_opt.as_ref().and_then(|opt| opt.get_key_manager()), + true, ) .unwrap(); if stats.key_count == 0 { @@ -602,4 +654,48 @@ mod tests { } } } + + // This test verifies that building SST files is effectively limited by the I/O + // limiter based on actual I/O usage. It achieve this by adding an I/O limiter + // and asserting that the elapsed time for building SST files exceeds the + // lower bound enforced by the I/O limiter. + // + // In this test, the I/O limiter is configured with a throughput limit 8000 + // bytes/sec. A dataset of 1000 keys (totaling 11, 890 bytes) is generated to + // trigger two I/O limiter checks, as the default SCAN_BYTES_PER_IO_LIMIT_CHECK + // is 8192 bytes. During each check, the mocked `get_thread_io_bytes_stats` + // function returns 4096 bytes of I/O usage, resulting in total of 8192 bytes. + // With the 8000 bytes/sec limitation, we assert that the elapsed time must + // exceed 1 second. 
+ #[test] + fn test_build_sst_with_io_limiter() { + let dir = Builder::new().prefix("test-io-limiter").tempdir().unwrap(); + let db = open_test_db_with_nkeys(dir.path(), None, None, 1000).unwrap(); + let bytes_per_sec = 8000_f64; + let limiter = Limiter::new(bytes_per_sec); + let snap_dir = Builder::new().prefix("snap-dir").tempdir().unwrap(); + let mut cf_file = CfFile { + cf: CF_DEFAULT, + path: PathBuf::from(snap_dir.path()), + file_prefix: "test_sst".to_string(), + file_suffix: SST_FILE_SUFFIX.to_string(), + ..Default::default() + }; + + let start = Instant::now(); + let stats = build_sst_cf_file_list::( + &mut cf_file, + &db, + &db.snapshot(), + &keys::data_key(b""), + &keys::data_key(b"z"), + u64::MAX, + &limiter, + None, + true, + ) + .unwrap(); + assert_eq!(stats.total_size, 11890); + assert!(start.saturating_elapsed_secs() > 1_f64); + } } diff --git a/tests/integrations/storage/test_titan.rs b/tests/integrations/storage/test_titan.rs index 86bcb609426..a99f0383c81 100644 --- a/tests/integrations/storage/test_titan.rs +++ b/tests/integrations/storage/test_titan.rs @@ -380,6 +380,7 @@ fn test_delete_files_in_range_for_titan() { u64::MAX, &limiter, None, + true, ) .unwrap(); let mut cf_file_write = CfFile::new( @@ -397,6 +398,7 @@ fn test_delete_files_in_range_for_titan() { u64::MAX, &limiter, None, + true, ) .unwrap(); From 5ac2208f1b4f5f96db8e3e394c16201480c32148 Mon Sep 17 00:00:00 2001 From: Shenghui Wu <793703860@qq.com> Date: Fri, 13 Dec 2024 17:43:54 +0800 Subject: [PATCH 51/86] expression: change the round rule to `round to nearest even` (#17935) close tikv/tikv#17934 expression: change the round rule to `round to nearest even` Signed-off-by: wshwsh12 <793703860@qq.com> Co-authored-by: ti-chi-bot[bot] <108142056+ti-chi-bot[bot]@users.noreply.github.com> --- components/tidb_query_datatype/src/codec/convert.rs | 12 ++++++------ components/tidb_query_datatype/src/lib.rs | 1 + components/tidb_query_expr/src/impl_cast.rs | 8 ++++---- 3 files changed, 11 insertions(+), 10 deletions(-) diff --git a/components/tidb_query_datatype/src/codec/convert.rs b/components/tidb_query_datatype/src/codec/convert.rs index 2dafd4c8604..75195bc3135 100644 --- a/components/tidb_query_datatype/src/codec/convert.rs +++ b/components/tidb_query_datatype/src/codec/convert.rs @@ -289,7 +289,7 @@ impl ToInt for f64 { /// anymore. 
fn to_int(&self, ctx: &mut EvalContext, tp: FieldTypeTp) -> Result { #![allow(clippy::float_cmp)] - let val = self.round(); + let val = self.round_ties_even(); let lower_bound = integer_signed_lower_bound(tp); if val < lower_bound as f64 { ctx.handle_overflow_err(overflow(val, tp))?; @@ -1230,14 +1230,14 @@ mod tests { (-256.6, FieldTypeTp::Short, Some(-257)), (65535.5, FieldTypeTp::Short, None), (65536.1, FieldTypeTp::Int24, Some(65536)), - (65536.5, FieldTypeTp::Int24, Some(65537)), + (65536.5, FieldTypeTp::Int24, Some(65536)), (-65536.1, FieldTypeTp::Int24, Some(-65536)), - (-65536.5, FieldTypeTp::Int24, Some(-65537)), + (-65536.5, FieldTypeTp::Int24, Some(-65536)), (8388610.2, FieldTypeTp::Int24, None), (8388610.4, FieldTypeTp::Long, Some(8388610)), - (8388610.5, FieldTypeTp::Long, Some(8388611)), + (8388610.5, FieldTypeTp::Long, Some(8388610)), (-8388610.4, FieldTypeTp::Long, Some(-8388610)), - (-8388610.5, FieldTypeTp::Long, Some(-8388611)), + (-8388610.5, FieldTypeTp::Long, Some(-8388610)), (4294967296.8, FieldTypeTp::Long, None), (4294967296.8, FieldTypeTp::LongLong, Some(4294967297)), (4294967297.1, FieldTypeTp::LongLong, Some(4294967297)), @@ -1518,7 +1518,7 @@ mod tests { ("3", 3), ("-3", -3), ("4.1", 4), - ("4.5", 5), + ("4.5", 4), ("true", 1), ("false", 0), ("null", 0), diff --git a/components/tidb_query_datatype/src/lib.rs b/components/tidb_query_datatype/src/lib.rs index e6106f858cb..090d43e2062 100644 --- a/components/tidb_query_datatype/src/lib.rs +++ b/components/tidb_query_datatype/src/lib.rs @@ -8,6 +8,7 @@ #![allow(internal_features)] #![feature(str_internals)] #![feature(core_intrinsics)] +#![feature(round_ties_even)] #[macro_use] extern crate num_derive; diff --git a/components/tidb_query_expr/src/impl_cast.rs b/components/tidb_query_expr/src/impl_cast.rs index a8ef3a096e0..d791866626e 100644 --- a/components/tidb_query_expr/src/impl_cast.rs +++ b/components/tidb_query_expr/src/impl_cast.rs @@ -1925,9 +1925,9 @@ mod tests { let cs = vec![ // (origin, result, overflow) (-10.4, -10i64, false), - (-10.5, -11, false), + (-10.5, -10, false), (10.4, 10, false), - (10.5, 11, false), + (10.5, 10, false), (i64::MAX as f64, i64::MAX, false), ((1u64 << 63) as f64, i64::MAX, false), (i64::MIN as f64, i64::MIN, false), @@ -3124,10 +3124,10 @@ mod tests { false, false, ), - (Json::from_f64(10.5).unwrap(), 11, false, false), + (Json::from_f64(10.5).unwrap(), 10, false, false), (Json::from_f64(10.4).unwrap(), 10, false, false), (Json::from_f64(-10.4).unwrap(), -10, false, false), - (Json::from_f64(-10.5).unwrap(), -11, false, false), + (Json::from_f64(-10.5).unwrap(), -10, false, false), ( Json::from_string(String::from("10.0")).unwrap(), 10, From c5448cbf8080de78f99968fb2c9edf3ad781f509 Mon Sep 17 00:00:00 2001 From: lucasliang Date: Fri, 13 Dec 2024 20:17:42 +0800 Subject: [PATCH 52/86] clippy: fix unknown license error reported by cargo-deny@0.14.3 (#17998) ref tikv/tikv#17928 Fix the `cargo deny` check error by version 0.14.3 after introducing the new license `Unicode-3.0`. 
Signed-off-by: lucasliang Co-authored-by: ti-chi-bot[bot] <108142056+ti-chi-bot[bot]@users.noreply.github.com> --- scripts/deny | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/scripts/deny b/scripts/deny index f1e48771e57..51930bc56f5 100755 --- a/scripts/deny +++ b/scripts/deny @@ -2,7 +2,8 @@ set -euo pipefail -cargo install --locked cargo-deny@0.14.3 2> /dev/null || echo "Install cargo-deny failed" +# Update cargo-deny to the 0.15.1 version to fix the issue reported by https://github.com/tikv/tikv/pull/17987. +cargo install --locked cargo-deny@0.15.1 2> /dev/null || echo "Install cargo-deny failed" cargo deny -V cargo deny fetch all cargo deny check --show-stats From d54f0fdc330b8dc4ee86a45641bd269f7500bff7 Mon Sep 17 00:00:00 2001 From: hazel1225 <133326080+hazel1225@users.noreply.github.com> Date: Mon, 16 Dec 2024 12:18:12 +0800 Subject: [PATCH 53/86] docs: fix the issue with the Slack link not allowing external users to join (#17952) ref tikv/tikv#15990 Signed-off-by: hazel1225 <133326080+hazel1225@users.noreply.github.com> Co-authored-by: Ti Chi Robot --- CONTRIBUTING.md | 2 +- README.md | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 8add755c48d..ba737a5fb15 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -2,7 +2,7 @@ Thanks for your interest in contributing to TiKV! This document outlines some of the conventions on building, running, and testing TiKV, the development workflow, commit message formatting, contact points and other resources. -TiKV has many dependent repositories. If you need any help or mentoring getting started, understanding the codebase, or making a PR (or anything else really), please ask on [Slack](https://tikv.org/chat). If you don't know where to start, please click on the contributor icon below to get you on the right contributing path. +TiKV has many dependent repositories. If you need any help or mentoring getting started, understanding the codebase, or making a PR (or anything else really), please ask on [Slack](https://slack.tidb.io/invite?team=tikv-wg&channel=general). If you don't know where to start, please click on the contributor icon below to get you on the right contributing path. [contribution-map](https://github.com/pingcap/tidb-map/blob/master/maps/contribution-map.md#tikv-distributed-transactional-key-value-database) diff --git a/README.md b/README.md index 089b5edbed0..9b06052012f 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,6 @@ tikv_logo -## [Website](https://tikv.org) | [Documentation](https://tikv.org/docs/latest/concepts/overview/) | [Community Chat](https://tikv.org/chat) +## [Website](https://tikv.org) | [Documentation](https://tikv.org/docs/latest/concepts/overview/) | [Community Chat](https://slack.tidb.io/invite?team=tikv-wg&channel=general) [![Build Status](https://ci.pingcap.net/buildStatus/icon?job=tikv_ghpr_build_master)](https://ci.pingcap.net/blue/organizations/jenkins/tikv_ghpr_build_master/activity) [![Coverage Status](https://codecov.io/gh/tikv/tikv/branch/master/graph/badge.svg)](https://codecov.io/gh/tikv/tikv) From 911ad9897c232be52ff506220664734efb73e8eb Mon Sep 17 00:00:00 2001 From: hhwyt Date: Mon, 16 Dec 2024 12:42:36 +0800 Subject: [PATCH 54/86] raftstore: enhance raft snapshot metrics (#17997) ref tikv/tikv#15990 This pull request refines metrics for the raft snapshot workflow, includes: 1. Use generate-kv, generate-sst and generate-plain metrics to measure snapshot generation speed. 
These metrics offer better visibility, allowing for a clearer comparison between snapshot generation throughput and send/receive throughput, making it easier to identify bottlenecks. 2. Add metrics for various snapshot tasks to the Snapshot Actions, including send, recv, recv_droppedn and others, providing better visibility for snapshot actions. Signed-off-by: hhwyt Co-authored-by: ti-chi-bot[bot] <108142056+ti-chi-bot[bot]@users.noreply.github.com> --- components/raftstore/src/store/metrics.rs | 14 +++++- components/raftstore/src/store/snap.rs | 16 +++++-- components/raftstore/src/store/snap/io.rs | 52 ++++++++++++++++------- metrics/grafana/tikv_details.dashboard.py | 14 +++++- metrics/grafana/tikv_details.json | 21 +++++++-- metrics/grafana/tikv_details.json.sha256 | 2 +- 6 files changed, 93 insertions(+), 26 deletions(-) diff --git a/components/raftstore/src/store/metrics.rs b/components/raftstore/src/store/metrics.rs index 9428c5025db..38716c5b277 100644 --- a/components/raftstore/src/store/metrics.rs +++ b/components/raftstore/src/store/metrics.rs @@ -271,6 +271,12 @@ make_static_metric! { finished, } + pub label_enum SnapshotGenerateBytesType { + kv, + sst, + plain, + } + pub struct SnapshotBrWaitApplyEvent : IntCounter { "event" => SnapshotBrWaitApplyEventType } @@ -339,6 +345,10 @@ make_static_metric! { applystore_busy, }, } + + pub struct SnapshotGenerateBytesTypeVec: IntCounter { + "type" => SnapshotGenerateBytesType, + } } lazy_static! { @@ -947,9 +957,11 @@ lazy_static! { &["type"] ).unwrap(); - pub static ref SNAPSHOT_LIMIT_GENERATE_BYTES: IntCounter = register_int_counter!( + pub static ref SNAPSHOT_LIMIT_GENERATE_BYTES_VEC: SnapshotGenerateBytesTypeVec = register_static_int_counter_vec!( + SnapshotGenerateBytesTypeVec, "tikv_snapshot_limit_generate_bytes", "Total snapshot generate limit used", + &["type"], ) .unwrap(); diff --git a/components/raftstore/src/store/snap.rs b/components/raftstore/src/store/snap.rs index 20b9fba22a1..72b3ce1bf40 100644 --- a/components/raftstore/src/store/snap.rs +++ b/components/raftstore/src/store/snap.rs @@ -902,7 +902,15 @@ impl Snapshot { for_balance, )? 
}; - SNAPSHOT_LIMIT_GENERATE_BYTES.inc_by(cf_stat.total_size as u64); + SNAPSHOT_LIMIT_GENERATE_BYTES_VEC + .kv + .inc_by(cf_stat.total_kv_size as u64); + SNAPSHOT_LIMIT_GENERATE_BYTES_VEC + .sst + .inc_by(cf_stat.total_sst_size as u64); + SNAPSHOT_LIMIT_GENERATE_BYTES_VEC + .plain + .inc_by(cf_stat.total_plain_size as u64); cf_file.kv_count = cf_stat.key_count as u64; if cf_file.kv_count > 0 { // Use `kv_count` instead of file size to check empty files because encrypted @@ -925,14 +933,16 @@ impl Snapshot { .observe(cf_stat.key_count as f64); SNAPSHOT_CF_SIZE .get(*cf_enum) - .observe(cf_stat.total_size as f64); + .observe(cf_stat.total_kv_size as f64); info!( "scan snapshot of one cf"; "region_id" => region.get_id(), "snapshot" => self.path(), "cf" => cf, "key_count" => cf_stat.key_count, - "size" => cf_stat.total_size, + "size" => cf_stat.total_kv_size, + "sst_size" => cf_stat.total_sst_size, + "plain_size" => cf_stat.total_plain_size, ); } diff --git a/components/raftstore/src/store/snap/io.rs b/components/raftstore/src/store/snap/io.rs index 1d1597e95e2..03bc6caee1e 100644 --- a/components/raftstore/src/store/snap/io.rs +++ b/components/raftstore/src/store/snap/io.rs @@ -9,8 +9,8 @@ use std::{ use encryption::{DataKeyManager, DecrypterReader, EncrypterWriter, Iv}; use engine_traits::{ - CfName, Error as EngineError, IterOptions, Iterable, Iterator, KvEngine, Mutable, RefIterable, - SstCompressionType, SstReader, SstWriter, SstWriterBuilder, WriteBatch, + CfName, Error as EngineError, ExternalSstFileInfo, IterOptions, Iterable, Iterator, KvEngine, + Mutable, RefIterable, SstCompressionType, SstReader, SstWriter, SstWriterBuilder, WriteBatch, }; use fail::fail_point; #[cfg(not(test))] @@ -36,10 +36,24 @@ pub trait StaleDetector { fn is_stale(&self) -> bool; } +/// Statistics for tracking the process of building SST files. #[derive(Clone, Copy, Default)] pub struct BuildStatistics { + /// The total number of keys processed during the build. pub key_count: usize, - pub total_size: usize, + + /// The total size (in bytes) of key-value pairs processed. + /// This represents the combined size of keys and values before any + /// compression. + pub total_kv_size: usize, + + /// The total size (in bytes) of the generated SST files after compression. + /// This reflects the on-disk size of the output files. + pub total_sst_size: usize, + + /// The total size (in bytes) of the raw data in plain text format. + /// This represents the uncompressed size of the CF_LOCK data. + pub total_plain_size: usize, } /// Build a snapshot file for the given column family in plain format. 
@@ -88,7 +102,7 @@ where let mut stats = BuildStatistics::default(); box_try!(snap.scan(cf, start_key, end_key, false, |key, value| { stats.key_count += 1; - stats.total_size += key.len() + value.len(); + stats.total_kv_size += key.len() + value.len(); box_try!(BytesEncoder::encode_compact_bytes(&mut writer, key)); box_try!(BytesEncoder::encode_compact_bytes(&mut writer, value)); Ok(true) @@ -103,6 +117,8 @@ where encrypted_file.unwrap().finalize().unwrap() }; box_try!(file.sync_all()); + let metadata = box_try!(file.metadata()); + stats.total_plain_size += metadata.len() as usize; } else { drop(file); box_try!(fs::remove_file(path)); @@ -163,8 +179,8 @@ where let finish_sst_writer = |sst_writer: E::SstWriter, path: String, key_mgr: Option>| - -> Result<(), Error> { - sst_writer.finish()?; + -> Result { + let info = sst_writer.finish()?; (|| { fail_point!("inject_sst_file_corruption", |_| { static CALLED: std::sync::atomic::AtomicBool = @@ -199,7 +215,7 @@ where return Err(io::Error::new(io::ErrorKind::InvalidData, e).into()); } File::open(&path).and_then(|f| f.sync_all())?; - Ok(()) + Ok(info.file_size()) }; let instant = Instant::now(); @@ -209,7 +225,7 @@ where IoType::Replication }); let mut prev_io_bytes = get_thread_io_bytes_stats().unwrap(); - let mut next_io_check_size = stats.total_size + SCAN_BYTES_PER_IO_LIMIT_CHECK; + let mut next_io_check_size = stats.total_kv_size + SCAN_BYTES_PER_IO_LIMIT_CHECK; let handle_read_io_usage = |prev_io_bytes: &mut IoBytes, remained_quota: &mut usize| { let cur_io_bytes = get_thread_io_bytes_stats().unwrap(); let read_delta = (cur_io_bytes.read - prev_io_bytes.read) as usize; @@ -239,7 +255,9 @@ where match result { Ok(new_sst_writer) => { let old_writer = sst_writer.replace(new_sst_writer); - box_try!(finish_sst_writer(old_writer, prev_path, key_mgr.clone())); + stats.total_sst_size += + box_try!(finish_sst_writer(old_writer, prev_path, key_mgr.clone())) + as usize; } Err(e) => { let io_error = io::Error::new(io::ErrorKind::Other, e); @@ -249,13 +267,13 @@ where } stats.key_count += 1; - stats.total_size += entry_len; + stats.total_kv_size += entry_len; - if stats.total_size >= next_io_check_size { + if stats.total_kv_size >= next_io_check_size { // TODO(@hhwyt): Consider incorporating snapshot file write I/O into the // limiting mechanism. handle_read_io_usage(&mut prev_io_bytes, &mut remained_quota); - next_io_check_size = stats.total_size + SCAN_BYTES_PER_IO_LIMIT_CHECK; + next_io_check_size = stats.total_kv_size + SCAN_BYTES_PER_IO_LIMIT_CHECK; } if let Err(e) = sst_writer.borrow_mut().put(key, value) { @@ -270,14 +288,16 @@ where handle_read_io_usage(&mut prev_io_bytes, &mut remained_quota); if stats.key_count > 0 { - box_try!(finish_sst_writer(sst_writer.into_inner(), path, key_mgr)); + stats.total_sst_size += + box_try!(finish_sst_writer(sst_writer.into_inner(), path, key_mgr)) as usize; cf_file.add_file(file_id); info!( - "build_sst_cf_file_list builds {} files in cf {}. Total keys {}, total size {}. raw_size_per_file {}, total takes {:?}", + "build_sst_cf_file_list builds {} files in cf {}. Total keys {}, total kv size {}, total sst size {}. 
raw_size_per_file {}, total takes {:?}", file_id + 1, cf, stats.key_count, - stats.total_size, + stats.total_kv_size, + stats.total_sst_size, raw_size_per_file, instant.saturating_elapsed(), ); @@ -695,7 +715,7 @@ mod tests { true, ) .unwrap(); - assert_eq!(stats.total_size, 11890); + assert_eq!(stats.total_kv_size, 11890); assert!(start.saturating_elapsed_secs() > 1_f64); } } diff --git a/metrics/grafana/tikv_details.dashboard.py b/metrics/grafana/tikv_details.dashboard.py index 95b915d6173..889015b75c6 100644 --- a/metrics/grafana/tikv_details.dashboard.py +++ b/metrics/grafana/tikv_details.dashboard.py @@ -4095,6 +4095,15 @@ def Snapshot() -> RowPanel: legend_format="clean-region-by-{{type}}", additional_groupby=True, ), + target( + expr=expr_sum_delta( + "tikv_server_snapshot_task_total", + range_selector="1m", + by_labels=["type"], + ), + legend_format="{{type}}", + additional_groupby=True, + ), ], ), graph_panel( @@ -4106,13 +4115,14 @@ def Snapshot() -> RowPanel: expr=expr_sum_rate( "tikv_snapshot_limit_transport_bytes", by_labels=["instance", "type"], - ), + ) ), target( expr=expr_sum_rate( "tikv_snapshot_limit_generate_bytes", + by_labels=["instance", "type"], ), - legend_format="{{instance}}-generate", + legend_format="{{instance}}-generate-{{type}}", ), ], ), diff --git a/metrics/grafana/tikv_details.json b/metrics/grafana/tikv_details.json index b0f8311ac6e..cd155391f28 100644 --- a/metrics/grafana/tikv_details.json +++ b/metrics/grafana/tikv_details.json @@ -59372,6 +59372,21 @@ "refId": "", "step": 10, "target": "" + }, + { + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(delta(\n tikv_server_snapshot_task_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [1m]\n)) by (type, $additional_groupby) ", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "{{type}} {{$additional_groupby}}", + "metric": "", + "query": "sum(delta(\n tikv_server_snapshot_task_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [1m]\n)) by (type, $additional_groupby) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], @@ -59508,15 +59523,15 @@ }, { "datasource": "${DS_TEST-CLUSTER}", - "expr": "sum(rate(\n tikv_snapshot_limit_generate_bytes\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance) ", + "expr": "sum(rate(\n tikv_snapshot_limit_generate_bytes\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance, type) ", "format": "time_series", "hide": false, "instant": false, "interval": "", "intervalFactor": 1, - "legendFormat": "{{instance}}-generate", + "legendFormat": "{{instance}}-generate-{{type}}", "metric": "", - "query": "sum(rate(\n tikv_snapshot_limit_generate_bytes\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance) ", + "query": "sum(rate(\n tikv_snapshot_limit_generate_bytes\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance, type) ", "refId": "", "step": 10, "target": "" diff --git a/metrics/grafana/tikv_details.json.sha256 b/metrics/grafana/tikv_details.json.sha256 index 0c7f268dfa0..06a3aec823e 100644 --- a/metrics/grafana/tikv_details.json.sha256 +++ b/metrics/grafana/tikv_details.json.sha256 @@ -1 +1 @@ 
-e93faab944914bbca21c74daee0223604dd57ba37115c7239d8b707468d5d8db ./metrics/grafana/tikv_details.json +d5cfc7548e0cf9b54b7d1b8615033e6315a53e25f482c8202cf488b183c38470 ./metrics/grafana/tikv_details.json From 58264c108eebb09834c31b7734c5ba194e93194f Mon Sep 17 00:00:00 2001 From: hhwyt Date: Tue, 17 Dec 2024 17:18:36 +0800 Subject: [PATCH 55/86] raftstore: add an error-tolerant IoBytesTracker to calculate delta I/O consumption (#18006) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ref tikv/tikv#15990 This PR fixes improper error handling in calls to get_thread_io_bytes_total and refactors related code for clarity. get_thread_io_bytes_total can fail in two scenarios: 1. LOCAL_IO_STATS is not initialized. 2. Errors occur during ThreadId::fetch_io_bytes(), such as failing to open files. Previously: • In io.rs, calls to get_thread_io_bytes_total did not handle the second type of error. • In future.rs, error handling was implemented but not abstracted into a reusable utility. This PR introduces IoBytesTracker, an error-tolerant utility. It starts calculating incremental I/O bytes only after the first successful initialization of fetch_io_bytes. Any I/O bytes consumed before initialization are intentionally ignored. This approach avoids larger inaccuracies by discarding potentially unreliable data. Since io_bytes_total is a thread-local cumulative metric, failures before the start of the statistical logic can result in a falsely underestimated initial value, which may lead to inaccurate delta calculations. Signed-off-by: hhwyt --- Cargo.lock | 1 + Cargo.toml | 1 + components/file_system/Cargo.toml | 3 + components/file_system/src/file.rs | 2 +- components/file_system/src/lib.rs | 174 +++++++++++++++++++++- components/raftstore/Cargo.toml | 2 +- components/raftstore/src/store/snap/io.rs | 52 +++---- components/resource_control/Cargo.toml | 2 +- components/resource_control/src/future.rs | 79 +++------- 9 files changed, 215 insertions(+), 101 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 713ac614a87..98640ae39c2 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2399,6 +2399,7 @@ dependencies = [ "collections", "crc32fast", "crossbeam-utils", + "fail", "fs2", "lazy_static", "libc 0.2.151", diff --git a/Cargo.toml b/Cargo.toml index 61a577cbe92..979bd8598d2 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -36,6 +36,7 @@ testexport = [ "engine_rocks/testexport", "engine_panic/testexport", "encryption/testexport", + "file_system/testexport" ] test-engine-kv-rocksdb = ["engine_test/test-engine-kv-rocksdb"] test-engine-raft-raft-engine = ["engine_test/test-engine-raft-raft-engine"] diff --git a/components/file_system/Cargo.toml b/components/file_system/Cargo.toml index bcfa7221825..9e43bf63596 100644 --- a/components/file_system/Cargo.toml +++ b/components/file_system/Cargo.toml @@ -7,11 +7,14 @@ license = "Apache-2.0" [features] bcc-iosnoop = ["bcc"] +failpoints = ["fail/failpoints"] +testexport = [] [dependencies] collections = { workspace = true } crc32fast = "1.2" crossbeam-utils = { workspace = true } +fail = "0.5" fs2 = "0.4" lazy_static = "1.3" libc = "0.2" diff --git a/components/file_system/src/file.rs b/components/file_system/src/file.rs index c072b8f852f..76d662464f8 100644 --- a/components/file_system/src/file.rs +++ b/components/file_system/src/file.rs @@ -254,7 +254,7 @@ impl OpenOptionsExt for OpenOptions { mod tests { use tempfile::Builder; - use super::{super::*, *}; + use super::super::*; #[test] fn test_instrumented_file() { diff --git 
a/components/file_system/src/lib.rs b/components/file_system/src/lib.rs index 48a7e59d447..f9240e998f6 100644 --- a/components/file_system/src/lib.rs +++ b/components/file_system/src/lib.rs @@ -5,19 +5,13 @@ #[macro_use] extern crate lazy_static; - #[cfg(test)] extern crate test; - #[allow(unused_extern_crates)] extern crate tikv_alloc; -mod file; -mod io_stats; -mod metrics; -mod metrics_manager; -mod rate_limiter; - +#[cfg(any(test, feature = "testexport"))] +use std::cell::Cell; pub use std::{ convert::TryFrom, fs::{ @@ -54,6 +48,12 @@ use serde::{Deserialize, Deserializer, Serialize, Serializer}; use strum::{EnumCount, EnumIter}; use tokio::io::{AsyncRead, ReadBuf}; +mod file; +mod io_stats; +mod metrics; +mod metrics_manager; +mod rate_limiter; + #[derive(Clone, Copy, Debug, PartialEq)] pub enum IoOp { Read, @@ -143,6 +143,122 @@ impl std::ops::AddAssign for IoBytes { } } +#[cfg(not(any(test, feature = "testexport")))] +fn get_thread_io_bytes_stats() -> Result { + get_thread_io_bytes_total() +} + +/// Simulates getting the IO bytes stats for the current thread in test +/// scenarios. +/// +/// This function retrieves the thread-local IO stats and adds the current +/// mock delta values (both read and write) to it. The mock delta is updated +/// on each invocation, simulating incremental IO operations. This is useful +/// for testing scenarios where the IO stats change over time. +#[cfg(any(test, feature = "testexport"))] +fn get_thread_io_bytes_stats() -> Result { + fail::fail_point!("failed_to_get_thread_io_bytes_stats", |_| { + Err("get_thread_io_bytes_total failed".into()) + }); + thread_local! { + static TOTAL_BYTES: Cell = Cell::new(IoBytes::default()); + } + TOTAL_BYTES.with(|stats| { + let mut current_stats = stats.get(); + + // Add the mock IO bytes to the stats. + current_stats.read += (|| { + fail::fail_point!("delta_read_io_bytes", |d| d + .unwrap() + .parse::() + .unwrap()); + 0 + })(); + current_stats.write += (|| { + fail::fail_point!("delta_write_io_bytes", |d| d + .unwrap() + .parse::() + .unwrap()); + 0 + })(); + + stats.set(current_stats); + Ok(current_stats) + }) +} + +/// A utility struct to track I/O bytes with error-tolerant initialization. +/// +/// This struct is used to compute the delta (difference) of I/O bytes between +/// successive calls to `get_thread_io_bytes_total`. It handles cases where the +/// first call to `get_thread_io_bytes_total` may fail by ignoring I/O bytes +/// until a successful value is obtained. +/// +/// Detail explanation: +/// 1. On the first successful call to `get_thread_io_bytes_total`, the value is +/// treated as the initial baseline. +/// 2. If `get_thread_io_bytes_total` fails initially, all I/O bytes before a +/// successful call are ignored. +/// 3. Once initialized, this struct calculates the delta between successive +/// values from `get_thread_io_bytes_total`. +pub struct IoBytesTracker { + // A flag indicating whether the tracker has been successfully initialized. + initialized: bool, + // Stores the previous successfully fetched I/O bytes. Used to calculate deltas. + prev_io_bytes: IoBytes, +} +impl IoBytesTracker { + /// Creates a new `IoBytesTracker` and attempts to initialize it. + /// + /// If `get_thread_io_bytes_total` succeeds during initialization, + /// the tracker is marked as initialized and ready to compute deltas. + /// Otherwise, it will defer initialization until the next successful + /// `update`. 
+ pub fn new() -> Self { + let mut tracker = IoBytesTracker { + initialized: false, + prev_io_bytes: IoBytes::default(), + }; + tracker.update(); // Attempt to initialize immediately + tracker + } + + /// Update the tracker with the current I/O bytes. + /// If initialization failed previously, it will initialize on the first + /// successful fetch. Returns the delta of I/O bytes if initialized, + /// otherwise returns None. + pub fn update(&mut self) -> Option { + match get_thread_io_bytes_stats() { + Ok(current_io_bytes) => { + if self.initialized { + let read_delta = current_io_bytes.read - self.prev_io_bytes.read; + let write_delta = current_io_bytes.write - self.prev_io_bytes.write; + self.prev_io_bytes = current_io_bytes; + Some(IoBytes { + read: read_delta, + write: write_delta, + }) + } else { + // Initialize on the first successful fetch + self.prev_io_bytes = current_io_bytes; + self.initialized = true; + None // No delta to report yet + } + } + Err(_) => { + // Skip updates if the current fetch fails + None + } + } + } +} + +impl Default for IoBytesTracker { + fn default() -> Self { + Self::new() + } +} + #[repr(u32)] #[derive(Debug, Clone, PartialEq, Copy, EnumCount)] pub enum IoPriority { @@ -656,4 +772,46 @@ mod tests { reserve_space_for_recover(data_path, 0).unwrap(); assert!(!file.exists()); } + + #[test] + fn test_io_bytes_tracker_normal() { + #[cfg(not(feature = "failpoints"))] + return; + + fail::cfg("delta_read_io_bytes", "return(100)").unwrap(); + fail::cfg("delta_write_io_bytes", "return(50)").unwrap(); + let mut io_tracker = IoBytesTracker::new(); + assert_eq!(io_tracker.prev_io_bytes.read, 100); + assert_eq!(io_tracker.prev_io_bytes.write, 50); + assert_eq!(io_tracker.initialized, true); + let io_bytes = io_tracker.update(); + assert_eq!(io_bytes.unwrap().read, 100); + assert_eq!(io_bytes.unwrap().write, 50); + assert_eq!(io_tracker.prev_io_bytes.read, 200); + assert_eq!(io_tracker.prev_io_bytes.write, 100); + } + + #[test] + fn test_io_bytes_tracker_initialization_failure() { + #[cfg(not(feature = "failpoints"))] + return; + + fail::cfg("failed_to_get_thread_io_bytes_stats", "1*return").unwrap(); + fail::cfg("delta_read_io_bytes", "return(100)").unwrap(); + fail::cfg("delta_write_io_bytes", "return(50)").unwrap(); + + let mut io_tracker = IoBytesTracker::new(); + assert_eq!(io_tracker.initialized, false); + assert_eq!(io_tracker.prev_io_bytes.read, 0); + assert_eq!(io_tracker.prev_io_bytes.write, 0); + let io_bytes = io_tracker.update(); + assert!(io_bytes.is_none()); + assert_eq!(io_tracker.prev_io_bytes.read, 100); + assert_eq!(io_tracker.prev_io_bytes.write, 50); + let io_bytes = io_tracker.update(); + assert_eq!(io_bytes.unwrap().read, 100); + assert_eq!(io_bytes.unwrap().write, 50); + assert_eq!(io_tracker.prev_io_bytes.read, 200); + assert_eq!(io_tracker.prev_io_bytes.write, 100); + } } diff --git a/components/raftstore/Cargo.toml b/components/raftstore/Cargo.toml index 3f4cdd961d4..ce017f9330f 100644 --- a/components/raftstore/Cargo.toml +++ b/components/raftstore/Cargo.toml @@ -36,7 +36,7 @@ engine_rocks = { workspace = true, optional = true } engine_traits = { workspace = true } error_code = { workspace = true } fail = "0.5" -file_system = { workspace = true } +file_system = { workspace = true, features = ["testexport"] } futures = "0.3" futures-util = { version = "0.3.1", default-features = false, features = [ "io", diff --git a/components/raftstore/src/store/snap/io.rs b/components/raftstore/src/store/snap/io.rs index 03bc6caee1e..74a5739f2c6 100644 --- 
a/components/raftstore/src/store/snap/io.rs +++ b/components/raftstore/src/store/snap/io.rs @@ -13,9 +13,7 @@ use engine_traits::{ Mutable, RefIterable, SstCompressionType, SstReader, SstWriter, SstWriterBuilder, WriteBatch, }; use fail::fail_point; -#[cfg(not(test))] -use file_system::get_thread_io_bytes_total; -use file_system::{File, IoBytes, IoType, OpenOptions, WithIoType}; +use file_system::{File, IoBytesTracker, IoType, OpenOptions, WithIoType}; use kvproto::encryptionpb::EncryptionMethod; use tikv_util::{ box_try, @@ -127,25 +125,6 @@ where Ok(stats) } -#[cfg(not(test))] -fn get_thread_io_bytes_stats() -> Result { - get_thread_io_bytes_total() -} - -#[cfg(test)] -fn get_thread_io_bytes_stats() -> Result { - use std::cell::Cell; - thread_local! { - static TOTAL_BYTES: Cell = Cell::new(IoBytes::default()); - } - let mut new_bytes = TOTAL_BYTES.get(); - // We use 2 as the factor because the compression ratio of SST files with the - // zstd algorithm is empirically around 2x. - new_bytes.read += SCAN_BYTES_PER_IO_LIMIT_CHECK as u64 / 2; - TOTAL_BYTES.set(new_bytes); - Ok(new_bytes) -} - /// Build a snapshot file for the given column family in sst format. /// If there are no key-value pairs fetched, no files will be created at `path`, /// otherwise the file will be created and synchronized. @@ -224,18 +203,17 @@ where } else { IoType::Replication }); - let mut prev_io_bytes = get_thread_io_bytes_stats().unwrap(); - let mut next_io_check_size = stats.total_kv_size + SCAN_BYTES_PER_IO_LIMIT_CHECK; - let handle_read_io_usage = |prev_io_bytes: &mut IoBytes, remained_quota: &mut usize| { - let cur_io_bytes = get_thread_io_bytes_stats().unwrap(); - let read_delta = (cur_io_bytes.read - prev_io_bytes.read) as usize; - while read_delta > *remained_quota { - io_limiter.blocking_consume(IO_LIMITER_CHUNK_SIZE); - *remained_quota += IO_LIMITER_CHUNK_SIZE; + let mut io_tracker = IoBytesTracker::new(); + let mut next_io_check_size = stats.total_kv_size + SCAN_BYTES_PER_IO_LIMIT_CHECK; + let handle_read_io_usage = |io_tracker: &mut IoBytesTracker, remained_quota: &mut usize| { + if let Some(io_bytes_delta) = io_tracker.update() { + while io_bytes_delta.read as usize > *remained_quota { + io_limiter.blocking_consume(IO_LIMITER_CHUNK_SIZE); + *remained_quota += IO_LIMITER_CHUNK_SIZE; + } + *remained_quota -= io_bytes_delta.read as usize; } - *remained_quota -= read_delta; - *prev_io_bytes = cur_io_bytes; }; box_try!(snap.scan(cf, start_key, end_key, false, |key, value| { @@ -272,7 +250,7 @@ where if stats.total_kv_size >= next_io_check_size { // TODO(@hhwyt): Consider incorporating snapshot file write I/O into the // limiting mechanism. - handle_read_io_usage(&mut prev_io_bytes, &mut remained_quota); + handle_read_io_usage(&mut io_tracker, &mut remained_quota); next_io_check_size = stats.total_kv_size + SCAN_BYTES_PER_IO_LIMIT_CHECK; } @@ -285,7 +263,7 @@ where })); // Handle the IO generated by the remaining key-value pairs less than // SCAN_BYTES_PER_IO_LIMIT_CHECK. - handle_read_io_usage(&mut prev_io_bytes, &mut remained_quota); + handle_read_io_usage(&mut io_tracker, &mut remained_quota); if stats.key_count > 0 { stats.total_sst_size += @@ -689,8 +667,12 @@ mod tests { // exceed 1 second. #[test] fn test_build_sst_with_io_limiter() { + #[cfg(not(feature = "failpoints"))] + return; + let dir = Builder::new().prefix("test-io-limiter").tempdir().unwrap(); let db = open_test_db_with_nkeys(dir.path(), None, None, 1000).unwrap(); + // The max throughput is 8000 bytes/sec. 
let bytes_per_sec = 8000_f64; let limiter = Limiter::new(bytes_per_sec); let snap_dir = Builder::new().prefix("snap-dir").tempdir().unwrap(); @@ -703,6 +685,7 @@ mod tests { }; let start = Instant::now(); + fail::cfg("delta_read_io_bytes", "return(4096)").unwrap(); let stats = build_sst_cf_file_list::( &mut cf_file, &db, @@ -716,6 +699,7 @@ mod tests { ) .unwrap(); assert_eq!(stats.total_kv_size, 11890); + // Must exceed 1 second! assert!(start.saturating_elapsed_secs() > 1_f64); } } diff --git a/components/resource_control/Cargo.toml b/components/resource_control/Cargo.toml index 1c8ca387483..6ce694387a0 100644 --- a/components/resource_control/Cargo.toml +++ b/components/resource_control/Cargo.toml @@ -13,7 +13,7 @@ collections = { workspace = true } crossbeam = { workspace = true } dashmap = "5.1" fail = "0.5" -file_system = { workspace = true } +file_system = { workspace = true, features = ["testexport"] } futures = { version = "0.3", features = ["compat"] } kvproto = { workspace = true } lazy_static = "1.0" diff --git a/components/resource_control/src/future.rs b/components/resource_control/src/future.rs index 31af006b21d..c115fcffef4 100644 --- a/components/resource_control/src/future.rs +++ b/components/resource_control/src/future.rs @@ -8,7 +8,7 @@ use std::{ time::Duration, }; -use file_system::IoBytes; +use file_system::{IoBytes, IoBytesTracker}; use futures::compat::{Compat01As03, Future01CompatExt}; use pin_project::pin_project; use tikv_util::{time::Instant, timer::GLOBAL_TIMER_HANDLE, warn}; @@ -54,29 +54,6 @@ impl Future for ControlledFuture { } } -#[cfg(not(test))] -fn get_thread_io_bytes_stats() -> Result { - file_system::get_thread_io_bytes_total() -} - -#[cfg(test)] -fn get_thread_io_bytes_stats() -> Result { - use std::cell::Cell; - - fail::fail_point!("failed_to_get_thread_io_bytes_stats", |_| { - Err("get_thread_io_bytes_total failed".into()) - }); - thread_local! { - static TOTAL_BYTES: Cell = Cell::new(IoBytes::default()); - } - - let mut new_bytes = TOTAL_BYTES.get(); - new_bytes.read += 100; - new_bytes.write += 50; - TOTAL_BYTES.set(new_bytes); - Ok(new_bytes) -} - // `LimitedFuture` wraps a Future with ResourceLimiter, it will automically // statistics the cpu time and io bytes consumed by the future, and do async // waiting according the configuration of the ResourceLimiter. @@ -142,36 +119,23 @@ impl Future for LimitedFuture { } // get io stats is very expensive, so we only do so if only io control is // enabled. 
- let mut last_io_bytes = None; - if this + let mut io_tracker = if this .resource_limiter .get_limiter(ResourceType::Io) .get_rate_limit() .is_finite() { - match get_thread_io_bytes_stats() { - Ok(b) => { - last_io_bytes = Some(b); - } - Err(e) => { - warn!("load thread io bytes failed"; "err" => e); - } - } - } + Some(IoBytesTracker::new()) + } else { + None + }; let start = Instant::now(); let res = this.f.poll(cx); let dur = start.saturating_elapsed(); - let io_bytes = if let Some(last_io_bytes) = last_io_bytes { - match get_thread_io_bytes_stats() { - Ok(io_bytes) => io_bytes - last_io_bytes, - Err(e) => { - warn!("load thread io bytes failed"; "err" => e); - IoBytes::default() - } - } - } else { - IoBytes::default() - }; + let io_bytes = io_tracker + .as_mut() + .and_then(|tracker| tracker.update()) + .unwrap_or_else(IoBytes::default); let mut wait_dur = this .resource_limiter .consume(dur, io_bytes, res.is_pending()); @@ -280,6 +244,9 @@ mod tests { #[test] fn test_limited_future() { + #[cfg(not(feature = "failpoints"))] + return; + let pool = YatpPoolBuilder::new(DefaultTicker::default()) .thread_count(1, 1, 1) .name_prefix("test") @@ -304,6 +271,9 @@ mod tests { receiver.recv().unwrap(); } + fail::cfg("delta_read_io_bytes", "return(100)").unwrap(); + fail::cfg("delta_write_io_bytes", "return(50)").unwrap(); + let mut i = 0; let mut stats: GroupStatistics; // consume the remain free limit quota. @@ -331,15 +301,12 @@ mod tests { ); // fetch io bytes failed, consumed value is 0. - #[cfg(feature = "failpoints")] - { - fail::cfg("failed_to_get_thread_io_bytes_stats", "1*return").unwrap(); - spawn_and_wait(&pool, empty(), resource_limiter.clone()); - assert_eq!( - resource_limiter.get_limit_statistics(Io).total_consumed, - new_stats.total_consumed - ); - fail::remove("failed_to_get_thread_io_bytes_stats"); - } + fail::cfg("failed_to_get_thread_io_bytes_stats", "1*return").unwrap(); + spawn_and_wait(&pool, empty(), resource_limiter.clone()); + assert_eq!( + resource_limiter.get_limit_statistics(Io).total_consumed, + new_stats.total_consumed + ); + fail::remove("failed_to_get_thread_io_bytes_stats"); } } From c970d9123ecc3afb51a8fcdb2ef07756ac99f5c1 Mon Sep 17 00:00:00 2001 From: glorv Date: Tue, 17 Dec 2024 19:27:30 +0800 Subject: [PATCH 56/86] in_memory_engine: evict region on rollback merge event (#18012) close tikv/tikv#18008 If a region is loaded after PrepareMerge (e.g. due to hot-region load when the threshold is set too low) and the merge is then rolled back, the target region should either be evicted or updated to the newer epoch version. This PR chooses eviction on the rollback merge event for simplicity.
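The reasoning hinges on epoch versions: PrepareMerge bumps the region's epoch version, and a later RollbackMerge bumps it again, so a cache entry tracked at the pre-rollback epoch can never match later requests and has to go. A rough sketch of the decision, with a simplified enum standing in for the kvproto `AdminCmdType`:

    // Simplified sketch; this `AdminCmdType` is a stand-in for the kvproto enum
    // and lists only the variants relevant to this change.
    enum AdminCmdType {
        PrepareMerge,
        CommitMerge,
        RollbackMerge,
        Other,
    }

    // A cached region must be evicted (or reloaded at the new epoch) whenever an
    // observed admin command changes the region epoch version underneath it.
    fn must_evict_cached_region(cmd: &AdminCmdType, region_modified: bool) -> bool {
        region_modified
            && matches!(
                cmd,
                AdminCmdType::PrepareMerge
                    | AdminCmdType::CommitMerge
                    | AdminCmdType::RollbackMerge
            )
    }

Eviction is preferred over updating the cached epoch because it keeps the observer logic trivial; a region that is still hot can simply be loaded again later.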
Signed-off-by: glorv Co-authored-by: ti-chi-bot[bot] <108142056+ti-chi-bot[bot]@users.noreply.github.com> --- .../src/observer/load_eviction.rs | 4 +- .../failpoints/cases/test_in_memory_engine.rs | 100 +++++++++++++++++- 2 files changed, 100 insertions(+), 4 deletions(-) diff --git a/components/hybrid_engine/src/observer/load_eviction.rs b/components/hybrid_engine/src/observer/load_eviction.rs index cc2e12aacd4..8a3728a0618 100644 --- a/components/hybrid_engine/src/observer/load_eviction.rs +++ b/components/hybrid_engine/src/observer/load_eviction.rs @@ -78,7 +78,9 @@ impl LoadEvictionObserver { || (state.modified_region.is_some() && matches!( cmd.request.get_admin_request().get_cmd_type(), - AdminCmdType::PrepareMerge | AdminCmdType::CommitMerge + AdminCmdType::PrepareMerge + | AdminCmdType::CommitMerge + | AdminCmdType::RollbackMerge )) { let cache_region = CacheRegion::from_region(ctx.region()); diff --git a/tests/failpoints/cases/test_in_memory_engine.rs b/tests/failpoints/cases/test_in_memory_engine.rs index 13ef4c5cbfa..11915e83836 100644 --- a/tests/failpoints/cases/test_in_memory_engine.rs +++ b/tests/failpoints/cases/test_in_memory_engine.rs @@ -19,7 +19,7 @@ use kvproto::{ import_sstpb::SstMeta, kvrpcpb::Context, raft_cmdpb::{AdminCmdType, CmdType, RaftCmdRequest, RaftRequestHeader, Request}, - raft_serverpb::RaftMessage, + raft_serverpb::{PeerState, RaftMessage}, }; use pd_client::PdClient; use protobuf::Message; @@ -36,8 +36,9 @@ use test_coprocessor::{ handle_request, init_data_with_details_pd_client, DagChunkSpliter, DagSelect, ProductTable, }; use test_raftstore::{ - get_tso, new_learner_peer, new_peer, new_put_cf_cmd, new_server_cluster_with_hybrid_engine, - CloneFilterFactory, Cluster, Direction, RegionPacketFilter, ServerCluster, + configure_for_merge, get_tso, must_get_equal, new_learner_peer, new_peer, new_put_cf_cmd, + new_server_cluster_with_hybrid_engine, CloneFilterFactory, Cluster, Direction, + RegionPacketFilter, ServerCluster, }; use test_util::eventually; use tidb_query_datatype::{ @@ -1059,3 +1060,96 @@ fn test_eviction_when_destroy_uninitialized_peer() { pd_client.must_add_peer(region.get_id(), learner2.clone()); cluster.must_region_exist(region.get_id(), 2); } + +// IME should also handle RollbackMerge event, we also try to evict the region +// on merge rollback for simplicity. If region is loaded after PrepareMerge and +// the merge is rollbacked, IME should track this rollback because it will also +// change epoch version. 
+#[test] +fn test_region_rollback_merge() { + let mut cluster = new_server_cluster_with_hybrid_engine(0, 3); + configure_for_merge(&mut cluster.cfg); + let pd_client = Arc::clone(&cluster.pd_client); + pd_client.disable_default_operator(); + + cluster.run_conf_change(); + + let region = pd_client.get_region(b"k1").unwrap(); + cluster.must_split(®ion, b"k2"); + let left = pd_client.get_region(b"k1").unwrap(); + let right = pd_client.get_region(b"k2").unwrap(); + + pd_client.must_add_peer(left.get_id(), new_peer(2, 2)); + pd_client.must_add_peer(right.get_id(), new_peer(2, 4)); + + cluster.must_put(b"k1", b"v1"); + cluster.must_put(b"k3", b"v3"); + + let mut region = pd_client.get_region(b"k1").unwrap(); + let target_region = pd_client.get_region(b"k3").unwrap(); + + let schedule_merge_fp = "on_schedule_merge"; + fail::cfg(schedule_merge_fp, "return()").unwrap(); + + let (tx, rx) = sync_channel(1); + fail::cfg_callback("on_apply_res_prepare_merge", move || { + tx.send(()).unwrap(); + }) + .unwrap(); + + cluster.merge_region(region.get_id(), target_region.get_id(), Callback::None); + // PrepareMerge is applied. + rx.recv().unwrap(); + + let leader = cluster.leader_of_region(left.get_id()).unwrap(); + + let region_cache_engine = cluster.sim.rl().get_region_cache_engine(leader.store_id); + + // After prepare merge, version becomes 2 + 1 = 3; + region.mut_region_epoch().set_version(3); + // load region after PrepareMerge. + { + let cache_region = CacheRegion::from_region(®ion); + region_cache_engine + .core() + .region_manager() + .new_region(cache_region); + } + + // Add a peer to trigger rollback. + pd_client.must_add_peer(right.get_id(), new_peer(3, 5)); + cluster.must_put(b"k4", b"v4"); + must_get_equal(&cluster.get_engine(3), b"k4", b"v4"); + + let mut region = pd_client.get_region(b"k1").unwrap(); + // After split and prepare_merge, version becomes 1 + 2 = 3; + assert_eq!(region.get_region_epoch().get_version(), 3); + // After ConfChange and prepare_merge, conf version becomes 1 + 2 = 3; + assert_eq!(region.get_region_epoch().get_conf_ver(), 3); + fail::remove(schedule_merge_fp); + // Wait till rollback. + cluster.must_put(b"k11", b"v11"); + + // After rollback, version becomes 3 + 1 = 4; + region.mut_region_epoch().set_version(4); + for i in 1..3 { + must_get_equal(&cluster.get_engine(i), b"k11", b"v11"); + let state = cluster.region_local_state(region.get_id(), i); + assert_eq!(state.get_state(), PeerState::Normal); + assert_eq!(*state.get_region(), region); + } + + // after rollback, IME cached region is evicted. + test_util::eventually( + Duration::from_millis(10), + Duration::from_millis(1000), + || { + let region_map = region_cache_engine + .core() + .region_manager() + .regions_map() + .read(); + region_map.regions().is_empty() + }, + ); +} From 6b78e092ec6cd4bd149f549d2b3ee902391e5cd9 Mon Sep 17 00:00:00 2001 From: glorv Date: Tue, 17 Dec 2024 20:01:39 +0800 Subject: [PATCH 57/86] raftstore: do not skip handling raft command (#18013) close tikv/tikv#18005 Do not skip handling raft command when peer fsm stopped. RaftCommand should always be handled or they will cause panic. 
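The invariant being protected is that every RaftCommand carries a callback that must be invoked exactly once; silently skipping the message means the callback is dropped uncalled, which trips a panic. A condensed sketch of the intended receive-loop filtering, using simplified stand-in message types rather than the real raftstore ones:

    // Stand-in message type; only the distinction that matters here is modeled.
    enum PeerMsg {
        // Carries a callback that must always be completed, typically with an
        // error response once the peer is gone.
        RaftCommand(Box<dyn FnOnce()>),
        Casual,
    }

    fn handle_msgs(stopped: bool, msgs: Vec<PeerMsg>) {
        for m in msgs {
            // Once the fsm is stopped, skip messages that may touch destroyed
            // state, but never skip a RaftCommand: its callback still has to run.
            if stopped && !matches!(&m, PeerMsg::RaftCommand(_)) {
                continue;
            }
            match m {
                PeerMsg::RaftCommand(cb) => cb(),
                PeerMsg::Casual => { /* may read raft logs, unsafe after stop */ }
            }
        }
    }

The accompanying test sends a ReadIndex command just before the casual message that previously caused the panic and asserts that the command still receives an error response.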
Signed-off-by: glorv Co-authored-by: ti-chi-bot[bot] <108142056+ti-chi-bot[bot]@users.noreply.github.com> --- components/raftstore/src/store/fsm/peer.rs | 6 ++++-- tests/failpoints/cases/test_merge.rs | 23 ++++++++++++++++++++++ 2 files changed, 27 insertions(+), 2 deletions(-) diff --git a/components/raftstore/src/store/fsm/peer.rs b/components/raftstore/src/store/fsm/peer.rs index 11238f3c698..54cef0ab58f 100644 --- a/components/raftstore/src/store/fsm/peer.rs +++ b/components/raftstore/src/store/fsm/peer.rs @@ -672,8 +672,10 @@ where // skip handling remain messages if fsm is destroyed. This can aviod handling // arbitary messages(e.g. CasualMessage::ForceCompactRaftLogs) that may need // to read raft logs which maybe lead to panic. - if self.fsm.stopped { - break; + // We do not skip RaftCommand because raft commond callback should always be + // handled or it will cause panic. + if self.fsm.stopped && !matches!(&m, PeerMsg::RaftCommand(_)) { + continue; } distribution[m.discriminant()] += 1; match m { diff --git a/tests/failpoints/cases/test_merge.rs b/tests/failpoints/cases/test_merge.rs index 502fce4d727..50fd6d68e41 100644 --- a/tests/failpoints/cases/test_merge.rs +++ b/tests/failpoints/cases/test_merge.rs @@ -14,6 +14,7 @@ use engine_traits::{Peekable, CF_RAFT}; use grpcio::{ChannelBuilder, Environment}; use kvproto::{ kvrpcpb::{PrewriteRequestPessimisticAction::*, *}, + raft_cmdpb::{self, RaftCmdRequest}, raft_serverpb::{PeerState, RaftMessage, RegionLocalState}, tikvpb::TikvClient, }; @@ -2305,6 +2306,25 @@ fn test_raft_log_gc_after_merge() { rx.recv_timeout(Duration::from_secs(1)).unwrap(); let raft_router = cluster.get_router(1).unwrap(); + + // send a raft cmd to test when peer fsm is closed, this cmd will still be + // handled. + let cmd = { + let mut cmd = RaftCmdRequest::default(); + let mut req = raft_cmdpb::Request::default(); + req.set_read_index(raft_cmdpb::ReadIndexRequest::default()); + cmd.mut_requests().push(req); + cmd.mut_header().region_id = 1; + cmd + }; + let (tx, rx) = std::sync::mpsc::channel(); + let callback = Callback::read(Box::new(move |req| { + tx.send(req).unwrap(); + })); + let cmd_req = RaftCommand::new(cmd, callback); + raft_router.send_raft_command(cmd_req).unwrap(); + + // send a casual msg that can trigger panic after peer fms closed. raft_router .send_casual_msg(1, CasualMessage::ForceCompactRaftLogs) .unwrap(); @@ -2314,4 +2334,7 @@ fn test_raft_log_gc_after_merge() { // wait some time for merge finish. 
std::thread::sleep(Duration::from_secs(1)); must_get_equal(&cluster.get_engine(1), b"k3", b"v3"); + + let resp = rx.recv().unwrap(); + assert!(resp.response.get_header().has_error()); } From 5c4a575faadb976616813028609550185afb5971 Mon Sep 17 00:00:00 2001 From: Hangjie Mo Date: Tue, 17 Dec 2024 21:19:09 +0800 Subject: [PATCH 58/86] tidb_query_expr: fix round function with real type (#18009) close tikv/tikv#18010 Signed-off-by: Hangjie Mo Co-authored-by: ti-chi-bot[bot] <108142056+ti-chi-bot[bot]@users.noreply.github.com> --- components/tidb_query_expr/src/impl_math.rs | 34 ++++++++++++++++++--- components/tidb_query_expr/src/lib.rs | 1 + 2 files changed, 31 insertions(+), 4 deletions(-) diff --git a/components/tidb_query_expr/src/impl_math.rs b/components/tidb_query_expr/src/impl_math.rs index 41d8a625816..1ce86f10193 100644 --- a/components/tidb_query_expr/src/impl_math.rs +++ b/components/tidb_query_expr/src/impl_math.rs @@ -411,7 +411,7 @@ pub fn conv(n: BytesRef, from_base: &Int, to_base: &Int) -> Result #[inline] #[rpn_fn] pub fn round_real(arg: &Real) -> Result> { - Ok(Real::new(arg.round()).ok()) + Ok(Real::new(arg.round_ties_even()).ok()) } #[inline] @@ -551,9 +551,12 @@ fn round_with_frac_dec(arg0: &Decimal, arg1: &Int) -> Result> { pub fn round_with_frac_real(arg0: &Real, arg1: &Int) -> Result> { let number = arg0; let digits = arg1; - let power = 10.0_f64.powi(-digits as i32); - let frac = *number / power; - Ok(Some(Real::new(frac.round() * power).unwrap())) + let power = 10.0_f64.powi(*digits as i32); + let frac = *number * power; + if frac.is_infinite() { + return Ok(Some(*number)); + } + Ok(Some(Real::new(frac.round_ties_even() / power).unwrap())) } thread_local! { @@ -1702,6 +1705,14 @@ mod tests { Some(Real::new(-3.12_f64).unwrap()), Some(Real::new(-3f64).unwrap()), ), + ( + Some(Real::new(-3.5_f64).unwrap()), + Some(Real::new(-4f64).unwrap()), + ), + ( + Some(Real::new(-4.5_f64).unwrap()), + Some(Real::new(-4f64).unwrap()), + ), ( Some(Real::new(f64::MAX).unwrap()), Some(Real::new(f64::MAX).unwrap()), @@ -2072,6 +2083,21 @@ mod tests { Some(-1), Some(Real::new(20.0_f64).unwrap()), ), + ( + Some(Real::new(0.95_f64).unwrap()), + Some(1), + Some(Real::new(1.0_f64).unwrap()), + ), + ( + Some(Real::new(1.05_f64).unwrap()), + Some(1), + Some(Real::new(1.0_f64).unwrap()), + ), + ( + Some(Real::new(1.05_f64).unwrap()), + Some(1000000), + Some(Real::new(1.05_f64).unwrap()), + ), (Some(Real::new(23.298_f64).unwrap()), None, None), (None, Some(2), None), (None, None, None), diff --git a/components/tidb_query_expr/src/lib.rs b/components/tidb_query_expr/src/lib.rs index 06df1084b82..55cb4601f54 100644 --- a/components/tidb_query_expr/src/lib.rs +++ b/components/tidb_query_expr/src/lib.rs @@ -14,6 +14,7 @@ #![feature(specialization)] #![feature(test)] #![feature(const_mut_refs)] +#![feature(round_ties_even)] #[macro_use(box_err, box_try, try_opt)] extern crate tikv_util; From dd1edd061ab87bc42f44ec65f6b371dd1410acee Mon Sep 17 00:00:00 2001 From: Bisheng Huang Date: Wed, 18 Dec 2024 11:33:39 +0800 Subject: [PATCH 59/86] raftstore: properly release snapshot precheck resource after snapshot reception (#17903) close tikv/tikv#17881 Ensures `recving_count` is decremented before releasing the snapshot precheck resource. This prevents a race condition where a new precheck succeeds, but the receiver rejects the snapshot because it fails the `receiving_busy` check. 
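The race is an ordering problem between two checks made by different actors: the sender-side precheck consumes a slot in the snapshot receive concurrency limiter, while the receiver-side busy check looks at `recving_count`. If the limiter slot is released before `recving_count` is decremented, a new precheck can succeed while the receiver still reports itself busy, and the snapshot that follows is rejected. A condensed sketch of the corrected cleanup ordering, with plain atomics standing in for both mechanisms:

    use std::sync::atomic::{AtomicUsize, Ordering};

    // Stand-ins: `recving_count` backs the receiver's busy check, while
    // `precheck_slots` models the concurrency limiter consulted by the
    // sender-side precheck.
    fn cleanup_after_recv(recving_count: &AtomicUsize, precheck_slots: &AtomicUsize) {
        // 1. First make the receiver look idle again ...
        recving_count.fetch_sub(1, Ordering::SeqCst);
        // 2. ... and only then free the precheck slot. In the opposite order a
        //    new precheck could pass while `recving_count` is still elevated,
        //    so the incoming snapshot would fail the busy check.
        precheck_slots.fetch_add(1, Ordering::SeqCst);
    }

In the real change this ordering is enforced by a `defer!`-style cleanup that runs after the receive future finishes, whether it succeeded or failed.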
Signed-off-by: Bisheng Huang --- components/raftstore/src/store/snap.rs | 6 ++- src/server/snap.rs | 39 +++++++++++++--- tests/failpoints/cases/test_snap.rs | 62 +++++++++++++++++++++++++- 3 files changed, 98 insertions(+), 9 deletions(-) diff --git a/components/raftstore/src/store/snap.rs b/components/raftstore/src/store/snap.rs index 72b3ce1bf40..bc2aacb88ae 100644 --- a/components/raftstore/src/store/snap.rs +++ b/components/raftstore/src/store/snap.rs @@ -1927,7 +1927,11 @@ impl SnapManager { /// recv_snap_complete is part of the snapshot recv precheck process, and /// should be called when a follower finishes receiving a snapshot. pub fn recv_snap_complete(&self, region_id: u64) { - self.core.recv_concurrency_limiter.finish_recv(region_id) + self.core.recv_concurrency_limiter.finish_recv(region_id); + // In tests, the first failpoint can be used to trigger a callback while + // the second failpoint can be used to pause the thread. + fail_point!("post_recv_snap_complete1", region_id == 1, |_| {}); + fail_point!("post_recv_snap_complete2", region_id == 1, |_| {}); } /// Adjusts the capacity of the snapshot receive concurrency limiter to diff --git a/src/server/snap.rs b/src/server/snap.rs index b8b1f5b97d7..fc907026bd6 100644 --- a/src/server/snap.rs +++ b/src/server/snap.rs @@ -5,7 +5,7 @@ use std::{ io::{Error as IoError, ErrorKind, Read, Write}, pin::Pin, sync::{ - atomic::{AtomicUsize, Ordering}, + atomic::{AtomicU64, AtomicUsize, Ordering}, Arc, }, time::{Duration, Instant as StdInstant}, @@ -369,11 +369,17 @@ fn recv_snap( sink: ClientStreamingSink, snap_mgr: SnapManager, raft_router: R, + recving_count: Arc, ) -> impl Future> { + let region_id = Arc::new(AtomicU64::new(0)); + let region_id_clone = region_id.clone(); + let snap_mgr_clone = snap_mgr.clone(); let recv_task = async move { let mut stream = stream.map_err(Error::from); let head = stream.next().await.transpose()?; let mut context = RecvSnapContext::new(head, &snap_mgr)?; + // Record the region_id for later cleanup. + region_id.store(context.raft_msg.region_id, Ordering::SeqCst); if context.file.is_none() { return context.finish(raft_router); } @@ -403,12 +409,14 @@ fn recv_snap( return Err(e); } } - // Notify the snapshot manager that a snapshot has been received, - // freeing up the associated resource in the concurrency limiter. - snap_mgr.recv_snap_complete(context.raft_msg.region_id); context.finish(raft_router) }; async move { + defer!(cleanup_after_recv( + region_id_clone, + snap_mgr_clone, + recving_count + )); match recv_task.await { Ok(()) => sink.success(Done::default()).await.map_err(Error::from), Err(e) => { @@ -419,6 +427,25 @@ fn recv_snap( } } +// Cleans up resources after snapshot reception. Ensures that the occupied +// resource within the concurrency limiter (used in snapshot precheck) is +// released. +fn cleanup_after_recv( + region_id: Arc, + snap_mgr: SnapManager, + recving_count: Arc, +) { + recving_count.fetch_sub(1, Ordering::SeqCst); + let id = region_id.load(Ordering::SeqCst); + if id != 0 { + // Notify the snapshot manager that a snapshot has been received, + // freeing up the associated resource in the concurrency limiter. Note + // that this should happen after decrementing `recving_count` (see + // #17903). 
+ snap_mgr.recv_snap_complete(id); + } +} + pub struct Runner { env: Arc, snap_mgr: SnapManager, @@ -528,8 +555,8 @@ impl Runnable for Runner { let recving_count = Arc::clone(&self.recving_count); recving_count.fetch_add(1, Ordering::SeqCst); let task = async move { - let result = recv_snap(stream, sink, snap_mgr, raft_router).await; - recving_count.fetch_sub(1, Ordering::SeqCst); + let result = + recv_snap(stream, sink, snap_mgr, raft_router, recving_count).await; if let Err(e) = result { error!("failed to recv snapshot"; "err" => %e); } diff --git a/tests/failpoints/cases/test_snap.rs b/tests/failpoints/cases/test_snap.rs index 526407dff22..9bb3ce672a3 100644 --- a/tests/failpoints/cases/test_snap.rs +++ b/tests/failpoints/cases/test_snap.rs @@ -676,8 +676,8 @@ fn test_sending_fail_with_net_error() { // need to wait receiver handle the snapshot request sleep_ms(100); - // peer2 can't receive any snapshot, so it doesn't have any key valuse. - // but the receiving_count should be zero if receiving snapshot is failed. + // peer2 can't receive any snapshot, so it doesn't have any key values. + // but the receiving_count should be zero if receiving snapshot failed. let engine2 = cluster.get_engine(2); must_get_none(&engine2, b"k1"); assert_eq!(cluster.get_snap_mgr(2).stats().receiving_count, 0); @@ -1151,3 +1151,61 @@ fn test_snapshot_receiver_busy() { fail::remove("receiving_snapshot_callback"); fail::remove("snap_gen_precheck_failed"); } + +#[test] +fn test_snapshot_receiver_not_busy_after_precheck_is_complete() { + let mut cluster = new_server_cluster(0, 2); + // Test that a snapshot generation is paused when the receiver is busy. To + // trigger the scenario, two regions are set up to send snapshots to the + // same store concurrently while configuring the receiving limit to 1. + cluster.cfg.server.concurrent_recv_snap_limit = 1; + cluster.cfg.raft_store.raft_log_gc_tick_interval = ReadableDuration::secs(60); + + let pd_client = Arc::clone(&cluster.pd_client); + // Disable default max peer count check. + pd_client.disable_default_operator(); + + let right_region = cluster.run_conf_change(); + cluster.must_put(b"k1", b"v1"); + cluster.must_put(b"k3", b"v3"); + + // Do a split to create the second region. + let r = cluster.get_region(b"k1"); + cluster.must_split(&r, b"k2"); + // After the split, the keyspace layout looks like this: + // + // k2 (split point) + // │ + // (k1,v1) │ (k3,v3) + // ───────────────────┼────────────────── + // left_region right_region + let left_region = cluster.get_region(b"k1").id; + + // When a snapshot receiver is busy, we want the snapshot generation to + // pause and wait until the receiver becomes available. For the two regions + // in this test, there should only be two snapshot generations in total. + fail::cfg("before_region_gen_snap", "2*print()->panic()").unwrap(); + + // Test flow: + // 1. `right_region` sends its snapshot first. The thread will be paused at the + // `post_recv_snap_complete2` failpoint. + // 2. Before `right_region` is paused, the `post_recv_snap_complete1` failpoint + // callback triggers `left_region` to send its snapshot. + fail::cfg_callback("post_recv_snap_complete1", move || { + pd_client.must_add_peer(left_region, new_peer(2, 1002)); + }) + .unwrap(); + fail::cfg("post_recv_snap_complete2", "pause").unwrap(); + + let pd_client2 = Arc::clone(&cluster.pd_client); + pd_client2.must_add_peer(right_region, new_peer(2, 2)); + // Check that the `left_region` succeeds in sending its snapshot. 
+ must_get_equal(&cluster.get_engine(2), b"k1", b"v1"); + + // Unblock the `right_region` as well. + fail::remove("post_recv_snap_complete2"); + must_get_equal(&cluster.get_engine(2), b"k3", b"v3"); + + fail::remove("post_recv_snap_complete1"); + fail::remove("before_region_gen_snap"); +} From 066dcfd172364f119c750d823f7ab3ecd4649da3 Mon Sep 17 00:00:00 2001 From: lucasliang Date: Wed, 18 Dec 2024 17:02:54 +0800 Subject: [PATCH 60/86] resolve: returns StoreTombstone directly if PD returns `store not found`. (#18023) close tikv/tikv#17875 This PR optimizes the error handling progress when resolving address returns by the response from PD. If the response contains `store xxx not found`, the resolver could directly returns the `StoreTombstone` Error to make the raft-client end the retrying loop quickly. Signed-off-by: lucasliang --- src/server/resolve.rs | 26 +++++++++++++++++++++++++- 1 file changed, 25 insertions(+), 1 deletion(-) diff --git a/src/server/resolve.rs b/src/server/resolve.rs index 013511183e2..01f737d6678 100644 --- a/src/server/resolve.rs +++ b/src/server/resolve.rs @@ -116,10 +116,13 @@ where // after 30 days of deletion. PD returns // "invalid store ID %d, not found" for such store id. // See https://github.com/tikv/pd/blob/v7.3.0/server/grpc_service.go#L777-L780 + // And to avoid repeatedly logging the same errors, it + // can directly return the `StoreTombstone` err. if format!("{:?}", e).contains("not found") { RESOLVE_STORE_COUNTER_STATIC.not_found.inc(); info!("resolve store not found"; "store_id" => store_id); self.router.report_store_maybe_tombstone(store_id); + return Err(Error::StoreTombstone(store_id)); } return Err(box_err!(e)); } @@ -230,6 +233,7 @@ mod tests { use std::{net::SocketAddr, ops::Sub, str::FromStr, sync::Arc, thread, time::Duration}; use collections::HashMap; + use grpcio::{Error as GrpcError, RpcStatus, RpcStatusCode}; use kvproto::metapb; use pd_client::{PdClient, Result}; use tikv_kv::FakeExtension; @@ -244,7 +248,15 @@ mod tests { } impl PdClient for MockPdClient { - fn get_store(&self, _: u64) -> Result { + fn get_store(&self, store_id: u64) -> Result { + if store_id == u64::MAX { + return Err(pd_client::Error::Grpc(GrpcError::RpcFailure( + RpcStatus::with_message( + RpcStatusCode::UNAVAILABLE, + format!("invalid store ID {}, not found", store_id,), + ), + ))); + } if self.store.get_state() == metapb::StoreState::Tombstone { // Simulate the behavior of `get_store` in pd client. return Err(pd_client::Error::StoreTombstone(format!( @@ -305,6 +317,18 @@ mod tests { runner.get_address(0).unwrap_err(); } + #[test] + fn test_resolve_store_with_not_found_err() { + let mut store = new_store(STORE_ADDR, metapb::StoreState::default()); + store.set_id(u64::MAX); + let store_id = store.get_id(); + let runner = new_runner(store); + let result = runner.get_address(store_id).unwrap_err(); + if let Error::StoreTombstone(id) = result { + assert_eq!(store_id, id); + } + } + #[test] fn test_resolve_store_peer_addr() { let mut store = new_store("127.0.0.1:12345", metapb::StoreState::Up); From 7ef4aa9549f136cabdac7e5177f39fe16631b514 Mon Sep 17 00:00:00 2001 From: lucasliang Date: Wed, 18 Dec 2024 18:02:33 +0800 Subject: [PATCH 61/86] server: polish the logging when updating disk status. (#18025) ref tikv/tikv#17939 Polish the logging when periodically updating the disk status. 
Signed-off-by: lucasliang Co-authored-by: ti-chi-bot[bot] <108142056+ti-chi-bot[bot]@users.noreply.github.com> --- components/server/src/server.rs | 8 +++++--- components/server/src/server2.rs | 8 +++++--- 2 files changed, 10 insertions(+), 6 deletions(-) diff --git a/components/server/src/server.rs b/components/server/src/server.rs index 23b5fbc2376..38e711264ee 100644 --- a/components/server/src/server.rs +++ b/components/server/src/server.rs @@ -1385,6 +1385,10 @@ where let snap_mgr = self.snap_mgr.clone().unwrap(); let reserve_space = disk::get_disk_reserved_space(); let reserve_raft_space = disk::get_raft_disk_reserved_space(); + let need_update_disk_status = reserve_space != 0 || reserve_raft_space != 0; + if !need_update_disk_status { + info!("ignore updating disk status as no reserve space is set"); + } let raft_path = engines.raft.get_engine_path().to_string(); let separated_raft_mount_path = path_in_diff_mount_point(raft_path.as_str(), engines.kv.path()); @@ -1462,9 +1466,7 @@ where ); } // Update disk status if disk space checker is enabled. - if reserve_space == 0 && reserve_raft_space == 0 { - info!("ignore updating disk status as no reserve space is set"); - } else { + if need_update_disk_status { disk::set_disk_status(cur_disk_status); } // Update disk capacity, used size and available size. diff --git a/components/server/src/server2.rs b/components/server/src/server2.rs index 996146f8a2a..3931d2cc37c 100644 --- a/components/server/src/server2.rs +++ b/components/server/src/server2.rs @@ -1171,6 +1171,10 @@ where let snap_mgr = self.snap_mgr.clone().unwrap(); let reserve_space = disk::get_disk_reserved_space(); let reserve_raft_space = disk::get_raft_disk_reserved_space(); + let need_update_disk_status = reserve_space != 0 || reserve_raft_space != 0; + if !need_update_disk_status { + info!("ignore updating disk status as no reserve space is set"); + } let raft_engine = self.engines.as_ref().unwrap().raft_engine.clone(); let tablet_registry = self.tablet_registry.clone().unwrap(); let raft_path = raft_engine.get_engine_path().to_string(); @@ -1252,9 +1256,7 @@ where ); } // Update disk status if disk space checker is enabled. - if reserve_space == 0 && reserve_raft_space == 0 { - info!("ignore updating disk status as no reserve space is set"); - } else { + if need_update_disk_status { disk::set_disk_status(cur_disk_status); } // Update disk capacity, used size and available size. From 678edbe8fa3404c2d9f703031ca621792bf61cb7 Mon Sep 17 00:00:00 2001 From: ekexium Date: Mon, 23 Dec 2024 15:17:45 +0800 Subject: [PATCH 62/86] concurrency_manager: check update_max_ts against a limit (#17917) close tikv/tikv#17916 concurrency_manager: add safety boundary for max_ts updates Add `max_ts_limit` to prevent unreasonable timestamp updates. The limit is synchronized with PD timestamp periodically. Configure via max_ts_allowance_secs and max_ts_sync_interval_secs. Updates from PD bypass this limit. 
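A minimal usage sketch of the new boundary check, mirroring the constructor and the tests added in the diff below (the timestamps, duration, and error action are illustrative values, not recommended settings):

    use std::time::Duration;

    use concurrency_manager::{ActionOnInvalidMaxTs, ConcurrencyManager};
    use txn_types::TimeStamp;

    fn main() {
        // Return an error (rather than panic or only log) on invalid updates.
        let cm = ConcurrencyManager::new_with_config(
            TimeStamp::new(100),
            Duration::from_secs(60), // how long a limit stays trustworthy
            ActionOnInvalidMaxTs::Error,
        );

        // In TiKV this limit is refreshed in the background from PD TSO plus
        // the configured allowance; see init_max_ts_updater in the diff.
        cm.set_max_ts_limit(TimeStamp::new(200));

        // An update below the limit is accepted ...
        cm.update_max_ts(TimeStamp::new(150), "example").unwrap();
        assert_eq!(cm.max_ts(), TimeStamp::new(150));

        // ... while one beyond the limit is rejected, returning the limit
        // that was violated.
        assert!(cm.update_max_ts(TimeStamp::new(250), "example").is_err());
    }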
Signed-off-by: ekexium Co-authored-by: ti-chi-bot[bot] <108142056+ti-chi-bot[bot]@users.noreply.github.com> --- Cargo.lock | 80 +- components/backup-stream/src/endpoint.rs | 2 +- components/backup/src/endpoint.rs | 4 +- components/cdc/src/endpoint.rs | 2 +- components/cdc/tests/integrations/test_cdc.rs | 2 +- components/concurrency_manager/Cargo.toml | 15 +- .../benches/update_max_ts.rs | 31 + components/concurrency_manager/src/lib.rs | 548 ++++++- components/error_code/src/coprocessor.rs | 4 +- components/error_code/src/storage.rs | 1 + components/raftstore-v2/src/worker/pd/misc.rs | 13 +- components/raftstore/src/store/worker/pd.rs | 13 +- components/resolved_ts/src/advance.rs | 5 +- components/server/src/server.rs | 44 +- components/server/src/server2.rs | 45 +- components/tikv_util/src/time.rs | 2 +- metrics/grafana/tikv_details.dashboard.py | 19 + metrics/grafana/tikv_details.json | 1376 +++++++++-------- metrics/grafana/tikv_details.json.sha256 | 2 +- src/config/mod.rs | 1 + src/coprocessor/endpoint.rs | 9 +- src/coprocessor/error.rs | 4 + src/server/raftkv/mod.rs | 7 +- src/storage/config.rs | 33 +- src/storage/config_manager.rs | 10 + src/storage/errors.rs | 14 + src/storage/mod.rs | 22 +- src/storage/mvcc/mod.rs | 5 + .../txn/actions/acquire_pessimistic_lock.rs | 4 +- src/storage/txn/actions/prewrite.rs | 18 +- .../txn/commands/check_secondary_locks.rs | 6 +- src/storage/txn/commands/check_txn_status.rs | 7 +- src/storage/txn/commands/cleanup.rs | 4 +- src/storage/txn/commands/prewrite.rs | 12 +- src/storage/txn/mod.rs | 7 + tests/failpoints/cases/test_storage.rs | 1 + tests/failpoints/cases/test_transaction.rs | 6 +- tests/integrations/config/mod.rs | 3 + tests/integrations/config/test-custom.toml | 3 + tests/integrations/server/kv_service.rs | 2 +- 40 files changed, 1697 insertions(+), 689 deletions(-) create mode 100644 components/concurrency_manager/benches/update_max_ts.rs diff --git a/Cargo.lock b/Cargo.lock index 98640ae39c2..f4ef3094d93 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -97,6 +97,12 @@ dependencies = [ "libc 0.2.151", ] +[[package]] +name = "anes" +version = "0.1.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4b46cbb362ab8752921c97e041f5e366ee6297bd428a31275b9fcf1e380f7299" + [[package]] name = "ansi_term" version = "0.11.0" @@ -945,7 +951,7 @@ dependencies = [ name = "batch-system" version = "0.1.0" dependencies = [ - "criterion", + "criterion 0.3.5", "crossbeam", "dashmap", "derive_more", @@ -1201,12 +1207,18 @@ version = "0.2.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "926013f2860c46252efceabb19f4a6b308197505082c609025aa6706c011d427" +[[package]] +name = "cast" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "37b2a672a2cb129a2e41c10b1224bb368f9f37a2b16b612598138befd7b37eb5" + [[package]] name = "causal_ts" version = "0.0.1" dependencies = [ "async-trait", - "criterion", + "criterion 0.3.5", "enum_dispatch", "error_code", "futures 0.3.15", @@ -1264,7 +1276,7 @@ dependencies = [ "causal_ts", "collections", "concurrency_manager", - "criterion", + "criterion 0.3.5", "crossbeam", "engine_rocks", "engine_traits", @@ -1542,13 +1554,21 @@ dependencies = [ name = "concurrency_manager" version = "0.0.1" dependencies = [ - "criterion", + "criterion 0.4.0", + "crossbeam", "crossbeam-skiplist 0.1.3", "fail", "futures 0.3.15", "kvproto", + "lazy_static", + "online_config", "parking_lot 0.12.1", + "prometheus", "rand 0.8.5", + "serde", + "slog", + 
"slog-global", + "thiserror", "tikv_alloc", "tikv_util", "tokio", @@ -1665,9 +1685,9 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1604dafd25fba2fe2d5895a9da139f8dc9b319a5fe5354ca137cbbce4e178d10" dependencies = [ "atty", - "cast", + "cast 0.2.2", "clap 2.33.0", - "criterion-plot", + "criterion-plot 0.4.4", "csv", "itertools", "lazy_static", @@ -1684,13 +1704,39 @@ dependencies = [ "walkdir", ] +[[package]] +name = "criterion" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e7c76e09c1aae2bc52b3d2f29e13c6572553b30c4aa1b8a49fd70de6412654cb" +dependencies = [ + "anes", + "atty", + "cast 0.3.0", + "ciborium", + "clap 3.1.6", + "criterion-plot 0.5.0", + "itertools", + "lazy_static", + "num-traits", + "oorandom", + "plotters", + "rayon", + "regex", + "serde", + "serde_derive", + "serde_json", + "tinytemplate", + "walkdir", +] + [[package]] name = "criterion-cpu-time" version = "0.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "63aaaf47e457badbcb376c65a49d0f182c317ebd97dc6d1ced94c8e1d09c0f3a" dependencies = [ - "criterion", + "criterion 0.3.5", "libc 0.2.151", ] @@ -1700,7 +1746,7 @@ version = "0.1.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "eba5111e09fabb08bfaedbe28c832876bb38d4f9519f715466332880d80b0eac" dependencies = [ - "criterion", + "criterion 0.3.5", "perfcnt", ] @@ -1710,7 +1756,17 @@ version = "0.4.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d00996de9f2f7559f7f4dc286073197f83e92256a59ed395f9aac01fe717da57" dependencies = [ - "cast", + "cast 0.2.2", + "itertools", +] + +[[package]] +name = "criterion-plot" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6b50826342786a51a89e2da3a28f1c32b06e387201bc2d19791f622c673706b1" +dependencies = [ + "cast 0.3.0", "itertools", ] @@ -3416,7 +3472,7 @@ version = "0.0.1" dependencies = [ "bytes", "collections", - "criterion", + "criterion 0.3.5", "crossbeam", "crossbeam-skiplist 0.1.3", "dashmap", @@ -6902,7 +6958,7 @@ dependencies = [ "collections", "concurrency_manager", "crc64fast", - "criterion", + "criterion 0.3.5", "criterion-cpu-time", "criterion-perf-events", "crossbeam", @@ -7080,7 +7136,7 @@ dependencies = [ "codec", "collections", "crc32fast", - "criterion", + "criterion 0.3.5", "encoding_rs 0.8.29", "error_code", "hex 0.4.3", diff --git a/components/backup-stream/src/endpoint.rs b/components/backup-stream/src/endpoint.rs index 7fede7dad0c..4dd35e926b0 100644 --- a/components/backup-stream/src/endpoint.rs +++ b/components/backup-stream/src/endpoint.rs @@ -818,7 +818,7 @@ where .await .map_err(|err| Error::from(err).report("failed to get tso from pd")) .unwrap_or_default(); - cm.update_max_ts(pd_tso); + cm.update_max_ts(pd_tso, "backup-stream").unwrap(); let min_ts = cm.global_min_lock_ts().unwrap_or(TimeStamp::max()); Ord::min(pd_tso, min_ts) } diff --git a/components/backup/src/endpoint.rs b/components/backup/src/endpoint.rs index 0bf0f5105f5..161218bc83e 100644 --- a/components/backup/src/endpoint.rs +++ b/components/backup/src/endpoint.rs @@ -350,7 +350,9 @@ impl BackupRange { snap_ctx.key_ranges = vec![key_range]; } else { // Update max_ts and check the in-memory lock table before getting the snapshot - concurrency_manager.update_max_ts(backup_ts); + concurrency_manager + .update_max_ts(backup_ts, "backup_range") + .map_err(TxnError::from)?; concurrency_manager .read_range_check( 
self.start_key.as_ref(), diff --git a/components/cdc/src/endpoint.rs b/components/cdc/src/endpoint.rs index a860d3d1260..da33bfd4021 100644 --- a/components/cdc/src/endpoint.rs +++ b/components/cdc/src/endpoint.rs @@ -1122,7 +1122,7 @@ impl, E: KvEngine, S: StoreRegionMeta> Endpoint() { guard }; - cm.update_max_ts(20.into()); + cm.update_max_ts(20.into(), "").unwrap(); let guard = lock_key(b"a", 80); suite.set_tso(99); diff --git a/components/concurrency_manager/Cargo.toml b/components/concurrency_manager/Cargo.toml index 8e2817d0097..dc9cf1b558a 100644 --- a/components/concurrency_manager/Cargo.toml +++ b/components/concurrency_manager/Cargo.toml @@ -6,16 +6,24 @@ version = "0.0.1" license = "Apache-2.0" [dependencies] +crossbeam = "0.8" crossbeam-skiplist = { workspace = true } fail = "0.5" kvproto = { workspace = true } +lazy_static = "1.4.0" +online_config = { workspace = true } parking_lot = "0.12" +prometheus = "0.13" +serde = { version = "1.0.194", features = ["derive"] } +slog = { workspace = true } +slog-global = { workspace = true } +thiserror = "1.0" tikv_util = { workspace = true } tokio = { version = "1.5", features = ["macros", "sync", "time"] } txn_types = { workspace = true } [dev-dependencies] -criterion = "0.3" +criterion = "0.4" futures = "0.3" rand = "0.8.3" tikv_alloc = { workspace = true, features = ["jemalloc"] } @@ -24,3 +32,8 @@ tikv_alloc = { workspace = true, features = ["jemalloc"] } name = "lock_table" path = "benches/lock_table.rs" harness = false + +[[bench]] +name = "update_max_ts" +path = "benches/update_max_ts.rs" +harness = false diff --git a/components/concurrency_manager/benches/update_max_ts.rs b/components/concurrency_manager/benches/update_max_ts.rs new file mode 100644 index 00000000000..2c8dfd58120 --- /dev/null +++ b/components/concurrency_manager/benches/update_max_ts.rs @@ -0,0 +1,31 @@ +// Copyright 2024 TiKV Project Authors. Licensed under Apache-2.0. + +use std::time::Duration; + +use concurrency_manager::{ActionOnInvalidMaxTs, ConcurrencyManager}; +use criterion::{black_box, criterion_group, criterion_main, Criterion}; +use txn_types::TimeStamp; + +fn benchmark_update_max_ts(c: &mut Criterion) { + let latest_ts = TimeStamp::new(1000); + let limit_valid_time = Duration::from_secs(20); + let cm = ConcurrencyManager::new_with_config( + latest_ts, + limit_valid_time, + ActionOnInvalidMaxTs::Error, + ); + + cm.set_max_ts_limit(TimeStamp::new(4000)); + + let new_ts = TimeStamp::new(3000); + + c.bench_function("update_max_ts", |b| { + b.iter(|| { + cm.update_max_ts(black_box(new_ts), || format!("benchmark-{}", new_ts)) + .unwrap(); + }) + }); +} + +criterion_group!(benches, benchmark_update_max_ts); +criterion_main!(benches); diff --git a/components/concurrency_manager/src/lib.rs b/components/concurrency_manager/src/lib.rs index 1c6bdb8dbf1..20b2b39ba22 100644 --- a/components/concurrency_manager/src/lib.rs +++ b/components/concurrency_manager/src/lib.rs @@ -16,13 +16,22 @@ mod key_handle; mod lock_table; use std::{ + error::Error, + fmt, + fmt::Display, mem::MaybeUninit, sync::{ - atomic::{AtomicU64, Ordering}, + atomic::{AtomicU64, AtomicUsize, Ordering}, Arc, }, + time::Duration, }; +use crossbeam::atomic::AtomicCell; +use lazy_static::lazy_static; +use prometheus::{register_int_gauge, IntGauge}; +use thiserror::Error; +use tikv_util::{error, time::Instant}; use txn_types::{Key, Lock, TimeStamp}; pub use self::{ @@ -30,19 +39,103 @@ pub use self::{ lock_table::LockTable, }; +lazy_static! 
{ + static ref MAX_TS_LIMIT_GAUGE: IntGauge = register_int_gauge!( + "tikv_concurrency_manager_max_ts_limit", + "Current value of max_ts_limit" + ) + .unwrap(); + static ref MAX_TS_GAUGE: IntGauge = + register_int_gauge!("tikv_concurrency_manager_max_ts", "Current value of max_ts").unwrap(); +} + +const DEFAULT_LIMIT_VALID_DURATION: Duration = Duration::from_secs(60); + +// It is suggested that limit_valid_duration = sync_interval * +// LIMIT_VALID_TIME_MULTIPLIER, to balance between +// 1. tolerate temporary issues in updating the limit. +// 2. avoid long-term blocking of max_ts update caused by network partition +// between TiKV and PD. +pub const LIMIT_VALID_TIME_MULTIPLIER: u32 = 3; + +#[derive(Copy, Clone, PartialEq, Eq)] +struct MaxTsLimit { + limit: TimeStamp, + update_time: Instant, +} + // Pay attention that the async functions of ConcurrencyManager should not hold // the mutex. #[derive(Clone)] pub struct ConcurrencyManager { max_ts: Arc, lock_table: LockTable, + + // max_ts_limit and its update time. + // + // max_ts_limit is an assertion: max_ts should not be updated to a value greater than this + // limit. + // + // When the limit is not updated for a long time(exceeding the threshold), we use an + // approximate limit. + max_ts_limit: Arc>, + limit_valid_duration: Duration, + action_on_invalid_max_ts: Arc, + + time_provider: Arc, } impl ConcurrencyManager { pub fn new(latest_ts: TimeStamp) -> Self { + Self::new_with_config( + latest_ts, + DEFAULT_LIMIT_VALID_DURATION, + ActionOnInvalidMaxTs::Panic, + ) + } + + pub fn new_with_config( + latest_ts: TimeStamp, + limit_valid_duration: Duration, + action_on_invalid_max_ts: ActionOnInvalidMaxTs, + ) -> Self { + let initial_limit = MaxTsLimit { + limit: TimeStamp::new(0), + update_time: Instant::now(), + }; + ConcurrencyManager { max_ts: Arc::new(AtomicU64::new(latest_ts.into_inner())), + max_ts_limit: Arc::new(AtomicCell::new(initial_limit)), lock_table: LockTable::default(), + action_on_invalid_max_ts: Arc::new(AtomicActionOnInvalidMaxTs::new( + action_on_invalid_max_ts, + )), + limit_valid_duration, + time_provider: Arc::new(CoarseInstantTimeProvider), + } + } + + #[cfg(test)] + fn new_with_time_provider( + latest_ts: TimeStamp, + limit_valid_duration: Duration, + action_on_invalid_max_ts: ActionOnInvalidMaxTs, + time_provider: Arc, + ) -> Self { + let initial_limit = MaxTsLimit { + limit: TimeStamp::new(0), + update_time: time_provider.now(), + }; + ConcurrencyManager { + max_ts: Arc::new(AtomicU64::new(latest_ts.into_inner())), + max_ts_limit: Arc::new(AtomicCell::new(initial_limit)), + lock_table: LockTable::default(), + action_on_invalid_max_ts: Arc::new(AtomicActionOnInvalidMaxTs::new( + action_on_invalid_max_ts, + )), + limit_valid_duration, + time_provider, } } @@ -52,9 +145,134 @@ impl ConcurrencyManager { /// Updates max_ts with the given new_ts. It has no effect if /// max_ts >= new_ts or new_ts is TimeStamp::max(). - pub fn update_max_ts(&self, new_ts: TimeStamp) { - if new_ts != TimeStamp::max() { - self.max_ts.fetch_max(new_ts.into_inner(), Ordering::SeqCst); + /// + /// To avoid invalid ts breaking the invariants, the new_ts should be + /// less than or equal to the max_ts_limit. 
+ /// + /// # Returns + /// - Ok(()): If the update is successful or has no effect + /// - Err(limit): If new_ts is greater than the max_ts_limit, returns the + /// current limit value + + pub fn update_max_ts( + &self, + new_ts: TimeStamp, + source: impl IntoErrorSource, + ) -> Result<(), InvalidMaxTsUpdate> { + if new_ts.is_max() { + return Ok(()); + } + let limit = self.max_ts_limit.load(); + + // check that new_ts is less than or equal to the limit + if !limit.limit.is_zero() && new_ts > limit.limit { + // NOTE: `limit` and `last_update` are read non-atomically as a whole, so they + // can be inconsistent, i.e. they may not be from the same event of + // setting the limit. The consequence is that we may mistakenly + // treat an "invalid" limit as a "valid" one. This is acceptable + // because the limit is just an assertion, and the inconsistency + // is not harmful. + let last_update = limit.update_time; + let now = self.time_provider.now(); + assert!(now >= last_update); + let duration_to_last_limit_update = now - last_update; + + if duration_to_last_limit_update < self.limit_valid_duration { + // limit is valid + let source = source.into_error_source(); + self.report_error(new_ts, limit.limit, source, false)?; + } else { + // limit is stale + // use an approximate limit to avoid false alerts caused by failed limit updates + + let approximate_limit = TimeStamp::compose( + limit.limit.physical() + duration_to_last_limit_update.as_millis() as u64, + limit.limit.logical(), + ); + + if new_ts > approximate_limit { + let source = source.into_error_source(); + self.report_error(new_ts, approximate_limit, source, true)?; + } + } + } + + MAX_TS_GAUGE.set( + self.max_ts + .fetch_max(new_ts.into_inner(), Ordering::SeqCst) + .max(new_ts.into_inner()) as i64, + ); + Ok(()) + } + + fn report_error( + &self, + new_ts: TimeStamp, + limit: TimeStamp, + source: impl slog::Value + Display, + using_approximate: bool, + ) -> Result<(), InvalidMaxTsUpdate> { + let can_panic = !using_approximate; + error!("invalid max_ts update"; + "attempted_ts" => new_ts, + "max_allowed" => limit.into_inner(), + "source" => &source, + "using_approximate" => using_approximate, + ); + match self.action_on_invalid_max_ts.load() { + ActionOnInvalidMaxTs::Panic if can_panic => { + panic!( + "invalid max_ts update: {} exceeds the limit {}, source={}", + new_ts, + limit.into_inner(), + source + ); + } + ActionOnInvalidMaxTs::Error => Err(InvalidMaxTsUpdate { + attempted_ts: new_ts, + max_allowed: limit, + }), + ActionOnInvalidMaxTs::Log => Ok(()), + ActionOnInvalidMaxTs::Panic => Ok(()), + } + } + + /// Set the maximum allowed value for max_ts updates, except for the updates + /// from PD TSO. The limit must be updated regularly to prevent the + /// blocking of max_ts. It prevents max_ts from being updated to an + /// unreasonable value, which is usually caused by bugs or unsafe + /// usages. + /// + /// # Note + /// If the new limit is smaller than the current limit, this operation will + /// have no effect and return silently. 
+ pub fn set_max_ts_limit(&self, limit: TimeStamp) { + if limit.is_max() { + error!("max_ts_limit cannot be set to u64::max"); + return; + } + + loop { + let current = self.max_ts_limit.load(); + + if limit.into_inner() <= current.limit.into_inner() { + break; + } + + let new_state = MaxTsLimit { + limit, + update_time: self.time_provider.now(), + }; + + match self.max_ts_limit.compare_exchange(current, new_state) { + Ok(_) => { + MAX_TS_LIMIT_GAUGE.set(limit.into_inner() as i64); + break; + } + Err(_) => { + continue; + } + } } } @@ -141,14 +359,194 @@ impl ConcurrencyManager { }); min_lock } + + pub fn set_action_on_invalid_max_ts(&self, action: ActionOnInvalidMaxTs) { + self.action_on_invalid_max_ts.store(action); + } +} + +#[derive(Debug, Clone, Copy, PartialEq)] +pub enum ActionOnInvalidMaxTs { + Panic, + Error, + Log, +} + +#[derive(Debug)] +pub struct ParseActionError(String); + +impl fmt::Display for ParseActionError { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, "{}", self.0) + } +} + +impl Error for ParseActionError {} + +impl TryFrom<&str> for ActionOnInvalidMaxTs { + type Error = ParseActionError; + + fn try_from(value: &str) -> Result { + match value.to_lowercase().as_str() { + "panic" => Ok(Self::Panic), + "error" => Ok(Self::Error), + "log" => Ok(Self::Log), + _ => Err(ParseActionError(format!("invalid action value: {}", value))), + } + } +} + +impl TryFrom for ActionOnInvalidMaxTs { + type Error = ParseActionError; + + fn try_from(value: String) -> Result { + Self::try_from(value.as_str()) + } +} + +pub struct AtomicActionOnInvalidMaxTs { + inner: AtomicUsize, +} + +impl AtomicActionOnInvalidMaxTs { + pub fn new(initial: ActionOnInvalidMaxTs) -> Self { + Self { + inner: AtomicUsize::new(match initial { + ActionOnInvalidMaxTs::Panic => 0, + ActionOnInvalidMaxTs::Error => 1, + ActionOnInvalidMaxTs::Log => 2, + }), + } + } + + pub fn store(&self, value: ActionOnInvalidMaxTs) { + self.inner.store( + match value { + ActionOnInvalidMaxTs::Panic => 0, + ActionOnInvalidMaxTs::Error => 1, + ActionOnInvalidMaxTs::Log => 2, + }, + Ordering::SeqCst, + ); + } + + pub fn load(&self) -> ActionOnInvalidMaxTs { + match self.inner.load(Ordering::SeqCst) { + 0 => ActionOnInvalidMaxTs::Panic, + 1 => ActionOnInvalidMaxTs::Error, + 2 => ActionOnInvalidMaxTs::Log, + _ => unreachable!("Invalid atomic state"), + } + } +} + +#[derive(Debug, Error, Clone)] +#[error("invalid max_ts update: {attempted_ts} exceeds the limit {max_allowed}")] +pub struct InvalidMaxTsUpdate { + pub attempted_ts: TimeStamp, + pub max_allowed: TimeStamp, +} + +pub trait ValueDisplay: slog::Value + Display {} +impl ValueDisplay for String {} +impl ValueDisplay for &str {} + +mod sealed { + pub trait Sealed {} +} + +pub trait IntoErrorSource: sealed::Sealed { + type Output: ValueDisplay; + fn into_error_source(self) -> Self::Output; +} + +// &str impl +impl<'a> sealed::Sealed for &'a str {} +impl<'a> IntoErrorSource for &'a str { + type Output = &'a str; + fn into_error_source(self) -> Self::Output { + self + } +} + +// String impl +impl sealed::Sealed for String {} +impl IntoErrorSource for String { + type Output = String; + fn into_error_source(self) -> Self::Output { + self + } +} + +// Closure impl +impl sealed::Sealed for F +where + F: FnOnce() -> T, + T: ValueDisplay, +{ +} +impl IntoErrorSource for F +where + F: FnOnce() -> T, + T: ValueDisplay, +{ + type Output = T; + fn into_error_source(self) -> T { + self() + } +} + +/// Trait to abstract time-related functionality, for a monotonic clock 
+trait TimeProvider: Send + Sync { + /// Returns the current instant. + fn now(&self) -> Instant; +} + +struct CoarseInstantTimeProvider; + +impl TimeProvider for CoarseInstantTimeProvider { + fn now(&self) -> Instant { + Instant::now_coarse() + } } #[cfg(test)] mod tests { + use std::sync::Mutex; + use txn_types::LockType; use super::*; + #[derive(Clone)] + struct MockTimeProvider { + current_time: Arc>, + } + + impl MockTimeProvider { + /// Creates a new MockTimeProvider initialized with the given instant. + fn new(start_time: Instant) -> Self { + MockTimeProvider { + current_time: Arc::new(Mutex::new(start_time)), + } + } + + /// Advances the current time by the specified duration. + fn advance(&self, duration: Duration) { + let mut time = self.current_time.lock().unwrap(); + // Note: Instant doesn't support addition, so we mock behavior. + // This simplistic approach assumes no overflow. + *time += duration; + } + } + + impl TimeProvider for MockTimeProvider { + fn now(&self) -> Instant { + let time = self.current_time.lock().unwrap(); + *time + } + } + #[tokio::test] async fn test_lock_keys_order() { let concurrency_manager = ConcurrencyManager::new(1.into()); @@ -166,13 +564,13 @@ mod tests { #[tokio::test] async fn test_update_max_ts() { let concurrency_manager = ConcurrencyManager::new(10.into()); - concurrency_manager.update_max_ts(20.into()); + let _ = concurrency_manager.update_max_ts(20.into(), ""); assert_eq!(concurrency_manager.max_ts(), 20.into()); - concurrency_manager.update_max_ts(5.into()); + let _ = concurrency_manager.update_max_ts(5.into(), ""); assert_eq!(concurrency_manager.max_ts(), 20.into()); - concurrency_manager.update_max_ts(TimeStamp::max()); + let _ = concurrency_manager.update_max_ts(TimeStamp::max(), ""); assert_eq!(concurrency_manager.max_ts(), 20.into()); } @@ -224,4 +622,140 @@ mod tests { assert_eq!(concurrency_manager.global_min_lock_ts(), Some(20.into())); } } + + #[test] + fn test_max_ts_limit() { + let cm = ConcurrencyManager::new_with_config( + TimeStamp::new(100), + DEFAULT_LIMIT_VALID_DURATION, + ActionOnInvalidMaxTs::Error, + ); + + // Initially limit should be 0 + cm.update_max_ts(TimeStamp::new(150), "").unwrap(); + + // Set initial limit to 200 + cm.set_max_ts_limit(TimeStamp::new(200)); + + // Try to lower limit to 150 - should be ignored + cm.set_max_ts_limit(TimeStamp::new(150)); + cm.update_max_ts(TimeStamp::new(180), "").unwrap(); // Should still work up to 200 + assert!(cm.update_max_ts(TimeStamp::new(250), "").is_err()); // Should fail above 200 + + // Increase limit to 300 - should work + cm.set_max_ts_limit(TimeStamp::new(300)); + cm.update_max_ts(TimeStamp::new(250), "").unwrap(); + } + + #[test] + fn test_max_ts_limit_edge_cases() { + let cm = ConcurrencyManager::new(TimeStamp::new(100)); + + // Test transition from zero limit + assert_eq!(cm.max_ts_limit.load().limit, 0.into()); + cm.set_max_ts_limit(TimeStamp::new(1000)); + assert_eq!(cm.max_ts_limit.load().limit, 1000.into()); + + // Try to lower from 1000 to 500 - should be ignored + cm.set_max_ts_limit(TimeStamp::new(500)); + assert_eq!(cm.max_ts_limit.load().limit, 1000.into()); + + // Test setting limit to max, should have no effect + cm.set_max_ts_limit(TimeStamp::max()); + assert_eq!(cm.max_ts_limit.load().limit, 1000.into()); + } + + #[test] + fn test_max_ts_updates_with_monotonic_limit() { + let cm = ConcurrencyManager::new_with_config( + TimeStamp::new(100), + DEFAULT_LIMIT_VALID_DURATION, + ActionOnInvalidMaxTs::Error, + ); + + // Set limit to 200 + 
cm.set_max_ts_limit(TimeStamp::new(200)); + + // Update max_ts to 150 + cm.update_max_ts(TimeStamp::new(150), "").unwrap(); + assert_eq!(cm.max_ts(), TimeStamp::new(150)); + + // Try to lower limit to 180 - should be ignored + cm.set_max_ts_limit(TimeStamp::new(180)); + + // Should still fail for values above 200 + let result = cm.update_max_ts(TimeStamp::new(250), ""); + assert!(result.is_err()); + if let Err(e) = result { + assert_eq!(e.attempted_ts, TimeStamp::new(250)); + assert_eq!(e.max_allowed, TimeStamp::new(200)); + } + } + + #[test] + fn test_limit_valid_duration_boundary() { + let start_time = Instant::now(); + let mock_time = MockTimeProvider::new(start_time); + let time_provider = Arc::new(mock_time.clone()); + + let cm = ConcurrencyManager::new_with_time_provider( + TimeStamp::new(100), + Duration::from_secs(60), + ActionOnInvalidMaxTs::Error, + time_provider.clone(), + ); + + cm.set_max_ts_limit(TimeStamp::new(200)); + + time_provider.advance(Duration::from_secs(59)); + assert!(cm.update_max_ts(TimeStamp::new(250), "").is_err()); + + time_provider.advance(Duration::from_secs(1)); + cm.update_max_ts(TimeStamp::new(250), "").unwrap(); + assert_eq!(cm.max_ts().into_inner(), 250); + } + + #[test] + fn test_max_ts_limit_expired_allows_update() { + let start_time = Instant::now(); + let mock_time = MockTimeProvider::new(start_time); + let time_provider = Arc::new(mock_time.clone()); + + let cm = ConcurrencyManager::new_with_time_provider( + TimeStamp::new(100), + Duration::from_secs(60), + ActionOnInvalidMaxTs::Error, + time_provider.clone(), + ); + + cm.set_max_ts_limit(TimeStamp::new(200)); + + mock_time.advance(Duration::from_secs(61)); + + // Updating to 250 should be allowed, since the limit should be invalidated + cm.update_max_ts(TimeStamp::new(250), "test_source".to_string()) + .unwrap(); + assert_eq!(cm.max_ts().into_inner(), 250); + } + + #[test] + #[should_panic(expected = "invalid max_ts update")] + fn test_panic_on_invalid_max_ts_enabled() { + let cm = ConcurrencyManager::new(TimeStamp::new(100)); + + cm.set_max_ts_limit(TimeStamp::new(200)); + + // should panic + cm.update_max_ts(TimeStamp::new(250), "test_source".to_string()) + .unwrap(); + } + + #[test] + fn test_update_max_ts_without_limit() { + let cm = ConcurrencyManager::new(TimeStamp::new(100)); + + cm.update_max_ts(TimeStamp::new(500), "test_source".to_string()) + .unwrap(); + assert_eq!(cm.max_ts().into_inner(), 500); + } } diff --git a/components/error_code/src/coprocessor.rs b/components/error_code/src/coprocessor.rs index 31f0ed4224a..dfa4749795f 100644 --- a/components/error_code/src/coprocessor.rs +++ b/components/error_code/src/coprocessor.rs @@ -16,5 +16,7 @@ define_error_codes!( CORRUPTED_DATA => ("CorruptedData", "", ""), STORAGE_ERROR => ("StorageError", "", ""), - INVALID_CHARACTER_STRING => ("InvalidCharacterString", "", "") + INVALID_CHARACTER_STRING => ("InvalidCharacterString", "", ""), + + INVALID_MAX_TS_UPDATE => ("InvalidMaxTsUpdate", "", "") ); diff --git a/components/error_code/src/storage.rs b/components/error_code/src/storage.rs index e7caefb1b65..33cdc7b116a 100644 --- a/components/error_code/src/storage.rs +++ b/components/error_code/src/storage.rs @@ -25,6 +25,7 @@ define_error_codes!( DEADLINE_EXCEEDED => ("DeadlineExceeded", "", ""), API_VERSION_NOT_MATCHED => ("ApiVersionNotMatched", "", ""), INVALID_KEY_MODE => ("InvalidKeyMode", "", ""), + INVALID_MAX_TS_UPDATE => ("InvalidMaxTsUpdate", "", ""), COMMITTED => ("Committed", "", ""), PESSIMISTIC_LOCK_ROLLED_BACK => 
("PessimisticLockRolledBack", "", ""), diff --git a/components/raftstore-v2/src/worker/pd/misc.rs b/components/raftstore-v2/src/worker/pd/misc.rs index 6ade8d87de5..fd2848b95a4 100644 --- a/components/raftstore-v2/src/worker/pd/misc.rs +++ b/components/raftstore-v2/src/worker/pd/misc.rs @@ -12,7 +12,6 @@ use pd_client::PdClient; use raftstore::{store::TxnExt, Result}; use slog::{info, warn}; use tikv_util::{box_err, timer::GLOBAL_TIMER_HANDLE}; -use txn_types::TimeStamp; use super::Runner; @@ -51,18 +50,22 @@ where // And it won't break correctness of transaction commands, as // causal_ts_provider.flush() is implemented as // pd_client.get_tso() + renew TSO cached. - let res: Result = if let Some(causal_ts_provider) = &causal_ts_provider { + let res: Result<()> = if let Some(causal_ts_provider) = &causal_ts_provider { causal_ts_provider .async_flush() .await .map_err(|e| box_err!(e)) } else { pd_client.get_tso().await.map_err(Into::into) - }; + } + .and_then(|ts| { + concurrency_manager + .update_max_ts(ts, "raftstore-v2") + .map_err(|e| crate::Error::Other(box_err!(e))) + }); match res { - Ok(ts) => { - concurrency_manager.update_max_ts(ts); + Ok(()) => { success = txn_ext .max_ts_sync_status .compare_exchange( diff --git a/components/raftstore/src/store/worker/pd.rs b/components/raftstore/src/store/worker/pd.rs index 16c4fec66f0..5909d825888 100644 --- a/components/raftstore/src/store/worker/pd.rs +++ b/components/raftstore/src/store/worker/pd.rs @@ -52,7 +52,6 @@ use tikv_util::{ warn, worker::{Runnable, ScheduleError, Scheduler}, }; -use txn_types::TimeStamp; use yatp::Remote; use super::split_controller::AutoSplitControllerContext; @@ -1676,7 +1675,7 @@ where // And it won't break correctness of transaction commands, as // causal_ts_provider.flush() is implemented as pd_client.get_tso() + renew TSO // cached. - let res: crate::Result = + let res: crate::Result<()> = if let Some(causal_ts_provider) = &causal_ts_provider { causal_ts_provider .async_flush() @@ -1684,11 +1683,15 @@ where .map_err(|e| box_err!(e)) } else { pd_client.get_tso().await.map_err(Into::into) - }; + } + .and_then(|ts| { + concurrency_manager + .update_max_ts(ts, "raftstore") + .map_err(|e| crate::Error::Other(box_err!(e))) + }); match res { - Ok(ts) => { - concurrency_manager.update_max_ts(ts); + Ok(()) => { // Set the least significant bit to 1 to mark it as synced. success = txn_ext .max_ts_sync_status diff --git a/components/resolved_ts/src/advance.rs b/components/resolved_ts/src/advance.rs index 2667a43a304..9b7def6d8d8 100644 --- a/components/resolved_ts/src/advance.rs +++ b/components/resolved_ts/src/advance.rs @@ -117,7 +117,10 @@ impl AdvanceTsWorker { // optimizations like async commit is enabled. // Note: This step must be done before scheduling `Task::MinTs` task, and the // resolver must be checked in or after `Task::MinTs`' execution. 
- cm.update_max_ts(min_ts); + if let Err(e) = cm.update_max_ts(min_ts, "resolved-ts") { + error!("failed to advance resolved_ts: failed to update max_ts in concurrency manager"; "err" => ?e); + return; + } if let Some((min_mem_lock_ts, lock)) = cm.global_min_lock() { if min_mem_lock_ts < min_ts { min_ts = min_mem_lock_ts; diff --git a/components/server/src/server.rs b/components/server/src/server.rs index 38e711264ee..0f69c09ce73 100644 --- a/components/server/src/server.rs +++ b/components/server/src/server.rs @@ -28,7 +28,7 @@ use backup_stream::{ }; use causal_ts::CausalTsProviderImpl; use cdc::CdcConfigManager; -use concurrency_manager::ConcurrencyManager; +use concurrency_manager::{ConcurrencyManager, LIMIT_VALID_TIME_MULTIPLIER}; use engine_rocks::{ from_rocks_compression_type, RocksCompactedEvent, RocksEngine, RocksStatistics, }; @@ -114,7 +114,7 @@ use tikv::{ config::EngineType, config_manager::StorageConfigManger, kv::LocalTablets, - mvcc::MvccConsistencyCheckObserver, + mvcc::{MvccConsistencyCheckObserver, TimeStamp}, txn::{ flow_controller::{EngineFlowController, FlowController}, txn_status_cache::TxnStatusCache, @@ -178,6 +178,7 @@ fn run_impl( tikv.init_metrics_flusher(fetcher, engines_info); tikv.init_cgroup_monitor(); tikv.init_storage_stats_task(engines); + tikv.init_max_ts_updater(); tikv.run_server(server_config); tikv.run_status_server(in_memory_engine); tikv.core.init_quota_tuning_task(tikv.quota_limiter.clone()); @@ -401,7 +402,16 @@ where // Initialize concurrency manager let latest_ts = block_on(pd_client.get_tso()).expect("failed to get timestamp from PD"); - let concurrency_manager = ConcurrencyManager::new(latest_ts); + let concurrency_manager = ConcurrencyManager::new_with_config( + latest_ts, + (config.storage.max_ts_sync_interval * LIMIT_VALID_TIME_MULTIPLIER).into(), + config + .storage + .action_on_invalid_max_ts + .as_str() + .try_into() + .unwrap(), + ); // use different quota for front-end and back-end requests let quota_limiter = Arc::new(QuotaLimiter::new( @@ -704,6 +714,7 @@ where ttl_scheduler, flow_controller, storage.get_scheduler(), + storage.get_concurrency_manager(), )), ); @@ -1133,7 +1144,7 @@ where // Create Debugger. 
let mut debugger = DebuggerImpl::new( Engines::new(engines.engines.kv.clone(), engines.engines.raft.clone()), - self.cfg_controller.as_ref().unwrap().clone(), + cfg_controller.clone(), Some(storage), ); debugger.set_kv_statistics(self.kv_statistics.clone()); @@ -1154,6 +1165,31 @@ where server_config } + fn init_max_ts_updater(&self) { + let cm = self.concurrency_manager.clone(); + let pd_client = self.pd_client.clone(); + + let max_ts_sync_interval = self.core.config.storage.max_ts_sync_interval.into(); + let cfg_controller = self.cfg_controller.as_ref().unwrap().clone(); + self.core + .background_worker + .spawn_interval_async_task(max_ts_sync_interval, move || { + let cm = cm.clone(); + let pd_client = pd_client.clone(); + let allowance_ms = + cfg_controller.get_current().storage.max_ts_drift_allowance.as_millis(); + + async move { + let pd_tso = pd_client.get_tso().await; + if let Ok(ts) = pd_tso { + cm.set_max_ts_limit(TimeStamp::compose(ts.physical() + allowance_ms, 0)); + } else { + warn!("failed to get tso from pd in background, the max_ts validity check could be skipped"); + } + } + }); + } + fn register_services(&mut self) { let servers = self.servers.as_mut().unwrap(); let engines = self.engines.as_ref().unwrap(); diff --git a/components/server/src/server2.rs b/components/server/src/server2.rs index 3931d2cc37c..d2732dd42eb 100644 --- a/components/server/src/server2.rs +++ b/components/server/src/server2.rs @@ -33,7 +33,7 @@ use backup_stream::{ }; use causal_ts::CausalTsProviderImpl; use cdc::CdcConfigManager; -use concurrency_manager::ConcurrencyManager; +use concurrency_manager::{ConcurrencyManager, LIMIT_VALID_TIME_MULTIPLIER}; use engine_rocks::{from_rocks_compression_type, RocksEngine, RocksStatistics}; use engine_traits::{Engines, KvEngine, MiscExt, RaftEngine, TabletRegistry, CF_DEFAULT, CF_WRITE}; use file_system::{get_io_rate_limiter, BytesFetcher, MetricsManager as IoMetricsManager}; @@ -101,7 +101,7 @@ use tikv::{ config::EngineType, config_manager::StorageConfigManger, kv::LocalTablets, - mvcc::MvccConsistencyCheckObserver, + mvcc::{MvccConsistencyCheckObserver, TimeStamp}, txn::{ flow_controller::{FlowController, TabletFlowController}, txn_status_cache::TxnStatusCache, @@ -162,6 +162,7 @@ fn run_impl( tikv.init_metrics_flusher(fetcher, engines_info); tikv.init_cgroup_monitor(); tikv.init_storage_stats_task(); + tikv.init_max_ts_updater(); tikv.run_server(server_config); tikv.run_status_server(); tikv.core.init_quota_tuning_task(tikv.quota_limiter.clone()); @@ -325,7 +326,16 @@ where // Initialize concurrency manager let latest_ts = block_on(pd_client.get_tso()).expect("failed to get timestamp from PD"); - let concurrency_manager = ConcurrencyManager::new(latest_ts); + let concurrency_manager = ConcurrencyManager::new_with_config( + latest_ts, + (config.storage.max_ts_sync_interval * LIMIT_VALID_TIME_MULTIPLIER).into(), + config + .storage + .action_on_invalid_max_ts + .as_str() + .try_into() + .unwrap(), + ); // use different quota for front-end and back-end requests let quota_limiter = Arc::new(QuotaLimiter::new( @@ -586,6 +596,7 @@ where ttl_scheduler, flow_controller, storage.get_scheduler(), + storage.get_concurrency_manager(), )), ); @@ -947,6 +958,34 @@ where server_config } + fn init_max_ts_updater(&self) { + let cm = self.concurrency_manager.clone(); + let pd_client = self.pd_client.clone(); + + let max_ts_sync_interval = self.core.config.storage.max_ts_sync_interval.into(); + let cfg_controller = self.cfg_controller.as_ref().unwrap().clone(); + self.core + 
.background_worker + .spawn_interval_async_task(max_ts_sync_interval, move || { + let cm = cm.clone(); + let pd_client = pd_client.clone(); + let allowance_ms = cfg_controller + .get_current() + .storage + .max_ts_drift_allowance + .as_millis(); + + async move { + let pd_tso = pd_client.get_tso().await; + if let Ok(ts) = pd_tso { + cm.set_max_ts_limit(TimeStamp::compose(ts.physical() + allowance_ms, 0)); + } else { + warn!("failed to get tso from pd in background"); + } + } + }); + } + fn register_services(&mut self) { let servers = self.servers.as_mut().unwrap(); let engines = self.engines.as_ref().unwrap(); diff --git a/components/tikv_util/src/time.rs b/components/tikv_util/src/time.rs index af541d385d6..7944bef7131 100644 --- a/components/tikv_util/src/time.rs +++ b/components/tikv_util/src/time.rs @@ -285,7 +285,7 @@ mod inner { /// A measurement of a monotonically increasing clock. /// It's similar and meant to replace `std::time::Instant`, /// for providing extra features. -#[derive(Copy, Clone, Debug)] +#[derive(Copy, Clone, Debug, Eq)] pub enum Instant { Monotonic(Timespec), MonotonicCoarse(Timespec), diff --git a/metrics/grafana/tikv_details.dashboard.py b/metrics/grafana/tikv_details.dashboard.py index 889015b75c6..f0d3e320627 100644 --- a/metrics/grafana/tikv_details.dashboard.py +++ b/metrics/grafana/tikv_details.dashboard.py @@ -3157,6 +3157,25 @@ def Storage() -> RowPanel: ), ] ) + layout.row( + [ + graph_panel( + title="Concurrency manager max-ts", + description="The max_ts in the concurrency manager", + yaxes=yaxes(left_format=UNITS.NONE_FORMAT), + targets=[ + target( + expr="tikv_concurrency_manager_max_ts_limit", + legend_format="max_ts_limit", + ), + target( + expr="tikv_concurrency_manager_max_ts", + legend_format="max_ts", + ), + ], + ) + ] + ) return layout.row_panel diff --git a/metrics/grafana/tikv_details.json b/metrics/grafana/tikv_details.json index cd155391f28..97b1f9e4c96 100644 --- a/metrics/grafana/tikv_details.json +++ b/metrics/grafana/tikv_details.json @@ -10488,55 +10488,13 @@ "align": false, "alignLevel": 0 } - } - ], - "repeat": null, - "repeatDirection": null, - "span": null, - "targets": [], - "timeFrom": null, - "timeShift": null, - "title": "Storage", - "transformations": [], - "transparent": false, - "type": "row" - }, - { - "cacheTimeout": null, - "collapsed": true, - "datasource": null, - "description": null, - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "thresholds": { - "mode": "absolute", - "steps": [] - } - } - }, - "gridPos": { - "h": 7, - "w": 24, - "x": 0, - "y": 0 - }, - "height": null, - "hideTimeOverride": false, - "id": 77, - "interval": null, - "links": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "panels": [ + }, { "aliasColors": {}, "bars": false, "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "description": null, + "description": "The max_ts in the concurrency manager", "editable": true, "error": false, "fieldConfig": { @@ -10559,11 +10517,11 @@ "h": 7, "w": 24, "x": 0, - "y": 0 + "y": 56 }, "height": null, "hideTimeOverride": false, - "id": 78, + "id": 77, "interval": null, "isNew": true, "legend": { @@ -10599,62 +10557,37 @@ "renderer": "flot", "repeat": null, "repeatDirection": null, - "seriesOverrides": [ - { - "alias": "/.*-total/", - "bars": false, - "fill": 1, - "fillBelowTo": null, - "lines": true, - "yaxis": 2, - "zindex": 0 - } - ], + "seriesOverrides": [], "span": null, "stack": false, "steppedLine": false, "targets": [ { "datasource": 
"${DS_TEST-CLUSTER}", - "expr": "sum(rate(\n tikv_raftstore_local_read_reject_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance, reason) ", + "expr": "tikv_concurrency_manager_max_ts_limit", "format": "time_series", "hide": false, "instant": false, "interval": "", "intervalFactor": 1, - "legendFormat": "{{instance}}-reject-by-{{reason}}", + "legendFormat": "max_ts_limit", "metric": "", - "query": "sum(rate(\n tikv_raftstore_local_read_reject_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance, reason) ", + "query": "tikv_concurrency_manager_max_ts_limit", "refId": "", "step": 10, "target": "" }, { "datasource": "${DS_TEST-CLUSTER}", - "expr": "sum(rate(\n tikv_raftstore_local_read_executed_requests\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance) ", + "expr": "tikv_concurrency_manager_max_ts", "format": "time_series", "hide": false, "instant": false, "interval": "", "intervalFactor": 1, - "legendFormat": "{{instance}}-total", + "legendFormat": "max_ts", "metric": "", - "query": "sum(rate(\n tikv_raftstore_local_read_executed_requests\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance) ", - "refId": "", - "step": 10, - "target": "" - }, - { - "datasource": "${DS_TEST-CLUSTER}", - "expr": "sum(rate(\n tikv_raftstore_local_read_executed_stale_read_requests\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance) ", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 1, - "legendFormat": "{{instance}}-stale-read", - "metric": "", - "query": "sum(rate(\n tikv_raftstore_local_read_executed_stale_read_requests\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance) ", + "query": "tikv_concurrency_manager_max_ts", "refId": "", "step": 10, "target": "" @@ -10663,7 +10596,7 @@ "thresholds": [], "timeFrom": null, "timeShift": null, - "title": "Local reader requests", + "title": "Concurrency manager max-ts", "tooltip": { "msResolution": true, "shared": true, @@ -10711,7 +10644,7 @@ "targets": [], "timeFrom": null, "timeShift": null, - "title": "Local Reader", + "title": "Storage", "transformations": [], "transparent": false, "type": "row" @@ -10739,7 +10672,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 79, + "id": 78, "interval": null, "links": [], "maxDataPoints": 100, @@ -10751,7 +10684,7 @@ "bars": false, "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "description": "The CPU utilization of raftstore thread", + "description": null, "editable": true, "error": false, "fieldConfig": { @@ -10772,13 +10705,13 @@ }, "gridPos": { "h": 7, - "w": 12, + "w": 24, "x": 0, "y": 0 }, "height": null, "hideTimeOverride": false, - "id": 80, + "id": 79, "interval": null, "isNew": true, "legend": { @@ -10814,22 +10747,62 @@ "renderer": "flot", "repeat": null, "repeatDirection": null, - "seriesOverrides": [], + "seriesOverrides": [ + { + "alias": "/.*-total/", + "bars": false, + "fill": 1, + "fillBelowTo": null, + "lines": true, + "yaxis": 2, + "zindex": 0 + } + ], "span": null, "stack": false, "steppedLine": false, "targets": [ { "datasource": "${DS_TEST-CLUSTER}", - "expr": "sum(rate(\n 
tikv_thread_cpu_seconds_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",name=~\"(raftstore|rs)_.*\"}\n [$__rate_interval]\n)) by (instance) ", + "expr": "sum(rate(\n tikv_raftstore_local_read_reject_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance, reason) ", "format": "time_series", "hide": false, "instant": false, "interval": "", "intervalFactor": 1, - "legendFormat": "{{instance}}", + "legendFormat": "{{instance}}-reject-by-{{reason}}", "metric": "", - "query": "sum(rate(\n tikv_thread_cpu_seconds_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",name=~\"(raftstore|rs)_.*\"}\n [$__rate_interval]\n)) by (instance) ", + "query": "sum(rate(\n tikv_raftstore_local_read_reject_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance, reason) ", + "refId": "", + "step": 10, + "target": "" + }, + { + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_raftstore_local_read_executed_requests\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance) ", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "{{instance}}-total", + "metric": "", + "query": "sum(rate(\n tikv_raftstore_local_read_executed_requests\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance) ", + "refId": "", + "step": 10, + "target": "" + }, + { + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_raftstore_local_read_executed_stale_read_requests\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance) ", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "{{instance}}-stale-read", + "metric": "", + "query": "sum(rate(\n tikv_raftstore_local_read_executed_stale_read_requests\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance) ", "refId": "", "step": 10, "target": "" @@ -10838,7 +10811,7 @@ "thresholds": [], "timeFrom": null, "timeShift": null, - "title": "Raft store CPU", + "title": "Local reader requests", "tooltip": { "msResolution": true, "shared": true, @@ -10857,7 +10830,7 @@ "yaxes": [ { "decimals": null, - "format": "percentunit", + "format": "none", "label": null, "logBase": 1, "max": null, @@ -10878,13 +10851,55 @@ "align": false, "alignLevel": 0 } - }, + } + ], + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [], + "timeFrom": null, + "timeShift": null, + "title": "Local Reader", + "transformations": [], + "transparent": false, + "type": "row" + }, + { + "cacheTimeout": null, + "collapsed": true, + "datasource": null, + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, + "gridPos": { + "h": 7, + "w": 24, + "x": 0, + "y": 0 + }, + "height": null, + "hideTimeOverride": false, + "id": 80, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "panels": [ { "aliasColors": {}, "bars": false, "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - 
"description": "The CPU utilization of async apply", + "description": "The CPU utilization of raftstore thread", "editable": true, "error": false, "fieldConfig": { @@ -10906,7 +10921,7 @@ "gridPos": { "h": 7, "w": 12, - "x": 12, + "x": 0, "y": 0 }, "height": null, @@ -10954,7 +10969,7 @@ "targets": [ { "datasource": "${DS_TEST-CLUSTER}", - "expr": "sum(rate(\n tikv_thread_cpu_seconds_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",name=~\"apply_[0-9]+\"}\n [$__rate_interval]\n)) by (instance) ", + "expr": "sum(rate(\n tikv_thread_cpu_seconds_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",name=~\"(raftstore|rs)_.*\"}\n [$__rate_interval]\n)) by (instance) ", "format": "time_series", "hide": false, "instant": false, @@ -10962,7 +10977,7 @@ "intervalFactor": 1, "legendFormat": "{{instance}}", "metric": "", - "query": "sum(rate(\n tikv_thread_cpu_seconds_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",name=~\"apply_[0-9]+\"}\n [$__rate_interval]\n)) by (instance) ", + "query": "sum(rate(\n tikv_thread_cpu_seconds_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",name=~\"(raftstore|rs)_.*\"}\n [$__rate_interval]\n)) by (instance) ", "refId": "", "step": 10, "target": "" @@ -10971,7 +10986,7 @@ "thresholds": [], "timeFrom": null, "timeShift": null, - "title": "Async apply CPU", + "title": "Raft store CPU", "tooltip": { "msResolution": true, "shared": true, @@ -11017,23 +11032,14 @@ "bars": false, "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "description": "The CPU utilization of store writer thread", + "description": "The CPU utilization of async apply", "editable": true, "error": false, "fieldConfig": { "defaults": { "thresholds": { "mode": "absolute", - "steps": [ - { - "colorMode": "critical", - "fill": true, - "line": true, - "op": "gt", - "value": 0.8, - "yaxis": "left" - } - ] + "steps": [] } } }, @@ -11048,8 +11054,8 @@ "gridPos": { "h": 7, "w": 12, - "x": 0, - "y": 7 + "x": 12, + "y": 0 }, "height": null, "hideTimeOverride": false, @@ -11096,7 +11102,7 @@ "targets": [ { "datasource": "${DS_TEST-CLUSTER}", - "expr": "sum(rate(\n tikv_thread_cpu_seconds_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",name=~\"store_write.*\"}\n [$__rate_interval]\n)) by (instance) ", + "expr": "sum(rate(\n tikv_thread_cpu_seconds_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",name=~\"apply_[0-9]+\"}\n [$__rate_interval]\n)) by (instance) ", "format": "time_series", "hide": false, "instant": false, @@ -11104,25 +11110,16 @@ "intervalFactor": 1, "legendFormat": "{{instance}}", "metric": "", - "query": "sum(rate(\n tikv_thread_cpu_seconds_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",name=~\"store_write.*\"}\n [$__rate_interval]\n)) by (instance) ", + "query": "sum(rate(\n tikv_thread_cpu_seconds_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",name=~\"apply_[0-9]+\"}\n [$__rate_interval]\n)) by (instance) ", "refId": "", "step": 10, "target": "" } ], - "thresholds": [ - { - "colorMode": "critical", - "fill": true, - "line": true, - "op": "gt", - "value": 0.8, - "yaxis": "left" - } - ], + "thresholds": [], "timeFrom": null, "timeShift": null, - "title": "Store writer CPU", + "title": "Async apply CPU", "tooltip": { "msResolution": true, 
"shared": true, @@ -11168,14 +11165,23 @@ "bars": false, "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "description": "The CPU utilization of gRPC", + "description": "The CPU utilization of store writer thread", "editable": true, "error": false, "fieldConfig": { "defaults": { "thresholds": { "mode": "absolute", - "steps": [] + "steps": [ + { + "colorMode": "critical", + "fill": true, + "line": true, + "op": "gt", + "value": 0.8, + "yaxis": "left" + } + ] } } }, @@ -11190,7 +11196,7 @@ "gridPos": { "h": 7, "w": 12, - "x": 12, + "x": 0, "y": 7 }, "height": null, @@ -11238,7 +11244,7 @@ "targets": [ { "datasource": "${DS_TEST-CLUSTER}", - "expr": "sum(rate(\n tikv_thread_cpu_seconds_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",name=~\"grpc.*\"}\n [$__rate_interval]\n)) by (instance) ", + "expr": "sum(rate(\n tikv_thread_cpu_seconds_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",name=~\"store_write.*\"}\n [$__rate_interval]\n)) by (instance) ", "format": "time_series", "hide": false, "instant": false, @@ -11246,16 +11252,158 @@ "intervalFactor": 1, "legendFormat": "{{instance}}", "metric": "", - "query": "sum(rate(\n tikv_thread_cpu_seconds_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",name=~\"grpc.*\"}\n [$__rate_interval]\n)) by (instance) ", + "query": "sum(rate(\n tikv_thread_cpu_seconds_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",name=~\"store_write.*\"}\n [$__rate_interval]\n)) by (instance) ", "refId": "", "step": 10, "target": "" } ], - "thresholds": [], + "thresholds": [ + { + "colorMode": "critical", + "fill": true, + "line": true, + "op": "gt", + "value": 0.8, + "yaxis": "left" + } + ], "timeFrom": null, "timeShift": null, - "title": "gRPC poll CPU", + "title": "Store writer CPU", + "tooltip": { + "msResolution": true, + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "transformations": [], + "transparent": false, + "type": "graph", + "xaxis": { + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "decimals": null, + "format": "percentunit", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "decimals": null, + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": 0 + } + }, + { + "aliasColors": {}, + "bars": false, + "cacheTimeout": null, + "datasource": "${DS_TEST-CLUSTER}", + "description": "The CPU utilization of gRPC", + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, + "fill": 1, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, + "gridPos": { + "h": 7, + "w": 12, + "x": 12, + "y": 7 + }, + "height": null, + "hideTimeOverride": false, + "id": 84, + "interval": null, + "isNew": true, + "legend": { + "alignAsTable": true, + "avg": false, + "current": true, + "hideEmpty": true, + "hideZero": true, + "max": true, + "min": false, + "rightSide": true, + "show": true, + "sideWidth": null, + "sort": "max", + "sortDesc": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + 
"nullPointMode": "null as zero", + "options": { + "alertThreshold": true, + "dataLinks": [] + }, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "repeatDirection": null, + "seriesOverrides": [], + "span": null, + "stack": false, + "steppedLine": false, + "targets": [ + { + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_thread_cpu_seconds_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",name=~\"grpc.*\"}\n [$__rate_interval]\n)) by (instance) ", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "{{instance}}", + "metric": "", + "query": "sum(rate(\n tikv_thread_cpu_seconds_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",name=~\"grpc.*\"}\n [$__rate_interval]\n)) by (instance) ", + "refId": "", + "step": 10, + "target": "" + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "gRPC poll CPU", "tooltip": { "msResolution": true, "shared": true, @@ -11337,7 +11485,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 84, + "id": 85, "interval": null, "isNew": true, "legend": { @@ -11488,7 +11636,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 85, + "id": 86, "interval": null, "isNew": true, "legend": { @@ -11630,7 +11778,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 86, + "id": 87, "interval": null, "isNew": true, "legend": { @@ -11763,7 +11911,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 87, + "id": 88, "interval": null, "isNew": true, "legend": { @@ -11896,7 +12044,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 88, + "id": 89, "interval": null, "isNew": true, "legend": { @@ -12029,7 +12177,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 89, + "id": 90, "interval": null, "isNew": true, "legend": { @@ -12162,7 +12310,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 90, + "id": 91, "interval": null, "isNew": true, "legend": { @@ -12295,7 +12443,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 91, + "id": 92, "interval": null, "isNew": true, "legend": { @@ -12428,7 +12576,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 92, + "id": 93, "interval": null, "isNew": true, "legend": { @@ -12561,7 +12709,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 93, + "id": 94, "interval": null, "isNew": true, "legend": { @@ -12694,7 +12842,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 94, + "id": 95, "interval": null, "isNew": true, "legend": { @@ -12857,7 +13005,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 95, + "id": 96, "interval": null, "isNew": true, "legend": { @@ -12999,7 +13147,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 96, + "id": 97, "interval": null, "isNew": true, "legend": { @@ -13180,7 +13328,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 97, + "id": 98, "interval": null, "isNew": true, "legend": { @@ -13352,7 +13500,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 98, + "id": 99, "interval": null, "isNew": true, "legend": { @@ -13500,7 +13648,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 99, + "id": 100, "interval": null, "isNew": true, "legend": { @@ -13636,7 +13784,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 100, + "id": 101, "interval": null, "links": [], "maxDataPoints": 100, @@ -13675,7 +13823,7 @@ }, "height": null, "hideTimeOverride": 
false, - "id": 101, + "id": 102, "interval": null, "isNew": true, "legend": { @@ -13823,7 +13971,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 102, + "id": 103, "interval": null, "isNew": true, "legend": { @@ -13971,7 +14119,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 103, + "id": 104, "interval": null, "isNew": true, "legend": { @@ -14104,7 +14252,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 104, + "id": 105, "interval": null, "isNew": true, "legend": { @@ -14255,7 +14403,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 105, + "id": 106, "interval": null, "links": [], "maxDataPoints": 100, @@ -14294,7 +14442,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 106, + "id": 107, "interval": null, "isNew": true, "legend": { @@ -14495,7 +14643,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 107, + "id": 108, "interval": null, "isNew": true, "legend": { @@ -14696,7 +14844,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 108, + "id": 109, "interval": null, "isNew": true, "legend": { @@ -14897,7 +15045,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 109, + "id": 110, "interval": null, "isNew": true, "legend": { @@ -15098,7 +15246,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 110, + "id": 111, "interval": null, "isNew": true, "legend": { @@ -15299,7 +15447,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 111, + "id": 112, "interval": null, "isNew": true, "legend": { @@ -15500,7 +15648,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 112, + "id": 113, "interval": null, "isNew": true, "legend": { @@ -15701,7 +15849,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 113, + "id": 114, "interval": null, "isNew": true, "legend": { @@ -15902,7 +16050,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 114, + "id": 115, "interval": null, "isNew": true, "legend": { @@ -16103,7 +16251,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 115, + "id": 116, "interval": null, "isNew": true, "legend": { @@ -16304,7 +16452,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 116, + "id": 117, "interval": null, "isNew": true, "legend": { @@ -16505,7 +16653,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 117, + "id": 118, "interval": null, "isNew": true, "legend": { @@ -16706,7 +16854,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 118, + "id": 119, "interval": null, "isNew": true, "legend": { @@ -16910,7 +17058,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 119, + "id": 120, "interval": null, "links": [], "maxDataPoints": 100, @@ -16956,7 +17104,7 @@ "hideTimeOverride": false, "hideZeroBuckets": true, "highlightCards": true, - "id": 120, + "id": 121, "interval": null, "legend": { "show": false @@ -17054,7 +17202,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 121, + "id": 122, "interval": null, "isNew": true, "legend": { @@ -17262,7 +17410,7 @@ "hideTimeOverride": false, "hideZeroBuckets": true, "highlightCards": true, - "id": 122, + "id": 123, "interval": null, "legend": { "show": false @@ -17360,7 +17508,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 123, + "id": 124, "interval": null, "isNew": true, "legend": { @@ -17568,7 +17716,7 @@ "hideTimeOverride": false, "hideZeroBuckets": true, "highlightCards": true, - "id": 124, + "id": 125, "interval": null, "legend": { "show": false @@ -17666,7 +17814,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 125, + "id": 126, "interval": null, "isNew": true, 
"legend": { @@ -17874,7 +18022,7 @@ "hideTimeOverride": false, "hideZeroBuckets": true, "highlightCards": true, - "id": 126, + "id": 127, "interval": null, "legend": { "show": false @@ -17972,7 +18120,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 127, + "id": 128, "interval": null, "isNew": true, "legend": { @@ -18180,7 +18328,7 @@ "hideTimeOverride": false, "hideZeroBuckets": true, "highlightCards": true, - "id": 128, + "id": 129, "interval": null, "legend": { "show": false @@ -18278,7 +18426,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 129, + "id": 130, "interval": null, "isNew": true, "legend": { @@ -18486,7 +18634,7 @@ "hideTimeOverride": false, "hideZeroBuckets": true, "highlightCards": true, - "id": 130, + "id": 131, "interval": null, "legend": { "show": false @@ -18584,7 +18732,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 131, + "id": 132, "interval": null, "isNew": true, "legend": { @@ -18785,7 +18933,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 132, + "id": 133, "interval": null, "isNew": true, "legend": { @@ -18933,7 +19081,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 133, + "id": 134, "interval": null, "isNew": true, "legend": { @@ -19069,7 +19217,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 134, + "id": 135, "interval": null, "links": [], "maxDataPoints": 100, @@ -19108,7 +19256,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 135, + "id": 136, "interval": null, "isNew": true, "legend": { @@ -19241,7 +19389,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 136, + "id": 137, "interval": null, "isNew": true, "legend": { @@ -19374,7 +19522,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 137, + "id": 138, "interval": null, "isNew": true, "legend": { @@ -19507,7 +19655,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 138, + "id": 139, "interval": null, "isNew": true, "legend": { @@ -19647,7 +19795,7 @@ "hideTimeOverride": false, "hideZeroBuckets": true, "highlightCards": true, - "id": 139, + "id": 140, "interval": null, "legend": { "show": false @@ -19745,7 +19893,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 140, + "id": 141, "interval": null, "isNew": true, "legend": { @@ -19953,7 +20101,7 @@ "hideTimeOverride": false, "hideZeroBuckets": true, "highlightCards": true, - "id": 141, + "id": 142, "interval": null, "legend": { "show": false @@ -20051,7 +20199,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 142, + "id": 143, "interval": null, "isNew": true, "legend": { @@ -20259,7 +20407,7 @@ "hideTimeOverride": false, "hideZeroBuckets": true, "highlightCards": true, - "id": 143, + "id": 144, "interval": null, "legend": { "show": false @@ -20357,7 +20505,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 144, + "id": 145, "interval": null, "isNew": true, "legend": { @@ -20565,7 +20713,7 @@ "hideTimeOverride": false, "hideZeroBuckets": true, "highlightCards": true, - "id": 145, + "id": 146, "interval": null, "legend": { "show": false @@ -20670,7 +20818,7 @@ "hideTimeOverride": false, "hideZeroBuckets": true, "highlightCards": true, - "id": 146, + "id": 147, "interval": null, "legend": { "show": false @@ -20768,7 +20916,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 147, + "id": 148, "interval": null, "isNew": true, "legend": { @@ -20901,7 +21049,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 148, + "id": 149, "interval": null, "isNew": true, "legend": { @@ -21052,7 +21200,7 @@ }, "height": null, 
"hideTimeOverride": false, - "id": 149, + "id": 150, "interval": null, "links": [], "maxDataPoints": 100, @@ -21091,7 +21239,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 150, + "id": 151, "interval": null, "isNew": true, "legend": { @@ -21239,7 +21387,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 151, + "id": 152, "interval": null, "isNew": true, "legend": { @@ -21394,7 +21542,7 @@ "hideTimeOverride": false, "hideZeroBuckets": true, "highlightCards": true, - "id": 152, + "id": 153, "interval": null, "legend": { "show": false @@ -21492,7 +21640,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 153, + "id": 154, "interval": null, "isNew": true, "legend": { @@ -21632,7 +21780,7 @@ "hideTimeOverride": false, "hideZeroBuckets": true, "highlightCards": true, - "id": 154, + "id": 155, "interval": null, "legend": { "show": false @@ -21737,7 +21885,7 @@ "hideTimeOverride": false, "hideZeroBuckets": true, "highlightCards": true, - "id": 155, + "id": 156, "interval": null, "legend": { "show": false @@ -21842,7 +21990,7 @@ "hideTimeOverride": false, "hideZeroBuckets": true, "highlightCards": true, - "id": 156, + "id": 157, "interval": null, "legend": { "show": false @@ -21947,7 +22095,7 @@ "hideTimeOverride": false, "hideZeroBuckets": true, "highlightCards": true, - "id": 157, + "id": 158, "interval": null, "legend": { "show": false @@ -22052,7 +22200,7 @@ "hideTimeOverride": false, "hideZeroBuckets": true, "highlightCards": true, - "id": 158, + "id": 159, "interval": null, "legend": { "show": false @@ -22157,7 +22305,7 @@ "hideTimeOverride": false, "hideZeroBuckets": true, "highlightCards": true, - "id": 159, + "id": 160, "interval": null, "legend": { "show": false @@ -22262,7 +22410,7 @@ "hideTimeOverride": false, "hideZeroBuckets": true, "highlightCards": true, - "id": 160, + "id": 161, "interval": null, "legend": { "show": false @@ -22367,7 +22515,7 @@ "hideTimeOverride": false, "hideZeroBuckets": true, "highlightCards": true, - "id": 161, + "id": 162, "interval": null, "legend": { "show": false @@ -22472,7 +22620,7 @@ "hideTimeOverride": false, "hideZeroBuckets": true, "highlightCards": true, - "id": 162, + "id": 163, "interval": null, "legend": { "show": false @@ -22577,7 +22725,7 @@ "hideTimeOverride": false, "hideZeroBuckets": true, "highlightCards": true, - "id": 163, + "id": 164, "interval": null, "legend": { "show": false @@ -22675,7 +22823,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 164, + "id": 165, "interval": null, "isNew": true, "legend": { @@ -22808,7 +22956,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 165, + "id": 166, "interval": null, "isNew": true, "legend": { @@ -22959,7 +23107,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 166, + "id": 167, "interval": null, "links": [], "maxDataPoints": 100, @@ -22998,7 +23146,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 167, + "id": 168, "interval": null, "isNew": true, "legend": { @@ -23131,7 +23279,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 168, + "id": 169, "interval": null, "isNew": true, "legend": { @@ -23264,7 +23412,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 169, + "id": 170, "interval": null, "isNew": true, "legend": { @@ -23397,7 +23545,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 170, + "id": 171, "interval": null, "isNew": true, "legend": { @@ -23530,7 +23678,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 171, + "id": 172, "interval": null, "isNew": true, "legend": { @@ -23663,7 
+23811,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 172, + "id": 173, "interval": null, "isNew": true, "legend": { @@ -23818,7 +23966,7 @@ "hideTimeOverride": false, "hideZeroBuckets": true, "highlightCards": true, - "id": 173, + "id": 174, "interval": null, "legend": { "show": false @@ -23916,7 +24064,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 174, + "id": 175, "interval": null, "isNew": true, "legend": { @@ -24124,7 +24272,7 @@ "hideTimeOverride": false, "hideZeroBuckets": true, "highlightCards": true, - "id": 175, + "id": 176, "interval": null, "legend": { "show": false @@ -24222,7 +24370,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 176, + "id": 177, "interval": null, "isNew": true, "legend": { @@ -24426,7 +24574,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 177, + "id": 178, "interval": null, "links": [], "maxDataPoints": 100, @@ -24465,7 +24613,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 178, + "id": 179, "interval": null, "isNew": true, "legend": { @@ -24598,7 +24746,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 179, + "id": 180, "interval": null, "isNew": true, "legend": { @@ -24731,7 +24879,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 180, + "id": 181, "interval": null, "isNew": true, "legend": { @@ -24864,7 +25012,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 181, + "id": 182, "interval": null, "isNew": true, "legend": { @@ -24997,7 +25145,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 182, + "id": 183, "interval": null, "isNew": true, "legend": { @@ -25130,7 +25278,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 183, + "id": 184, "interval": null, "isNew": true, "legend": { @@ -25293,7 +25441,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 184, + "id": 185, "interval": null, "isNew": true, "legend": { @@ -25429,7 +25577,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 185, + "id": 186, "interval": null, "links": [], "maxDataPoints": 100, @@ -25468,7 +25616,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 186, + "id": 187, "interval": null, "isNew": true, "legend": { @@ -25616,7 +25764,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 187, + "id": 188, "interval": null, "isNew": true, "legend": { @@ -25764,7 +25912,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 188, + "id": 189, "interval": null, "isNew": true, "legend": { @@ -25897,7 +26045,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 189, + "id": 190, "interval": null, "isNew": true, "legend": { @@ -26030,7 +26178,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 190, + "id": 191, "interval": null, "isNew": true, "legend": { @@ -26163,7 +26311,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 191, + "id": 192, "interval": null, "isNew": true, "legend": { @@ -26296,7 +26444,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 192, + "id": 193, "interval": null, "isNew": true, "legend": { @@ -26429,7 +26577,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 193, + "id": 194, "interval": null, "isNew": true, "legend": { @@ -26562,7 +26710,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 194, + "id": 195, "interval": null, "isNew": true, "legend": { @@ -26739,7 +26887,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 195, + "id": 196, "interval": null, "links": [], "maxDataPoints": 100, @@ -26778,7 +26926,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 196, + "id": 197, 
"interval": null, "isNew": true, "legend": { @@ -26941,7 +27089,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 197, + "id": 198, "interval": null, "isNew": true, "legend": { @@ -27142,7 +27290,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 198, + "id": 199, "interval": null, "isNew": true, "legend": { @@ -27290,7 +27438,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 199, + "id": 200, "interval": null, "isNew": true, "legend": { @@ -27453,7 +27601,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 200, + "id": 201, "interval": null, "isNew": true, "legend": { @@ -27654,7 +27802,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 201, + "id": 202, "interval": null, "isNew": true, "legend": { @@ -27832,7 +27980,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 202, + "id": 203, "interval": null, "isNew": true, "legend": { @@ -27995,7 +28143,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 203, + "id": 204, "interval": null, "isNew": true, "legend": { @@ -28158,7 +28306,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 204, + "id": 205, "interval": null, "isNew": true, "legend": { @@ -28291,7 +28439,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 205, + "id": 206, "interval": null, "isNew": true, "legend": { @@ -28495,7 +28643,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 206, + "id": 207, "interval": null, "links": [], "maxDataPoints": 100, @@ -28534,7 +28682,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 207, + "id": 208, "interval": null, "isNew": true, "legend": { @@ -28727,7 +28875,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 208, + "id": 209, "interval": null, "isNew": true, "legend": { @@ -28905,7 +29053,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 209, + "id": 210, "interval": null, "isNew": true, "legend": { @@ -29113,7 +29261,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 210, + "id": 211, "interval": null, "isNew": true, "legend": { @@ -29291,7 +29439,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 211, + "id": 212, "interval": null, "isNew": true, "legend": { @@ -29454,7 +29602,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 212, + "id": 213, "interval": null, "isNew": true, "legend": { @@ -29632,7 +29780,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 213, + "id": 214, "interval": null, "isNew": true, "legend": { @@ -29765,7 +29913,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 214, + "id": 215, "interval": null, "isNew": true, "legend": { @@ -29943,7 +30091,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 215, + "id": 216, "interval": null, "isNew": true, "legend": { @@ -30076,7 +30224,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 216, + "id": 217, "interval": null, "isNew": true, "legend": { @@ -30254,7 +30402,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 217, + "id": 218, "interval": null, "isNew": true, "legend": { @@ -30387,7 +30535,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 218, + "id": 219, "interval": null, "isNew": true, "legend": { @@ -30565,7 +30713,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 219, + "id": 220, "interval": null, "isNew": true, "legend": { @@ -30743,7 +30891,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 220, + "id": 221, "interval": null, "isNew": true, "legend": { @@ -30921,7 +31069,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 221, + "id": 222, "interval": null, 
"isNew": true, "legend": { @@ -31054,7 +31202,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 222, + "id": 223, "interval": null, "isNew": true, "legend": { @@ -31187,7 +31335,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 223, + "id": 224, "interval": null, "isNew": true, "legend": { @@ -31320,7 +31468,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 224, + "id": 225, "interval": null, "isNew": true, "legend": { @@ -31543,7 +31691,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 225, + "id": 226, "interval": null, "isNew": true, "legend": { @@ -31736,7 +31884,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 226, + "id": 227, "interval": null, "isNew": true, "legend": { @@ -31899,7 +32047,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 227, + "id": 228, "interval": null, "isNew": true, "legend": { @@ -32092,7 +32240,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 228, + "id": 229, "interval": null, "isNew": true, "legend": { @@ -32240,7 +32388,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 229, + "id": 230, "interval": null, "isNew": true, "legend": { @@ -32373,7 +32521,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 230, + "id": 231, "interval": null, "isNew": true, "legend": { @@ -32521,7 +32669,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 231, + "id": 232, "interval": null, "isNew": true, "legend": { @@ -32699,7 +32847,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 232, + "id": 233, "interval": null, "isNew": true, "legend": { @@ -32862,7 +33010,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 233, + "id": 234, "interval": null, "isNew": true, "legend": { @@ -33040,7 +33188,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 234, + "id": 235, "interval": null, "isNew": true, "legend": { @@ -33173,7 +33321,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 235, + "id": 236, "interval": null, "isNew": true, "legend": { @@ -33306,7 +33454,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 236, + "id": 237, "interval": null, "isNew": true, "legend": { @@ -33439,7 +33587,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 237, + "id": 238, "interval": null, "isNew": true, "legend": { @@ -33572,7 +33720,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 238, + "id": 239, "interval": null, "isNew": true, "legend": { @@ -33705,7 +33853,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 239, + "id": 240, "interval": null, "isNew": true, "legend": { @@ -33845,7 +33993,7 @@ "hideTimeOverride": false, "hideZeroBuckets": true, "highlightCards": true, - "id": 240, + "id": 241, "interval": null, "legend": { "show": false @@ -33943,7 +34091,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 241, + "id": 242, "interval": null, "isNew": true, "legend": { @@ -34144,7 +34292,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 242, + "id": 243, "interval": null, "isNew": true, "legend": { @@ -34277,7 +34425,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 243, + "id": 244, "interval": null, "isNew": true, "legend": { @@ -34455,7 +34603,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 244, + "id": 245, "interval": null, "isNew": true, "legend": { @@ -34588,7 +34736,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 245, + "id": 246, "interval": null, "isNew": true, "legend": { @@ -34724,7 +34872,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 246, + "id": 247, "interval": null, 
"links": [], "maxDataPoints": 100, @@ -34763,7 +34911,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 247, + "id": 248, "interval": null, "isNew": true, "legend": { @@ -34911,7 +35059,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 248, + "id": 249, "interval": null, "isNew": true, "legend": { @@ -35059,7 +35207,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 249, + "id": 250, "interval": null, "isNew": true, "legend": { @@ -35192,7 +35340,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 250, + "id": 251, "interval": null, "isNew": true, "legend": { @@ -35325,7 +35473,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 251, + "id": 252, "interval": null, "isNew": true, "legend": { @@ -35503,7 +35651,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 252, + "id": 253, "interval": null, "isNew": true, "legend": { @@ -35681,7 +35829,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 253, + "id": 254, "interval": null, "isNew": true, "legend": { @@ -35859,7 +36007,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 254, + "id": 255, "interval": null, "isNew": true, "legend": { @@ -35992,7 +36140,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 255, + "id": 256, "interval": null, "isNew": true, "legend": { @@ -36170,7 +36318,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 256, + "id": 257, "interval": null, "isNew": true, "legend": { @@ -36303,7 +36451,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 257, + "id": 258, "interval": null, "isNew": true, "legend": { @@ -36466,7 +36614,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 258, + "id": 259, "interval": null, "isNew": true, "legend": { @@ -36644,7 +36792,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 259, + "id": 260, "interval": null, "isNew": true, "legend": { @@ -36822,7 +36970,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 260, + "id": 261, "interval": null, "isNew": true, "legend": { @@ -37000,7 +37148,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 261, + "id": 262, "interval": null, "isNew": true, "legend": { @@ -37133,7 +37281,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 262, + "id": 263, "interval": null, "isNew": true, "legend": { @@ -37311,7 +37459,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 263, + "id": 264, "interval": null, "isNew": true, "legend": { @@ -37444,7 +37592,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 264, + "id": 265, "interval": null, "isNew": true, "legend": { @@ -37622,7 +37770,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 265, + "id": 266, "interval": null, "isNew": true, "legend": { @@ -37755,7 +37903,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 266, + "id": 267, "interval": null, "isNew": true, "legend": { @@ -37888,7 +38036,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 267, + "id": 268, "interval": null, "isNew": true, "legend": { @@ -38066,7 +38214,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 268, + "id": 269, "interval": null, "isNew": true, "legend": { @@ -38244,7 +38392,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 269, + "id": 270, "interval": null, "isNew": true, "legend": { @@ -38377,7 +38525,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 270, + "id": 271, "interval": null, "isNew": true, "legend": { @@ -38555,7 +38703,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 271, + "id": 272, "interval": null, "isNew": true, 
"legend": { @@ -38688,7 +38836,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 272, + "id": 273, "interval": null, "isNew": true, "legend": { @@ -38866,7 +39014,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 273, + "id": 274, "interval": null, "isNew": true, "legend": { @@ -39002,7 +39150,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 274, + "id": 275, "interval": null, "links": [], "maxDataPoints": 100, @@ -39041,7 +39189,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 275, + "id": 276, "interval": null, "isNew": true, "legend": { @@ -39174,7 +39322,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 276, + "id": 277, "interval": null, "isNew": true, "legend": { @@ -39322,7 +39470,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 277, + "id": 278, "interval": null, "isNew": true, "legend": { @@ -39523,7 +39671,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 278, + "id": 279, "interval": null, "isNew": true, "legend": { @@ -39656,7 +39804,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 279, + "id": 280, "interval": null, "isNew": true, "legend": { @@ -39789,7 +39937,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 280, + "id": 281, "interval": null, "isNew": true, "legend": { @@ -39922,7 +40070,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 281, + "id": 282, "interval": null, "isNew": true, "legend": { @@ -40055,7 +40203,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 282, + "id": 283, "interval": null, "isNew": true, "legend": { @@ -40188,7 +40336,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 283, + "id": 284, "interval": null, "isNew": true, "legend": { @@ -40328,7 +40476,7 @@ "hideTimeOverride": false, "hideZeroBuckets": true, "highlightCards": true, - "id": 284, + "id": 285, "interval": null, "legend": { "show": false @@ -40433,7 +40581,7 @@ "hideTimeOverride": false, "hideZeroBuckets": true, "highlightCards": true, - "id": 285, + "id": 286, "interval": null, "legend": { "show": false @@ -40531,7 +40679,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 286, + "id": 287, "interval": null, "isNew": true, "legend": { @@ -40671,7 +40819,7 @@ "hideTimeOverride": false, "hideZeroBuckets": true, "highlightCards": true, - "id": 287, + "id": 288, "interval": null, "legend": { "show": false @@ -40769,7 +40917,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 288, + "id": 289, "interval": null, "isNew": true, "legend": { @@ -40909,7 +41057,7 @@ "hideTimeOverride": false, "hideZeroBuckets": true, "highlightCards": true, - "id": 289, + "id": 290, "interval": null, "legend": { "show": false @@ -41007,7 +41155,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 290, + "id": 291, "interval": null, "isNew": true, "legend": { @@ -41215,7 +41363,7 @@ "hideTimeOverride": false, "hideZeroBuckets": true, "highlightCards": true, - "id": 291, + "id": 292, "interval": null, "legend": { "show": false @@ -41313,7 +41461,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 292, + "id": 293, "interval": null, "isNew": true, "legend": { @@ -41514,7 +41662,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 293, + "id": 294, "interval": null, "isNew": true, "legend": { @@ -41722,7 +41870,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 294, + "id": 295, "interval": null, "isNew": true, "legend": { @@ -41900,7 +42048,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 295, + "id": 296, "interval": null, "isNew": true, "legend": { @@ 
-42033,7 +42181,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 296, + "id": 297, "interval": null, "isNew": true, "legend": { @@ -42166,7 +42314,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 297, + "id": 298, "interval": null, "isNew": true, "legend": { @@ -42299,7 +42447,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 298, + "id": 299, "interval": null, "isNew": true, "legend": { @@ -42435,7 +42583,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 299, + "id": 300, "interval": null, "links": [], "maxDataPoints": 100, @@ -42474,7 +42622,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 300, + "id": 301, "interval": null, "isNew": true, "legend": { @@ -42622,7 +42770,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 301, + "id": 302, "interval": null, "isNew": true, "legend": { @@ -42762,7 +42910,7 @@ "hideTimeOverride": false, "hideZeroBuckets": true, "highlightCards": true, - "id": 302, + "id": 303, "interval": null, "legend": { "show": false @@ -42860,7 +43008,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 303, + "id": 304, "interval": null, "isNew": true, "legend": { @@ -42993,7 +43141,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 304, + "id": 305, "interval": null, "isNew": true, "legend": { @@ -43126,7 +43274,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 305, + "id": 306, "interval": null, "isNew": true, "legend": { @@ -43304,7 +43452,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 306, + "id": 307, "interval": null, "isNew": true, "legend": { @@ -43467,7 +43615,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 307, + "id": 308, "interval": null, "isNew": true, "legend": { @@ -43615,7 +43763,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 308, + "id": 309, "interval": null, "isNew": true, "legend": { @@ -43748,7 +43896,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 309, + "id": 310, "interval": null, "isNew": true, "legend": { @@ -43884,7 +44032,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 310, + "id": 311, "interval": null, "links": [], "maxDataPoints": 100, @@ -43923,7 +44071,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 311, + "id": 312, "interval": null, "isNew": true, "legend": { @@ -44071,7 +44219,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 312, + "id": 313, "interval": null, "isNew": true, "legend": { @@ -44204,7 +44352,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 313, + "id": 314, "interval": null, "isNew": true, "legend": { @@ -44337,7 +44485,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 314, + "id": 315, "interval": null, "isNew": true, "legend": { @@ -44470,7 +44618,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 315, + "id": 316, "interval": null, "isNew": true, "legend": { @@ -44603,7 +44751,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 316, + "id": 317, "interval": null, "isNew": true, "legend": { @@ -44758,7 +44906,7 @@ "hideTimeOverride": false, "hideZeroBuckets": true, "highlightCards": true, - "id": 317, + "id": 318, "interval": null, "legend": { "show": false @@ -44859,7 +45007,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 318, + "id": 319, "interval": null, "links": [], "maxDataPoints": 100, @@ -44898,7 +45046,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 319, + "id": 320, "interval": null, "isNew": true, "legend": { @@ -45031,7 +45179,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 320, + "id": 
321, "interval": null, "isNew": true, "legend": { @@ -45164,7 +45312,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 321, + "id": 322, "interval": null, "isNew": true, "legend": { @@ -45304,7 +45452,7 @@ "hideTimeOverride": false, "hideZeroBuckets": true, "highlightCards": true, - "id": 322, + "id": 323, "interval": null, "legend": { "show": false @@ -45402,7 +45550,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 323, + "id": 324, "interval": null, "isNew": true, "legend": { @@ -45603,7 +45751,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 324, + "id": 325, "interval": null, "isNew": true, "legend": { @@ -45804,7 +45952,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 325, + "id": 326, "interval": null, "isNew": true, "legend": { @@ -46008,7 +46156,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 326, + "id": 327, "interval": null, "links": [], "maxDataPoints": 100, @@ -46047,7 +46195,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 327, + "id": 328, "interval": null, "isNew": true, "legend": { @@ -46195,7 +46343,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 328, + "id": 329, "interval": null, "isNew": true, "legend": { @@ -46396,7 +46544,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 329, + "id": 330, "interval": null, "isNew": true, "legend": { @@ -46597,7 +46745,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 330, + "id": 331, "interval": null, "isNew": true, "legend": { @@ -46798,7 +46946,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 331, + "id": 332, "interval": null, "isNew": true, "legend": { @@ -46999,7 +47147,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 332, + "id": 333, "interval": null, "isNew": true, "legend": { @@ -47132,7 +47280,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 333, + "id": 334, "interval": null, "isNew": true, "legend": { @@ -47265,7 +47413,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 334, + "id": 335, "interval": null, "isNew": true, "legend": { @@ -47398,7 +47546,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 335, + "id": 336, "interval": null, "isNew": true, "legend": { @@ -47531,7 +47679,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 336, + "id": 337, "interval": null, "isNew": true, "legend": { @@ -47739,7 +47887,7 @@ "hideTimeOverride": false, "hideZeroBuckets": true, "highlightCards": true, - "id": 337, + "id": 338, "interval": null, "legend": { "show": false @@ -47840,7 +47988,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 338, + "id": 339, "interval": null, "links": [], "maxDataPoints": 100, @@ -47886,7 +48034,7 @@ "hideTimeOverride": false, "hideZeroBuckets": true, "highlightCards": true, - "id": 339, + "id": 340, "interval": null, "legend": { "show": false @@ -47984,7 +48132,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 340, + "id": 341, "interval": null, "isNew": true, "legend": { @@ -48185,7 +48333,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 341, + "id": 342, "interval": null, "isNew": true, "legend": { @@ -48318,7 +48466,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 342, + "id": 343, "interval": null, "isNew": true, "legend": { @@ -48451,7 +48599,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 343, + "id": 344, "interval": null, "isNew": true, "legend": { @@ -48584,7 +48732,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 344, + "id": 345, "interval": null, "isNew": true, "legend": { @@ -48785,7 
+48933,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 345, + "id": 346, "interval": null, "isNew": true, "legend": { @@ -48918,7 +49066,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 346, + "id": 347, "interval": null, "isNew": true, "legend": { @@ -49051,7 +49199,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 347, + "id": 348, "interval": null, "isNew": true, "legend": { @@ -49187,7 +49335,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 348, + "id": 349, "interval": null, "links": [], "maxDataPoints": 100, @@ -49226,7 +49374,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 349, + "id": 350, "interval": null, "isNew": true, "legend": { @@ -49427,7 +49575,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 350, + "id": 351, "interval": null, "isNew": true, "legend": { @@ -49628,7 +49776,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 351, + "id": 352, "interval": null, "isNew": true, "legend": { @@ -49829,7 +49977,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 352, + "id": 353, "interval": null, "isNew": true, "legend": { @@ -50030,7 +50178,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 353, + "id": 354, "interval": null, "isNew": true, "legend": { @@ -50163,7 +50311,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 354, + "id": 355, "interval": null, "isNew": true, "legend": { @@ -50296,7 +50444,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 355, + "id": 356, "interval": null, "isNew": true, "legend": { @@ -50429,7 +50577,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 356, + "id": 357, "interval": null, "isNew": true, "legend": { @@ -50562,7 +50710,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 357, + "id": 358, "interval": null, "isNew": true, "legend": { @@ -50695,7 +50843,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 358, + "id": 359, "interval": null, "isNew": true, "legend": { @@ -50835,7 +50983,7 @@ "hideTimeOverride": false, "hideZeroBuckets": true, "highlightCards": true, - "id": 359, + "id": 360, "interval": null, "legend": { "show": false @@ -50933,7 +51081,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 360, + "id": 361, "interval": null, "isNew": true, "legend": { @@ -51137,7 +51285,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 361, + "id": 362, "interval": null, "links": [], "maxDataPoints": 100, @@ -51176,7 +51324,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 362, + "id": 363, "interval": null, "isNew": true, "legend": { @@ -51309,7 +51457,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 363, + "id": 364, "interval": null, "isNew": true, "legend": { @@ -51442,7 +51590,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 364, + "id": 365, "interval": null, "isNew": true, "legend": { @@ -51582,7 +51730,7 @@ "hideTimeOverride": false, "hideZeroBuckets": true, "highlightCards": true, - "id": 365, + "id": 366, "interval": null, "legend": { "show": false @@ -51680,7 +51828,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 366, + "id": 367, "interval": null, "isNew": true, "legend": { @@ -51881,7 +52029,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 367, + "id": 368, "interval": null, "isNew": true, "legend": { @@ -52082,7 +52230,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 368, + "id": 369, "interval": null, "isNew": true, "legend": { @@ -52286,7 +52434,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 369, + "id": 370, "interval": 
null, "links": [], "maxDataPoints": 100, @@ -52325,7 +52473,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 370, + "id": 371, "interval": null, "isNew": true, "legend": { @@ -52503,7 +52651,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 371, + "id": 372, "interval": null, "isNew": true, "legend": { @@ -52704,7 +52852,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 372, + "id": 373, "interval": null, "isNew": true, "legend": { @@ -52837,7 +52985,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 373, + "id": 374, "interval": null, "isNew": true, "legend": { @@ -52970,7 +53118,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 374, + "id": 375, "interval": null, "isNew": true, "legend": { @@ -53103,7 +53251,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 375, + "id": 376, "interval": null, "isNew": true, "legend": { @@ -53236,7 +53384,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 376, + "id": 377, "interval": null, "isNew": true, "legend": { @@ -53369,7 +53517,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 377, + "id": 378, "interval": null, "isNew": true, "legend": { @@ -53498,7 +53646,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 378, + "id": 379, "interval": null, "links": [], "maxDataPoints": 100, @@ -53573,7 +53721,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 379, + "id": 380, "interval": null, "links": [], "maxDataPoints": 100, @@ -53652,7 +53800,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 380, + "id": 381, "interval": null, "isNew": true, "legend": { @@ -53905,7 +54053,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 381, + "id": 382, "interval": null, "isNew": true, "legend": { @@ -54038,7 +54186,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 382, + "id": 383, "interval": null, "isNew": true, "legend": { @@ -54174,7 +54322,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 383, + "id": 384, "interval": null, "links": [], "maxDataPoints": 100, @@ -54213,7 +54361,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 384, + "id": 385, "interval": null, "isNew": true, "legend": { @@ -54361,7 +54509,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 385, + "id": 386, "interval": null, "isNew": true, "legend": { @@ -54494,7 +54642,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 386, + "id": 387, "interval": null, "isNew": true, "legend": { @@ -54695,7 +54843,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 387, + "id": 388, "interval": null, "isNew": true, "legend": { @@ -54843,7 +54991,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 388, + "id": 389, "interval": null, "isNew": true, "legend": { @@ -55044,7 +55192,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 389, + "id": 390, "interval": null, "isNew": true, "legend": { @@ -55177,7 +55325,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 390, + "id": 391, "interval": null, "isNew": true, "legend": { @@ -55310,7 +55458,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 391, + "id": 392, "interval": null, "isNew": true, "legend": { @@ -55443,7 +55591,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 392, + "id": 393, "interval": null, "isNew": true, "legend": { @@ -55576,7 +55724,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 393, + "id": 394, "interval": null, "isNew": true, "legend": { @@ -55716,7 +55864,7 @@ "hideTimeOverride": false, "hideZeroBuckets": true, "highlightCards": true, - 
"id": 394, + "id": 395, "interval": null, "legend": { "show": false @@ -55814,7 +55962,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 395, + "id": 396, "interval": null, "isNew": true, "legend": { @@ -56018,7 +56166,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 396, + "id": 397, "interval": null, "links": [], "maxDataPoints": 100, @@ -56057,7 +56205,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 397, + "id": 398, "interval": null, "isNew": true, "legend": { @@ -56190,7 +56338,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 398, + "id": 399, "interval": null, "isNew": true, "legend": { @@ -56323,7 +56471,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 399, + "id": 400, "interval": null, "isNew": true, "legend": { @@ -56456,7 +56604,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 400, + "id": 401, "interval": null, "isNew": true, "legend": { @@ -56592,7 +56740,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 401, + "id": 402, "interval": null, "links": [], "maxDataPoints": 100, @@ -56631,7 +56779,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 402, + "id": 403, "interval": null, "isNew": true, "legend": { @@ -56764,7 +56912,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 403, + "id": 404, "interval": null, "isNew": true, "legend": { @@ -56897,7 +57045,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 404, + "id": 405, "interval": null, "isNew": true, "legend": { @@ -57045,7 +57193,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 405, + "id": 406, "interval": null, "isNew": true, "legend": { @@ -57178,7 +57326,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 406, + "id": 407, "interval": null, "isNew": true, "legend": { @@ -57311,7 +57459,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 407, + "id": 408, "interval": null, "isNew": true, "legend": { @@ -57444,7 +57592,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 408, + "id": 409, "interval": null, "isNew": true, "legend": { @@ -57580,7 +57728,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 409, + "id": 410, "interval": null, "links": [], "maxDataPoints": 100, @@ -57619,7 +57767,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 410, + "id": 411, "interval": null, "isNew": true, "legend": { @@ -57752,7 +57900,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 411, + "id": 412, "interval": null, "isNew": true, "legend": { @@ -57885,7 +58033,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 412, + "id": 413, "interval": null, "isNew": true, "legend": { @@ -58018,7 +58166,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 413, + "id": 414, "interval": null, "isNew": true, "legend": { @@ -58151,7 +58299,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 414, + "id": 415, "interval": null, "isNew": true, "legend": { @@ -58284,7 +58432,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 415, + "id": 416, "interval": null, "isNew": true, "legend": { @@ -58420,7 +58568,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 416, + "id": 417, "interval": null, "links": [], "maxDataPoints": 100, @@ -58459,7 +58607,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 417, + "id": 418, "interval": null, "isNew": true, "legend": { @@ -58592,7 +58740,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 418, + "id": 419, "interval": null, "isNew": true, "legend": { @@ -58725,7 +58873,7 @@ }, "height": null, "hideTimeOverride": false, 
- "id": 419, + "id": 420, "interval": null, "isNew": true, "legend": { @@ -58873,7 +59021,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 420, + "id": 421, "interval": null, "isNew": true, "legend": { @@ -59036,7 +59184,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 421, + "id": 422, "interval": null, "isNew": true, "legend": { @@ -59169,7 +59317,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 422, + "id": 423, "interval": null, "isNew": true, "legend": { @@ -59302,7 +59450,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 423, + "id": 424, "interval": null, "isNew": true, "legend": { @@ -59465,7 +59613,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 424, + "id": 425, "interval": null, "isNew": true, "legend": { @@ -59613,7 +59761,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 425, + "id": 426, "interval": null, "isNew": true, "legend": { @@ -59749,7 +59897,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 426, + "id": 427, "interval": null, "links": [], "maxDataPoints": 100, @@ -59788,7 +59936,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 427, + "id": 428, "interval": null, "isNew": true, "legend": { @@ -59921,7 +60069,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 428, + "id": 429, "interval": null, "isNew": true, "legend": { @@ -60054,7 +60202,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 429, + "id": 430, "interval": null, "isNew": true, "legend": { @@ -60187,7 +60335,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 430, + "id": 431, "interval": null, "isNew": true, "legend": { @@ -60320,7 +60468,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 431, + "id": 432, "interval": null, "isNew": true, "legend": { @@ -60453,7 +60601,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 432, + "id": 433, "interval": null, "isNew": true, "legend": { @@ -60586,7 +60734,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 433, + "id": 434, "interval": null, "isNew": true, "legend": { @@ -60719,7 +60867,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 434, + "id": 435, "interval": null, "isNew": true, "legend": { @@ -60852,7 +61000,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 435, + "id": 436, "interval": null, "isNew": true, "legend": { @@ -60992,7 +61140,7 @@ "hideTimeOverride": false, "hideZeroBuckets": true, "highlightCards": true, - "id": 436, + "id": 437, "interval": null, "legend": { "show": false @@ -61090,7 +61238,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 437, + "id": 438, "interval": null, "isNew": true, "legend": { @@ -61223,7 +61371,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 438, + "id": 439, "interval": null, "isNew": true, "legend": { @@ -61371,7 +61519,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 439, + "id": 440, "interval": null, "isNew": true, "legend": { @@ -61519,7 +61667,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 440, + "id": 441, "interval": null, "isNew": true, "legend": { @@ -61659,7 +61807,7 @@ "hideTimeOverride": false, "hideZeroBuckets": true, "highlightCards": true, - "id": 441, + "id": 442, "interval": null, "legend": { "show": false @@ -61757,7 +61905,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 442, + "id": 443, "interval": null, "isNew": true, "legend": { @@ -61890,7 +62038,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 443, + "id": 444, "interval": null, "isNew": true, "legend": { @@ -62026,7 +62174,7 @@ }, 
"height": null, "hideTimeOverride": false, - "id": 444, + "id": 445, "interval": null, "links": [], "maxDataPoints": 100, @@ -62065,7 +62213,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 445, + "id": 446, "interval": null, "isNew": true, "legend": { @@ -62198,7 +62346,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 446, + "id": 447, "interval": null, "isNew": true, "legend": { @@ -62361,7 +62509,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 447, + "id": 448, "interval": null, "isNew": true, "legend": { @@ -62509,7 +62657,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 448, + "id": 449, "interval": null, "isNew": true, "legend": { @@ -62642,7 +62790,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 449, + "id": 450, "interval": null, "isNew": true, "legend": { @@ -62782,7 +62930,7 @@ "hideTimeOverride": false, "hideZeroBuckets": true, "highlightCards": true, - "id": 450, + "id": 451, "interval": null, "legend": { "show": false @@ -62887,7 +63035,7 @@ "hideTimeOverride": false, "hideZeroBuckets": true, "highlightCards": true, - "id": 451, + "id": 452, "interval": null, "legend": { "show": false @@ -62992,7 +63140,7 @@ "hideTimeOverride": false, "hideZeroBuckets": true, "highlightCards": true, - "id": 452, + "id": 453, "interval": null, "legend": { "show": false @@ -63090,7 +63238,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 453, + "id": 454, "interval": null, "isNew": true, "legend": { @@ -63230,7 +63378,7 @@ "hideTimeOverride": false, "hideZeroBuckets": true, "highlightCards": true, - "id": 454, + "id": 455, "interval": null, "legend": { "show": false @@ -63335,7 +63483,7 @@ "hideTimeOverride": false, "hideZeroBuckets": true, "highlightCards": true, - "id": 455, + "id": 456, "interval": null, "legend": { "show": false @@ -63440,7 +63588,7 @@ "hideTimeOverride": false, "hideZeroBuckets": true, "highlightCards": true, - "id": 456, + "id": 457, "interval": null, "legend": { "show": false @@ -63538,7 +63686,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 457, + "id": 458, "interval": null, "isNew": true, "legend": { @@ -63671,7 +63819,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 458, + "id": 459, "interval": null, "isNew": true, "legend": { @@ -63804,7 +63952,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 459, + "id": 460, "interval": null, "isNew": true, "legend": { @@ -63944,7 +64092,7 @@ "hideTimeOverride": false, "hideZeroBuckets": true, "highlightCards": true, - "id": 460, + "id": 461, "interval": null, "legend": { "show": false @@ -64042,7 +64190,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 461, + "id": 462, "interval": null, "isNew": true, "legend": { @@ -64178,7 +64326,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 462, + "id": 463, "interval": null, "links": [], "maxDataPoints": 100, @@ -64217,7 +64365,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 463, + "id": 464, "interval": null, "isNew": true, "legend": { @@ -64380,7 +64528,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 464, + "id": 465, "interval": null, "isNew": true, "legend": { @@ -64513,7 +64661,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 465, + "id": 466, "interval": null, "isNew": true, "legend": { @@ -64653,7 +64801,7 @@ "hideTimeOverride": false, "hideZeroBuckets": true, "highlightCards": true, - "id": 466, + "id": 467, "interval": null, "legend": { "show": false @@ -64758,7 +64906,7 @@ "hideTimeOverride": false, "hideZeroBuckets": true, 
"highlightCards": true, - "id": 467, + "id": 468, "interval": null, "legend": { "show": false @@ -64856,7 +65004,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 468, + "id": 469, "interval": null, "isNew": true, "legend": { @@ -65011,7 +65159,7 @@ "hideTimeOverride": false, "hideZeroBuckets": true, "highlightCards": true, - "id": 469, + "id": 470, "interval": null, "legend": { "show": false @@ -65116,7 +65264,7 @@ "hideTimeOverride": false, "hideZeroBuckets": true, "highlightCards": true, - "id": 470, + "id": 471, "interval": null, "legend": { "show": false @@ -65221,7 +65369,7 @@ "hideTimeOverride": false, "hideZeroBuckets": true, "highlightCards": true, - "id": 471, + "id": 472, "interval": null, "legend": { "show": false @@ -65319,7 +65467,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 472, + "id": 473, "interval": null, "isNew": true, "legend": { @@ -65489,7 +65637,7 @@ "hideTimeOverride": false, "hideZeroBuckets": true, "highlightCards": true, - "id": 473, + "id": 474, "interval": null, "legend": { "show": false @@ -65587,7 +65735,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 474, + "id": 475, "interval": null, "isNew": true, "legend": { @@ -65788,7 +65936,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 475, + "id": 476, "interval": null, "isNew": true, "legend": { @@ -65989,7 +66137,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 476, + "id": 477, "interval": null, "isNew": true, "legend": { @@ -66122,7 +66270,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 477, + "id": 478, "interval": null, "isNew": true, "legend": { @@ -66285,7 +66433,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 478, + "id": 479, "interval": null, "isNew": true, "legend": { @@ -66418,7 +66566,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 479, + "id": 480, "interval": null, "isNew": true, "legend": { @@ -66551,7 +66699,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 480, + "id": 481, "interval": null, "isNew": true, "legend": { @@ -66752,7 +66900,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 481, + "id": 482, "interval": null, "isNew": true, "legend": { @@ -66885,7 +67033,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 482, + "id": 483, "interval": null, "isNew": true, "legend": { @@ -67025,7 +67173,7 @@ "hideTimeOverride": false, "hideZeroBuckets": true, "highlightCards": true, - "id": 483, + "id": 484, "interval": null, "legend": { "show": false @@ -67130,7 +67278,7 @@ "hideTimeOverride": false, "hideZeroBuckets": true, "highlightCards": true, - "id": 484, + "id": 485, "interval": null, "legend": { "show": false @@ -67235,7 +67383,7 @@ "hideTimeOverride": false, "hideZeroBuckets": true, "highlightCards": true, - "id": 485, + "id": 486, "interval": null, "legend": { "show": false @@ -67340,7 +67488,7 @@ "hideTimeOverride": false, "hideZeroBuckets": true, "highlightCards": true, - "id": 486, + "id": 487, "interval": null, "legend": { "show": false @@ -67445,7 +67593,7 @@ "hideTimeOverride": false, "hideZeroBuckets": true, "highlightCards": true, - "id": 487, + "id": 488, "interval": null, "legend": { "show": false @@ -67550,7 +67698,7 @@ "hideTimeOverride": false, "hideZeroBuckets": true, "highlightCards": true, - "id": 488, + "id": 489, "interval": null, "legend": { "show": false @@ -67655,7 +67803,7 @@ "hideTimeOverride": false, "hideZeroBuckets": true, "highlightCards": true, - "id": 489, + "id": 490, "interval": null, "legend": { "show": false @@ -67753,7 +67901,7 @@ }, "height": 
null, "hideTimeOverride": false, - "id": 490, + "id": 491, "interval": null, "isNew": true, "legend": { @@ -67901,7 +68049,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 491, + "id": 492, "interval": null, "isNew": true, "legend": { @@ -68034,7 +68182,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 492, + "id": 493, "interval": null, "isNew": true, "legend": { @@ -68167,7 +68315,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 493, + "id": 494, "interval": null, "isNew": true, "legend": { @@ -68315,7 +68463,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 494, + "id": 495, "interval": null, "isNew": true, "legend": { @@ -68451,7 +68599,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 495, + "id": 496, "interval": null, "links": [], "maxDataPoints": 100, @@ -68502,7 +68650,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 496, + "id": 497, "interval": null, "links": [], "maxDataPoints": 100, @@ -68598,7 +68746,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 497, + "id": 498, "interval": null, "links": [], "maxDataPoints": 100, @@ -68673,7 +68821,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 498, + "id": 499, "interval": null, "links": [], "maxDataPoints": 100, @@ -68748,7 +68896,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 499, + "id": 500, "interval": null, "links": [], "maxDataPoints": 100, @@ -68823,7 +68971,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 500, + "id": 501, "interval": null, "links": [], "maxDataPoints": 100, @@ -68898,7 +69046,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 501, + "id": 502, "interval": null, "links": [], "maxDataPoints": 100, @@ -68973,7 +69121,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 502, + "id": 503, "interval": null, "links": [], "maxDataPoints": 100, @@ -69048,7 +69196,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 503, + "id": 504, "interval": null, "links": [], "maxDataPoints": 100, @@ -69127,7 +69275,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 504, + "id": 505, "interval": null, "isNew": true, "legend": { @@ -69260,7 +69408,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 505, + "id": 506, "interval": null, "isNew": true, "legend": { @@ -69393,7 +69541,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 506, + "id": 507, "interval": null, "isNew": true, "legend": { @@ -69526,7 +69674,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 507, + "id": 508, "interval": null, "isNew": true, "legend": { @@ -69659,7 +69807,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 508, + "id": 509, "interval": null, "isNew": true, "legend": { @@ -69792,7 +69940,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 509, + "id": 510, "interval": null, "isNew": true, "legend": { @@ -69940,7 +70088,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 510, + "id": 511, "interval": null, "isNew": true, "legend": { @@ -70073,7 +70221,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 511, + "id": 512, "interval": null, "isNew": true, "legend": { @@ -70206,7 +70354,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 512, + "id": 513, "interval": null, "isNew": true, "legend": { @@ -70372,7 +70520,7 @@ "hideTimeOverride": false, "hideZeroBuckets": true, "highlightCards": true, - "id": 513, + "id": 514, "interval": null, "legend": { "show": false @@ -70477,7 +70625,7 @@ "hideTimeOverride": false, "hideZeroBuckets": true, "highlightCards": true, - 
"id": 514, + "id": 515, "interval": null, "legend": { "show": false @@ -70582,7 +70730,7 @@ "hideTimeOverride": false, "hideZeroBuckets": true, "highlightCards": true, - "id": 515, + "id": 516, "interval": null, "legend": { "show": false @@ -70687,7 +70835,7 @@ "hideTimeOverride": false, "hideZeroBuckets": true, "highlightCards": true, - "id": 516, + "id": 517, "interval": null, "legend": { "show": false @@ -70792,7 +70940,7 @@ "hideTimeOverride": false, "hideZeroBuckets": true, "highlightCards": true, - "id": 517, + "id": 518, "interval": null, "legend": { "show": false @@ -70897,7 +71045,7 @@ "hideTimeOverride": false, "hideZeroBuckets": true, "highlightCards": true, - "id": 518, + "id": 519, "interval": null, "legend": { "show": false @@ -71002,7 +71150,7 @@ "hideTimeOverride": false, "hideZeroBuckets": true, "highlightCards": true, - "id": 519, + "id": 520, "interval": null, "legend": { "show": false @@ -71107,7 +71255,7 @@ "hideTimeOverride": false, "hideZeroBuckets": true, "highlightCards": true, - "id": 520, + "id": 521, "interval": null, "legend": { "show": false @@ -71205,7 +71353,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 521, + "id": 522, "interval": null, "isNew": true, "legend": { @@ -71338,7 +71486,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 522, + "id": 523, "interval": null, "isNew": true, "legend": { @@ -71471,7 +71619,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 523, + "id": 524, "interval": null, "isNew": true, "legend": { @@ -71604,7 +71752,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 524, + "id": 525, "interval": null, "isNew": true, "legend": { @@ -71737,7 +71885,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 525, + "id": 526, "interval": null, "isNew": true, "legend": { @@ -71870,7 +72018,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 526, + "id": 527, "interval": null, "isNew": true, "legend": { @@ -72003,7 +72151,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 527, + "id": 528, "interval": null, "isNew": true, "legend": { @@ -72136,7 +72284,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 528, + "id": 529, "interval": null, "isNew": true, "legend": { @@ -72276,7 +72424,7 @@ "hideTimeOverride": false, "hideZeroBuckets": true, "highlightCards": true, - "id": 529, + "id": 530, "interval": null, "legend": { "show": false @@ -72381,7 +72529,7 @@ "hideTimeOverride": false, "hideZeroBuckets": true, "highlightCards": true, - "id": 530, + "id": 531, "interval": null, "legend": { "show": false @@ -72479,7 +72627,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 531, + "id": 532, "interval": null, "isNew": true, "legend": { @@ -72612,7 +72760,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 532, + "id": 533, "interval": null, "isNew": true, "legend": { @@ -72745,7 +72893,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 533, + "id": 534, "interval": null, "isNew": true, "legend": { @@ -72878,7 +73026,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 534, + "id": 535, "interval": null, "isNew": true, "legend": { @@ -73011,7 +73159,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 535, + "id": 536, "interval": null, "isNew": true, "legend": { @@ -73144,7 +73292,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 536, + "id": 537, "interval": null, "isNew": true, "legend": { @@ -73280,7 +73428,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 537, + "id": 538, "interval": null, "links": [], "maxDataPoints": 100, @@ 
-73319,7 +73467,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 538, + "id": 539, "interval": null, "isNew": true, "legend": { @@ -73467,7 +73615,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 539, + "id": 540, "interval": null, "isNew": true, "legend": { @@ -73600,7 +73748,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 540, + "id": 541, "interval": null, "isNew": true, "legend": { @@ -73733,7 +73881,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 541, + "id": 542, "interval": null, "isNew": true, "legend": { @@ -73869,7 +74017,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 542, + "id": 543, "interval": null, "links": [], "maxDataPoints": 100, @@ -73908,7 +74056,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 543, + "id": 544, "interval": null, "isNew": true, "legend": { @@ -74041,7 +74189,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 544, + "id": 545, "interval": null, "isNew": true, "legend": { @@ -74174,7 +74322,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 545, + "id": 546, "interval": null, "isNew": true, "legend": { @@ -74307,7 +74455,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 546, + "id": 547, "interval": null, "isNew": true, "legend": { @@ -74440,7 +74588,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 547, + "id": 548, "interval": null, "isNew": true, "legend": { @@ -74573,7 +74721,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 548, + "id": 549, "interval": null, "isNew": true, "legend": { @@ -74709,7 +74857,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 549, + "id": 550, "interval": null, "links": [], "maxDataPoints": 100, @@ -74748,7 +74896,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 550, + "id": 551, "interval": null, "isNew": true, "legend": { @@ -74881,7 +75029,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 551, + "id": 552, "interval": null, "isNew": true, "legend": { @@ -75017,7 +75165,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 552, + "id": 553, "interval": null, "links": [], "maxDataPoints": 100, @@ -75056,7 +75204,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 553, + "id": 554, "interval": null, "isNew": true, "legend": { @@ -75257,7 +75405,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 554, + "id": 555, "interval": null, "isNew": true, "legend": { @@ -75393,7 +75541,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 555, + "id": 556, "interval": null, "links": [], "maxDataPoints": 100, @@ -75432,7 +75580,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 556, + "id": 557, "interval": null, "isNew": true, "legend": { @@ -75565,7 +75713,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 557, + "id": 558, "interval": null, "isNew": true, "legend": { @@ -75698,7 +75846,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 558, + "id": 559, "interval": null, "isNew": true, "legend": { @@ -75831,7 +75979,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 559, + "id": 560, "interval": null, "isNew": true, "legend": { @@ -75964,7 +76112,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 560, + "id": 561, "interval": null, "isNew": true, "legend": { @@ -76112,7 +76260,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 561, + "id": 562, "interval": null, "isNew": true, "legend": { @@ -76316,7 +76464,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 562, + "id": 563, "interval": null, "links": [], 
"maxDataPoints": 100, @@ -76355,7 +76503,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 563, + "id": 564, "interval": null, "isNew": true, "legend": { @@ -76488,7 +76636,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 564, + "id": 565, "interval": null, "isNew": true, "legend": { @@ -76621,7 +76769,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 565, + "id": 566, "interval": null, "isNew": true, "legend": { @@ -76754,7 +76902,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 566, + "id": 567, "interval": null, "isNew": true, "legend": { @@ -76887,7 +77035,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 567, + "id": 568, "interval": null, "isNew": true, "legend": { @@ -77084,7 +77232,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 568, + "id": 569, "interval": null, "links": [], "maxDataPoints": 100, diff --git a/metrics/grafana/tikv_details.json.sha256 b/metrics/grafana/tikv_details.json.sha256 index 06a3aec823e..34c5c48ac6a 100644 --- a/metrics/grafana/tikv_details.json.sha256 +++ b/metrics/grafana/tikv_details.json.sha256 @@ -1 +1 @@ -d5cfc7548e0cf9b54b7d1b8615033e6315a53e25f482c8202cf488b183c38470 ./metrics/grafana/tikv_details.json +fb206d6a1e887c038e752582478ff75d8b508d40f402392f7d152126891272a9 ./metrics/grafana/tikv_details.json diff --git a/src/config/mod.rs b/src/config/mod.rs index 26c2f9000c3..b848dcefc22 100644 --- a/src/config/mod.rs +++ b/src/config/mod.rs @@ -5667,6 +5667,7 @@ mod tests { scheduler, flow_controller.clone(), storage.get_scheduler(), + storage.get_concurrency_manager(), )), ); (storage, cfg_controller, receiver, flow_controller) diff --git a/src/coprocessor/endpoint.rs b/src/coprocessor/endpoint.rs index 2030c92c305..40189b6f7b6 100644 --- a/src/coprocessor/endpoint.rs +++ b/src/coprocessor/endpoint.rs @@ -137,7 +137,8 @@ impl Endpoint { fn check_memory_locks(&self, req_ctx: &ReqContext) -> Result<()> { let start_ts = req_ctx.txn_start_ts; if !req_ctx.context.get_stale_read() { - self.concurrency_manager.update_max_ts(start_ts); + self.concurrency_manager + .update_max_ts(start_ts, || format!("coprocessor-{}", start_ts))?; } if need_check_locks(req_ctx.context.get_isolation_level()) { let begin_instant = Instant::now(); @@ -926,6 +927,12 @@ macro_rules! 
make_error_response_common { errorpb.set_server_is_busy(server_is_busy_err); $resp.set_region_error(errorpb); } + Error::InvalidMaxTsUpdate(e) => { + $tag = "invalid_max_ts_update"; + let mut err = errorpb::Error::default(); + err.set_message(e.to_string()); + $resp.set_region_error(err); + } Error::Other(_) => { $tag = "other"; warn!("unexpected other error encountered processing coprocessor task"; diff --git a/src/coprocessor/error.rs b/src/coprocessor/error.rs index 5c3ce554cc2..1ecb4479da3 100644 --- a/src/coprocessor/error.rs +++ b/src/coprocessor/error.rs @@ -30,6 +30,9 @@ pub enum Error { #[error("Coprocessor task canceled due to exceeding memory quota")] MemoryQuotaExceeded, + #[error("{0}")] + InvalidMaxTsUpdate(#[from] concurrency_manager::InvalidMaxTsUpdate), + #[error("{0}")] Other(String), } @@ -137,6 +140,7 @@ impl ErrorCodeExt for Error { Error::DeadlineExceeded => error_code::coprocessor::DEADLINE_EXCEEDED, Error::MaxPendingTasksExceeded => error_code::coprocessor::MAX_PENDING_TASKS_EXCEEDED, Error::MemoryQuotaExceeded => error_code::coprocessor::MEMORY_QUOTA_EXCEEDED, + Error::InvalidMaxTsUpdate(_) => error_code::coprocessor::INVALID_MAX_TS_UPDATE, Error::Other(_) => error_code::UNKNOWN, } } diff --git a/src/server/raftkv/mod.rs b/src/server/raftkv/mod.rs index 3adb5d1381f..28f590c95ad 100644 --- a/src/server/raftkv/mod.rs +++ b/src/server/raftkv/mod.rs @@ -842,7 +842,12 @@ impl ReadIndexObserver for ReplicaReadLockChecker { let begin_instant = Instant::now(); let start_ts = request.get_start_ts().into(); - self.concurrency_manager.update_max_ts(start_ts); + if let Err(e) = self + .concurrency_manager + .update_max_ts(start_ts, || format!("read_index-{}", start_ts)) + { + error!("failed to update max ts in concurrency manager"; "err" => ?e); + } for range in request.mut_key_ranges().iter_mut() { let key_bound = |key: Vec| { if key.is_empty() { diff --git a/src/storage/config.rs b/src/storage/config.rs index a3291d7b3d2..a89f1ac26d6 100644 --- a/src/storage/config.rs +++ b/src/storage/config.rs @@ -2,7 +2,7 @@ //! Storage configuration. -use std::{cmp::max, error::Error, path::Path}; +use std::{borrow::ToOwned, cmp::max, error::Error, path::Path}; use engine_rocks::raw::{Cache, LRUCacheOptions, MemoryAllocator}; use file_system::{IoPriority, IoRateLimitMode, IoRateLimiter, IoType}; @@ -61,6 +61,8 @@ const DEFAULT_TXN_STATUS_CACHE_CAPACITY: usize = 40_000 * 128; // occur in tests. 
const FALLBACK_BLOCK_CACHE_CAPACITY: ReadableSize = ReadableSize::mb(128); +const DEFAULT_ACTION_ON_INVALID_MAX_TS: &str = "panic"; + #[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq)] #[serde(rename_all = "kebab-case")] pub enum EngineType { @@ -105,6 +107,12 @@ pub struct Config { #[online_config(skip)] pub txn_status_cache_capacity: usize, pub memory_quota: ReadableSize, + /// Maximum max_ts deviation allowed from PD timestamp + pub max_ts_drift_allowance: ReadableDuration, + /// How often to refresh the max_ts limit from PD + #[online_config(skip)] + pub max_ts_sync_interval: ReadableDuration, + pub action_on_invalid_max_ts: String, #[online_config(submodule)] pub flow_control: FlowControlConfig, #[online_config(submodule)] @@ -140,6 +148,9 @@ impl Default for Config { io_rate_limit: IoRateLimitConfig::default(), background_error_recovery_window: ReadableDuration::hours(1), memory_quota: DEFAULT_TXN_MEMORY_QUOTA_CAPACITY, + max_ts_drift_allowance: ReadableDuration::secs(60), + max_ts_sync_interval: ReadableDuration::secs(15), + action_on_invalid_max_ts: DEFAULT_ACTION_ON_INVALID_MAX_TS.into(), } } } @@ -219,6 +230,26 @@ impl Config { self.memory_quota = self.scheduler_pending_write_threshold; } + if self.max_ts_drift_allowance <= self.max_ts_sync_interval { + let msg = format!( + "storage.max-ts-drift-allowance {:?} is smaller than or equal to storage.max-ts-sync-interval {:?}", + self.max_ts_drift_allowance, self.max_ts_sync_interval, + ); + error!("{}", msg); + return Err(msg.into()); + } + + if let Err(e) = concurrency_manager::ActionOnInvalidMaxTs::try_from( + self.action_on_invalid_max_ts.as_str(), + ) { + error!( + "storage.action-on-invalid-max-ts is set to an invalid value {}, \ + change to action panic", + self.action_on_invalid_max_ts, + ); + return Err(e.into()); + } + Ok(()) } diff --git a/src/storage/config_manager.rs b/src/storage/config_manager.rs index b36f2f2209f..fac10a40161 100644 --- a/src/storage/config_manager.rs +++ b/src/storage/config_manager.rs @@ -4,6 +4,7 @@ use std::{convert::TryInto, sync::Arc}; +use concurrency_manager::ConcurrencyManager; use engine_traits::{ALL_CFS, CF_DEFAULT}; use file_system::{get_io_rate_limiter, IoPriority, IoType}; use online_config::{ConfigChange, ConfigManager, ConfigValue, Result as CfgResult}; @@ -25,6 +26,7 @@ pub struct StorageConfigManger { ttl_checker_scheduler: Scheduler, flow_controller: Arc, scheduler: TxnScheduler, + concurrency_manager: ConcurrencyManager, } unsafe impl Send for StorageConfigManger {} @@ -36,12 +38,14 @@ impl StorageConfigManger { ttl_checker_scheduler: Scheduler, flow_controller: Arc, scheduler: TxnScheduler, + concurrency_manager: ConcurrencyManager, ) -> Self { StorageConfigManger { configurable_db, ttl_checker_scheduler, flow_controller, scheduler, + concurrency_manager, } } } @@ -106,6 +110,12 @@ impl ConfigManager } } } + if let Some(v) = change.remove("action_on_invalid_max_ts") { + let str_v: String = v.into(); + let action: concurrency_manager::ActionOnInvalidMaxTs = str_v.try_into()?; + self.concurrency_manager + .set_action_on_invalid_max_ts(action); + } Ok(()) } } diff --git a/src/storage/errors.rs b/src/storage/errors.rs index 02888cc93e8..81386a8f31c 100644 --- a/src/storage/errors.rs +++ b/src/storage/errors.rs @@ -303,6 +303,20 @@ pub fn extract_region_error_from_error(e: &Error) -> Option { err.set_flashback_not_prepared(flashback_not_prepared_err); Some(err) } + Error(box ErrorInner::Txn(TxnError(box TxnErrorInner::InvalidMaxTsUpdate( + invalid_max_ts_update, + )))) => 
{ + let mut err = errorpb::Error::default(); + err.set_message(invalid_max_ts_update.to_string()); + Some(err) + } + Error(box ErrorInner::Txn(TxnError(box TxnErrorInner::Mvcc(MvccError( + box MvccErrorInner::InvalidMaxTsUpdate(invalid_max_ts_update), + ))))) => { + let mut err = errorpb::Error::default(); + err.set_message(invalid_max_ts_update.to_string()); + Some(err) + } Error(box ErrorInner::SchedTooBusy) => { let mut err = errorpb::Error::default(); let mut server_is_busy_err = errorpb::ServerIsBusy::default(); diff --git a/src/storage/mod.rs b/src/storage/mod.rs index 25b3a20dbd3..5af39078648 100644 --- a/src/storage/mod.rs +++ b/src/storage/mod.rs @@ -1455,7 +1455,9 @@ impl Storage { // Update max_ts and check the in-memory lock table before getting the snapshot if !ctx.get_stale_read() { - concurrency_manager.update_max_ts(start_ts); + concurrency_manager + .update_max_ts(start_ts, "scan") + .map_err(txn::Error::from)?; } if need_check_locks(ctx.get_isolation_level()) { let begin_instant = Instant::now(); @@ -1620,7 +1622,9 @@ impl Storage { let command_duration = Instant::now(); - concurrency_manager.update_max_ts(max_ts); + concurrency_manager + .update_max_ts(max_ts, "scan_lock") + .map_err(txn::Error::from)?; let begin_instant = Instant::now(); // TODO: Though it's very unlikely to find a conflicting memory lock here, it's // not a good idea to return an error to the client, making the GC fail. A @@ -3351,7 +3355,11 @@ fn prepare_snap_ctx<'a>( ) -> Result> { // Update max_ts and check the in-memory lock table before getting the snapshot if !pb_ctx.get_stale_read() { - concurrency_manager.update_max_ts(start_ts); + concurrency_manager + .update_max_ts(start_ts, || { + format!("prepare_snap_ctx-{}-{}", cmd, start_ts) + }) + .map_err(txn::Error::from)?; } fail_point!("before-storage-check-memory-locks"); let isolation_level = pb_ctx.get_isolation_level(); @@ -10266,7 +10274,7 @@ mod tests { .build() .unwrap(); let cm = storage.concurrency_manager.clone(); - cm.update_max_ts(10.into()); + cm.update_max_ts(10.into(), "").unwrap(); // Optimistic prewrite let (tx, rx) = channel(); @@ -10314,7 +10322,7 @@ mod tests { .unwrap(); rx.recv().unwrap(); - cm.update_max_ts(1000.into()); + cm.update_max_ts(1000.into(), "").unwrap(); let (tx, rx) = channel(); storage @@ -11421,7 +11429,7 @@ mod tests { // commit enabled, and max_ts changes when the second request arrives. // A retrying prewrite request arrives. 
- cm.update_max_ts(20.into()); + cm.update_max_ts(20.into(), "").unwrap(); let mut ctx = Context::default(); ctx.set_is_retry_request(true); let (tx, rx) = channel(); @@ -11605,7 +11613,7 @@ mod tests { // 1PC update let (tx, rx) = channel(); - cm.update_max_ts(59.into()); + cm.update_max_ts(59.into(), "").unwrap(); storage .sched_txn_command( Prewrite::new( diff --git a/src/storage/mvcc/mod.rs b/src/storage/mvcc/mod.rs index 30e20a2afc6..e1d01f91562 100644 --- a/src/storage/mvcc/mod.rs +++ b/src/storage/mvcc/mod.rs @@ -182,6 +182,9 @@ pub enum ErrorInner { #[error("generation out of order: current = {0}, key={1:?}, lock = {1:?}")] GenerationOutOfOrder(u64, Key, Lock), + #[error("{0}")] + InvalidMaxTsUpdate(#[from] concurrency_manager::InvalidMaxTsUpdate), + #[error("{0:?}")] Other(#[from] Box), } @@ -321,6 +324,7 @@ impl ErrorInner { ErrorInner::GenerationOutOfOrder(gen, key, lock_info) => Some( ErrorInner::GenerationOutOfOrder(*gen, key.clone(), lock_info.clone()), ), + ErrorInner::InvalidMaxTsUpdate(e) => Some(ErrorInner::InvalidMaxTsUpdate(e.clone())), ErrorInner::Io(_) | ErrorInner::Other(_) => None, } } @@ -425,6 +429,7 @@ impl ErrorCodeExt for Error { ErrorInner::LockIfExistsFailed { .. } => error_code::storage::LOCK_IF_EXISTS_FAILED, ErrorInner::PrimaryMismatch(_) => error_code::storage::PRIMARY_MISMATCH, ErrorInner::GenerationOutOfOrder(..) => error_code::storage::GENERATION_OUT_OF_ORDER, + ErrorInner::InvalidMaxTsUpdate(_) => error_code::storage::INVALID_MAX_TS_UPDATE, ErrorInner::Other(_) => error_code::storage::UNKNOWN, } } diff --git a/src/storage/txn/actions/acquire_pessimistic_lock.rs b/src/storage/txn/actions/acquire_pessimistic_lock.rs index 2a9e49b45ff..6c71bb6843a 100644 --- a/src/storage/txn/actions/acquire_pessimistic_lock.rs +++ b/src/storage/txn/actions/acquire_pessimistic_lock.rs @@ -71,7 +71,9 @@ pub fn acquire_pessimistic_lock( // it infers a read to the value, in which case max_ts need to be updated to // guarantee the linearizability and snapshot isolation. if should_not_exist || need_value || need_check_existence { - txn.concurrency_manager.update_max_ts(for_update_ts); + txn.concurrency_manager.update_max_ts(for_update_ts, || { + format!("pessimistic_lock-{}-{}", reader.start_ts, for_update_ts) + })?; } // When `need_value` is set, the value need to be loaded of course. If diff --git a/src/storage/txn/actions/prewrite.rs b/src/storage/txn/actions/prewrite.rs index 6c21de599bc..9e95a3ef63f 100644 --- a/src/storage/txn/actions/prewrite.rs +++ b/src/storage/txn/actions/prewrite.rs @@ -70,7 +70,10 @@ pub fn prewrite_with_generation( // Update max_ts for Insert operation to guarantee linearizability and snapshot // isolation if mutation.should_not_exist { - txn.concurrency_manager.update_max_ts(txn_props.start_ts); + txn.concurrency_manager + .update_max_ts(txn_props.start_ts, || { + format!("prewrite-{}", txn_props.start_ts) + })?; } fail_point!( @@ -149,7 +152,10 @@ pub fn prewrite_with_generation( if mutation.should_not_write { // `checkNotExists` is equivalent to a get operation, so it should update the // max_ts. 
- txn.concurrency_manager.update_max_ts(txn_props.start_ts); + txn.concurrency_manager + .update_max_ts(txn_props.start_ts, || { + format!("prewrite-{}", txn_props.start_ts) + })?; let min_commit_ts = if mutation.need_min_commit_ts() { // Don't calculate the min_commit_ts according to the concurrency manager's // max_ts for a should_not_write mutation because it's not persisted and doesn't @@ -1060,7 +1066,7 @@ pub mod tests { .unwrap(); assert_eq!(old_value, OldValue::None); - cm.update_max_ts(60.into()); + cm.update_max_ts(60.into(), "").unwrap(); // calculated commit_ts = 61 > 50, err let err = prewrite( &mut txn, @@ -1245,7 +1251,7 @@ pub mod tests { .unwrap(); assert_eq!(old_value, OldValue::None); - cm.update_max_ts(60.into()); + cm.update_max_ts(60.into(), "").unwrap(); // calculated commit_ts = 61 > 50, err let err = prewrite( &mut txn, @@ -1347,7 +1353,7 @@ pub mod tests { // Pessimistic txn skips constraint check, does not read previous write. assert_eq!(old_value, OldValue::Unspecified); - cm.update_max_ts(60.into()); + cm.update_max_ts(60.into(), "").unwrap(); // calculated commit_ts = 61 > 50, ok prewrite( &mut txn, @@ -1400,7 +1406,7 @@ pub mod tests { // Pessimistic txn skips constraint check, does not read previous write. assert_eq!(old_value, OldValue::Unspecified); - cm.update_max_ts(60.into()); + cm.update_max_ts(60.into(), "").unwrap(); // calculated commit_ts = 61 > 50, ok prewrite( &mut txn, diff --git a/src/storage/txn/commands/check_secondary_locks.rs b/src/storage/txn/commands/check_secondary_locks.rs index aff85ca3e61..6664f16bdc6 100644 --- a/src/storage/txn/commands/check_secondary_locks.rs +++ b/src/storage/txn/commands/check_secondary_locks.rs @@ -145,7 +145,11 @@ impl WriteCommand for CheckSecondaryLocks { // It is not allowed for commit to overwrite a protected rollback. So we update // max_ts to prevent this case from happening. let region_id = self.ctx.get_region_id(); - context.concurrency_manager.update_max_ts(self.start_ts); + context + .concurrency_manager + .update_max_ts(self.start_ts, || { + format!("check_secondary_locks-{}", self.start_ts) + })?; let mut txn = MvccTxn::new(self.start_ts, context.concurrency_manager); let mut reader = ReaderWithStats::new( diff --git a/src/storage/txn/commands/check_txn_status.rs b/src/storage/txn/commands/check_txn_status.rs index f2bbdac7478..fbb1b7187f1 100644 --- a/src/storage/txn/commands/check_txn_status.rs +++ b/src/storage/txn/commands/check_txn_status.rs @@ -90,7 +90,12 @@ impl WriteCommand for CheckTxnStatus { if !self.caller_start_ts.is_max() && self.caller_start_ts > new_max_ts { new_max_ts = self.caller_start_ts; } - context.concurrency_manager.update_max_ts(new_max_ts); + context.concurrency_manager.update_max_ts(new_max_ts, || { + format!( + "check_txn_status-{}-{}-{}", + self.lock_ts, self.current_ts, self.caller_start_ts + ) + })?; let mut txn = MvccTxn::new(self.lock_ts, context.concurrency_manager); let mut reader = ReaderWithStats::new( diff --git a/src/storage/txn/commands/cleanup.rs b/src/storage/txn/commands/cleanup.rs index a1c234ce062..fa0b446284b 100644 --- a/src/storage/txn/commands/cleanup.rs +++ b/src/storage/txn/commands/cleanup.rs @@ -51,7 +51,9 @@ impl WriteCommand for Cleanup { fn process_write(self, snapshot: S, context: WriteContext<'_, L>) -> Result { // It is not allowed for commit to overwrite a protected rollback. So we update // max_ts to prevent this case from happening. 
- context.concurrency_manager.update_max_ts(self.start_ts); + context + .concurrency_manager + .update_max_ts(self.start_ts, || format!("cleanup-{}", self.start_ts))?; let mut txn = MvccTxn::new(self.start_ts, context.concurrency_manager); let mut reader = ReaderWithStats::new( diff --git a/src/storage/txn/commands/prewrite.rs b/src/storage/txn/commands/prewrite.rs index 581e248d9f1..861083fb117 100644 --- a/src/storage/txn/commands/prewrite.rs +++ b/src/storage/txn/commands/prewrite.rs @@ -1264,7 +1264,7 @@ mod tests { must_get(&mut engine, key, 12, value); must_get_commit_ts(&mut engine, key, 10, 11); - cm.update_max_ts(50.into()); + cm.update_max_ts(50.into(), "").unwrap(); let mutations = vec![Mutation::make_put(Key::from_raw(key), value.to_vec())]; @@ -1406,7 +1406,7 @@ mod tests { must_get_commit_ts(&mut engine, k1, 8, 13); must_get_commit_ts(&mut engine, k2, 8, 13); - cm.update_max_ts(50.into()); + cm.update_max_ts(50.into(), "").unwrap(); must_acquire_pessimistic_lock(&mut engine, k1, k1, 20, 20); let mutations = vec![( @@ -1506,7 +1506,7 @@ mod tests { assert_eq!(res.one_pc_commit_ts, TimeStamp::zero()); must_locked(&mut engine, key, 10); - cm.update_max_ts(50.into()); + cm.update_max_ts(50.into(), "").unwrap(); let (k1, v1) = (b"k1", b"v1"); let (k2, v2) = (b"k2", b"v2"); @@ -1578,7 +1578,7 @@ mod tests { assert_eq!(res.one_pc_commit_ts, TimeStamp::zero()); must_locked(&mut engine, key, 10); - cm.update_max_ts(50.into()); + cm.update_max_ts(50.into(), "").unwrap(); let (k1, v1) = (b"k1", b"v1"); let (k2, v2) = (b"k2", b"v2"); @@ -2147,7 +2147,7 @@ mod tests { ) .unwrap(); let commit_ts = res.one_pc_commit_ts; - cm.update_max_ts(commit_ts.next()); + cm.update_max_ts(commit_ts.next(), "").unwrap(); // repeate the prewrite let res = pessimistic_prewrite_with_cm( &mut engine, @@ -2657,7 +2657,7 @@ mod tests { must_commit(&mut engine, b"k2", 5, 18); // Update max_ts to be larger than the max_commit_ts. - cm.update_max_ts(50.into()); + cm.update_max_ts(50.into(), "").unwrap(); // Retry the prewrite on non-pessimistic key. // (is_retry_request flag is not set, here we don't rely on it.) diff --git a/src/storage/txn/mod.rs b/src/storage/txn/mod.rs index 5c4ed85902d..7e323a61e06 100644 --- a/src/storage/txn/mod.rs +++ b/src/storage/txn/mod.rs @@ -149,6 +149,9 @@ pub enum ErrorInner { #[error("region {0} not prepared the flashback")] FlashbackNotPrepared(u64), + + #[error("{0}")] + InvalidMaxTsUpdate(#[from] concurrency_manager::InvalidMaxTsUpdate), } impl ErrorInner { @@ -188,6 +191,9 @@ impl ErrorInner { ErrorInner::FlashbackNotPrepared(region_id) => { Some(ErrorInner::FlashbackNotPrepared(region_id)) } + ErrorInner::InvalidMaxTsUpdate(ref e) => { + Some(ErrorInner::InvalidMaxTsUpdate(e.clone())) + } ErrorInner::Other(_) | ErrorInner::ProtoBuf(_) | ErrorInner::Io(_) => None, } } @@ -242,6 +248,7 @@ impl ErrorCodeExt for Error { error_code::storage::MAX_TIMESTAMP_NOT_SYNCED } ErrorInner::FlashbackNotPrepared(_) => error_code::storage::FLASHBACK_NOT_PREPARED, + ErrorInner::InvalidMaxTsUpdate { .. 
} => error_code::storage::INVALID_MAX_TS_UPDATE, } } } diff --git a/tests/failpoints/cases/test_storage.rs b/tests/failpoints/cases/test_storage.rs index 95ae4e82b74..c7c8af28ea1 100644 --- a/tests/failpoints/cases/test_storage.rs +++ b/tests/failpoints/cases/test_storage.rs @@ -277,6 +277,7 @@ fn test_scale_scheduler_pool() { scheduler, flow_controller, storage.get_scheduler(), + storage.get_concurrency_manager(), )), ); let scheduler = storage.get_scheduler(); diff --git a/tests/failpoints/cases/test_transaction.rs b/tests/failpoints/cases/test_transaction.rs index 57b249c8087..af5b4946f50 100644 --- a/tests/failpoints/cases/test_transaction.rs +++ b/tests/failpoints/cases/test_transaction.rs @@ -344,7 +344,7 @@ fn test_max_commit_ts_error() { thread::sleep(Duration::from_millis(200)); cm.read_key_check(&Key::from_raw(b"k1"), |_| Err(())) .unwrap_err(); - cm.update_max_ts(200.into()); + cm.update_max_ts(200.into(), "").unwrap(); let res = prewrite_rx.recv().unwrap().unwrap(); assert!(res.min_commit_ts.is_zero()); @@ -372,7 +372,7 @@ fn test_exceed_max_commit_ts_in_the_middle_of_prewrite() { // Pause between getting max ts and store the lock in memory fail::cfg("before-set-lock-in-memory", "pause").unwrap(); - cm.update_max_ts(40.into()); + cm.update_max_ts(40.into(), "").unwrap(); let mutations = vec![ Mutation::make_put(Key::from_raw(b"k1"), b"v".to_vec()), Mutation::make_put(Key::from_raw(b"k2"), b"v".to_vec()), @@ -401,7 +401,7 @@ fn test_exceed_max_commit_ts_in_the_middle_of_prewrite() { // sleep a while so the first key gets max ts. thread::sleep(Duration::from_millis(200)); - cm.update_max_ts(51.into()); + cm.update_max_ts(51.into(), "").unwrap(); fail::remove("before-set-lock-in-memory"); let res = prewrite_rx.recv().unwrap().unwrap(); assert!(res.min_commit_ts.is_zero()); diff --git a/tests/integrations/config/mod.rs b/tests/integrations/config/mod.rs index 8208c129000..de33ce271f8 100644 --- a/tests/integrations/config/mod.rs +++ b/tests/integrations/config/mod.rs @@ -780,6 +780,9 @@ fn test_serde_custom_tikv_config() { background_error_recovery_window: ReadableDuration::hours(1), txn_status_cache_capacity: 1000, memory_quota: ReadableSize::kb(123), + max_ts_drift_allowance: ReadableDuration::secs(333), + max_ts_sync_interval: ReadableDuration::secs(44), + action_on_invalid_max_ts: "error".to_owned(), }; value.coprocessor = CopConfig { split_region_on_table: false, diff --git a/tests/integrations/config/test-custom.toml b/tests/integrations/config/test-custom.toml index e6af548894b..658d0b7b890 100644 --- a/tests/integrations/config/test-custom.toml +++ b/tests/integrations/config/test-custom.toml @@ -105,6 +105,9 @@ enable-ttl = true ttl-check-poll-interval = "0s" txn-status-cache-capacity = 1000 memory-quota = "123KB" +max-ts-drift-allowance = "333s" +max-ts-sync-interval = "44s" +action-on-invalid-max-ts = "error" [storage.block-cache] capacity = "40GB" diff --git a/tests/integrations/server/kv_service.rs b/tests/integrations/server/kv_service.rs index 52eb3563dff..4b1b7492474 100644 --- a/tests/integrations/server/kv_service.rs +++ b/tests/integrations/server/kv_service.rs @@ -1930,7 +1930,7 @@ fn test_prewrite_check_max_commit_ts() { let (cluster, client, ctx) = new_cluster(); let cm = cluster.sim.read().unwrap().get_concurrency_manager(1); - cm.update_max_ts(100.into()); + cm.update_max_ts(100.into(), "").unwrap(); let mut req = PrewriteRequest::default(); req.set_context(ctx.clone()); From 0bcb0d2ec68f0d0f60e95f96c6c044aeab821a3c Mon Sep 17 00:00:00 2001 From: glorv Date: 
Wed, 25 Dec 2024 10:58:03 +0800 Subject: [PATCH 63/86] in_memory_engine: do not force disable in memory engine for replica read (#18022) ref tikv/tikv#16141 This commit rolls back PR #17927, as it is not the root cause. With this rollback, the replica read scenario can use IME again. NOTE: We decided not to cherry-pick it back to v8.5 as there may be other potential issues. Signed-off-by: glorv Co-authored-by: ti-chi-bot[bot] <108142056+ti-chi-bot[bot]@users.noreply.github.com> --- src/server/raftkv/mod.rs | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) diff --git a/src/server/raftkv/mod.rs b/src/server/raftkv/mod.rs index 28f590c95ad..53ee40438ed 100644 --- a/src/server/raftkv/mod.rs +++ b/src/server/raftkv/mod.rs @@ -621,19 +621,13 @@ where type IMSnap = RegionSnapshot>; type IMSnapshotRes = impl Future> + Send; fn async_in_memory_snapshot(&mut self, ctx: SnapContext<'_>) -> Self::IMSnapshotRes { - let replica_read = ctx.pb_ctx.get_replica_read(); - async_snapshot(&mut self.router, ctx).map_ok(move |region_snap| { + async_snapshot(&mut self.router, ctx).map_ok(|region_snap| { // TODO: Remove replace_snapshot. Taking a snapshot and replacing it // with a new one is a bit confusing. // A better way to build an in-memory snapshot is to return // `HybridEngineSnapshot, RegionCacheMemoryEngine>>;` // so the `replace_snapshot` can be removed. - region_snap.replace_snapshot(move |disk_snap, mut pinned| { - // Disable in-memory-engine snapshot for now as there may be some bugs. - // TODO: we may remove this restriction once we fix the related bug. - if replica_read { - pinned = None; - } + region_snap.replace_snapshot(move |disk_snap, pinned| { HybridEngineSnapshot::from_observed_snapshot(disk_snap, pinned) }) }) From 615a6549cf6388367da874baa53937501c3efb88 Mon Sep 17 00:00:00 2001 From: Neil Shen Date: Wed, 25 Dec 2024 14:01:42 +0800 Subject: [PATCH 64/86] In-memory Engine: fix pre-load uninitialized peer panic (#18052) close tikv/tikv#18046 Avoid loading a region into IME when it is uninitialized, to prevent a panic on encoding the region end key. This is because `MsgPreLoadRegionRequest` is sent before the leader issues a transfer leader request. Signed-off-by: Neil Shen --- .../src/observer/load_eviction.rs | 11 +++- .../failpoints/cases/test_in_memory_engine.rs | 50 ++++++++++++++++++- 2 files changed, 58 insertions(+), 3 deletions(-) diff --git a/components/hybrid_engine/src/observer/load_eviction.rs b/components/hybrid_engine/src/observer/load_eviction.rs index 8a3728a0618..289288aebab 100644 --- a/components/hybrid_engine/src/observer/load_eviction.rs +++ b/components/hybrid_engine/src/observer/load_eviction.rs @@ -267,9 +267,16 @@ impl RoleObserver for LoadEvictionObserver { } impl ExtraMessageObserver for LoadEvictionObserver { - fn on_extra_message(&self, r: &Region, extra_msg: &ExtraMessage) { + fn on_extra_message(&self, region: &Region, extra_msg: &ExtraMessage) { if extra_msg.get_type() == ExtraMessageType::MsgPreLoadRegionRequest { - self.cache_engine.load_region(r); + if region.get_peers().is_empty() { + // MsgPreLoadRegionRequest is sent before leader issue a + // transfer leader request. It is possible that the peer + // is not initialized yet.
+ warn!("ime skip pre-load an uninitialized region"; "region" => ?region); + return; + } + self.cache_engine.load_region(region); } } } diff --git a/tests/failpoints/cases/test_in_memory_engine.rs b/tests/failpoints/cases/test_in_memory_engine.rs index 11915e83836..aeb2b04454f 100644 --- a/tests/failpoints/cases/test_in_memory_engine.rs +++ b/tests/failpoints/cases/test_in_memory_engine.rs @@ -37,7 +37,7 @@ use test_coprocessor::{ }; use test_raftstore::{ configure_for_merge, get_tso, must_get_equal, new_learner_peer, new_peer, new_put_cf_cmd, - new_server_cluster_with_hybrid_engine, CloneFilterFactory, Cluster, Direction, + new_server_cluster_with_hybrid_engine, sleep_ms, CloneFilterFactory, Cluster, Direction, RegionPacketFilter, ServerCluster, }; use test_util::eventually; @@ -1153,3 +1153,51 @@ fn test_region_rollback_merge() { }, ); } + +// IME must not panic when pre-load an uninitialized peer. +#[test] +fn test_transfer_leader_pre_load_uninitialized_peer() { + let mut cluster = new_server_cluster_with_hybrid_engine(0, 2); + let pd_client = cluster.pd_client.clone(); + pd_client.disable_default_operator(); + cluster.run_conf_change(); + + let region = pd_client.get_region(b"").unwrap(); + assert!( + !region.get_peers().iter().any(|p| p.get_store_id() == 2), + "{:?}", + region + ); + + // Load the region in leader. + let region_cache_engine = cluster.sim.rl().get_region_cache_engine(1); + region_cache_engine + .load_region(CacheRegion::from_region(®ion)) + .unwrap(); + // Put some key to trigger load + cluster.must_put(b"k", b"val"); + eventually(Duration::from_millis(100), Duration::from_secs(5), || { + region_cache_engine + .snapshot(CacheRegion::from_region(®ion), 100, 100) + .is_ok() + }); + + // Block snapshot messages, so that new peers will never be initialized. + cluster.add_send_filter(CloneFilterFactory( + RegionPacketFilter::new(region.get_id(), 2) + .msg_type(MessageType::MsgSnapshot) + .direction(Direction::Recv), + )); + + let peer1 = new_peer(2, 2); + pd_client.must_add_peer(region.get_id(), peer1.clone()); + cluster.must_region_exist(region.get_id(), 2); + + // IME will send a MsgPreLoadRegion message before transferring leader, + // and this message must not cause panic. + cluster.transfer_leader(region.get_id(), new_peer(2, 2)); + // Give some time for handling MsgPreLoadRegion message. 
+ sleep_ms(100); cluster.clear_send_filters(); cluster.must_transfer_leader(region.get_id(), new_peer(2, 2)); } From 34fc67832b2cdf0739e41a92fb7137c3b90e3053 Mon Sep 17 00:00:00 2001 From: Ti Chi Robot Date: Thu, 26 Dec 2024 18:02:00 +0800 Subject: [PATCH 65/86] build: bump tikv pkg version (#18062) ref tikv/tikv#15990 build: bump tikv pkg version Signed-off-by: ti-chi-bot --- Cargo.lock | 2 +- Cargo.toml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index f4ef3094d93..2f7aaeed907 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -7231,7 +7231,7 @@ dependencies = [ [[package]] name = "tikv" -version = "8.5.0-alpha" +version = "9.0.0-alpha" dependencies = [ "anyhow", "api_version", diff --git a/Cargo.toml b/Cargo.toml index 979bd8598d2..f3dd3734a05 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "tikv" -version = "8.5.0-alpha" +version = "9.0.0-alpha" authors = ["The TiKV Authors"] description = "A distributed transactional key-value database powered by Rust and Raft" license = "Apache-2.0" From 6ff4b9d4bf63dab5ffefb5be76524e6d23b26f71 Mon Sep 17 00:00:00 2001 From: glorv Date: Thu, 26 Dec 2024 21:23:30 +0800 Subject: [PATCH 66/86] metrics: let panel series overrides compatible with additional group_by (#18061) close tikv/tikv#18060 Use a regex expression in panel seriesOverrides to make it compatible with the optional "additional_groupby" alias. Signed-off-by: glorv Co-authored-by: ti-chi-bot[bot] <108142056+ti-chi-bot[bot]@users.noreply.github.com> --- metrics/grafana/common.py | 6 +- metrics/grafana/tikv_details.json | 276 +++++++++++------------ metrics/grafana/tikv_details.json.sha256 | 2 +- 3 files changed, 143 insertions(+), 141 deletions(-) diff --git a/metrics/grafana/common.py b/metrics/grafana/common.py index 371b0762166..58b3424d043 100644 --- a/metrics/grafana/common.py +++ b/metrics/grafana/common.py @@ -1117,7 +1117,8 @@ def legend(prefix, labels): ], series_overrides=[ series_override( - alias="count", + # use regex because the real alias is "count ${additional_groupby}" + alias="/^count/", fill=2, yaxis=2, zindex=-3, @@ -1127,7 +1128,8 @@ def legend(prefix, labels): transform_negative_y=True, ), series_override( - alias="avg", + # use regex because the real alias is "avg ${additional_groupby}" + alias="/^avg/", fill=7, ), ], diff --git a/metrics/grafana/tikv_details.json b/metrics/grafana/tikv_details.json index 97b1f9e4c96..3c6bdda0a41 100644 --- a/metrics/grafana/tikv_details.json +++ b/metrics/grafana/tikv_details.json @@ -4378,7 +4378,7 @@ "repeatDirection": null, "seriesOverrides": [ { - "alias": "count", + "alias": "/^count/", "bars": false, "dashLength": 1, "dashes": true, @@ -4391,7 +4391,7 @@ "zindex": -3 }, { - "alias": "avg", + "alias": "/^avg/", "bars": false, "fill": 7, "fillBelowTo": null, @@ -5055,7 +5055,7 @@ "repeatDirection": null, "seriesOverrides": [ { - "alias": "count", + "alias": "/^count/", "bars": false, "dashLength": 1, "dashes": true, @@ -5068,7 +5068,7 @@ "zindex": -3 }, { - "alias": "avg", + "alias": "/^avg/", "bars": false, "fill": 7, "fillBelowTo": null, @@ -8398,7 +8398,7 @@ "repeatDirection": null, "seriesOverrides": [ { - "alias": "count", + "alias": "/^count/", "bars": false, "dashLength": 1, "dashes": true, @@ -8411,7 +8411,7 @@ "zindex": -3 }, { - "alias": "avg", + "alias": "/^avg/", "bars": false, "fill": 7, "fillBelowTo": null, @@ -8704,7 +8704,7 @@ "repeatDirection": null, "seriesOverrides": [ { - "alias": "count", + "alias": "/^count/", "bars": false, "dashLength":
1, "dashes": true, @@ -8717,7 +8717,7 @@ "zindex": -3 }, { - "alias": "avg", + "alias": "/^avg/", "bars": false, "fill": 7, "fillBelowTo": null, @@ -9010,7 +9010,7 @@ "repeatDirection": null, "seriesOverrides": [ { - "alias": "count", + "alias": "/^count/", "bars": false, "dashLength": 1, "dashes": true, @@ -9023,7 +9023,7 @@ "zindex": -3 }, { - "alias": "avg", + "alias": "/^avg/", "bars": false, "fill": 7, "fillBelowTo": null, @@ -9316,7 +9316,7 @@ "repeatDirection": null, "seriesOverrides": [ { - "alias": "count", + "alias": "/^count/", "bars": false, "dashLength": 1, "dashes": true, @@ -9329,7 +9329,7 @@ "zindex": -3 }, { - "alias": "avg", + "alias": "/^avg/", "bars": false, "fill": 7, "fillBelowTo": null, @@ -9622,7 +9622,7 @@ "repeatDirection": null, "seriesOverrides": [ { - "alias": "count", + "alias": "/^count/", "bars": false, "dashLength": 1, "dashes": true, @@ -9635,7 +9635,7 @@ "zindex": -3 }, { - "alias": "avg", + "alias": "/^avg/", "bars": false, "fill": 7, "fillBelowTo": null, @@ -9956,7 +9956,7 @@ "repeatDirection": null, "seriesOverrides": [ { - "alias": "count", + "alias": "/^count/", "bars": false, "dashLength": 1, "dashes": true, @@ -9969,7 +9969,7 @@ "zindex": -3 }, { - "alias": "avg", + "alias": "/^avg/", "bars": false, "fill": 7, "fillBelowTo": null, @@ -10157,7 +10157,7 @@ "repeatDirection": null, "seriesOverrides": [ { - "alias": "count", + "alias": "/^count/", "bars": false, "dashLength": 1, "dashes": true, @@ -10170,7 +10170,7 @@ "zindex": -3 }, { - "alias": "avg", + "alias": "/^avg/", "bars": false, "fill": 7, "fillBelowTo": null, @@ -10358,7 +10358,7 @@ "repeatDirection": null, "seriesOverrides": [ { - "alias": "count", + "alias": "/^count/", "bars": false, "dashLength": 1, "dashes": true, @@ -10371,7 +10371,7 @@ "zindex": -3 }, { - "alias": "avg", + "alias": "/^avg/", "bars": false, "fill": 7, "fillBelowTo": null, @@ -14480,7 +14480,7 @@ "repeatDirection": null, "seriesOverrides": [ { - "alias": "count", + "alias": "/^count/", "bars": false, "dashLength": 1, "dashes": true, @@ -14493,7 +14493,7 @@ "zindex": -3 }, { - "alias": "avg", + "alias": "/^avg/", "bars": false, "fill": 7, "fillBelowTo": null, @@ -14681,7 +14681,7 @@ "repeatDirection": null, "seriesOverrides": [ { - "alias": "count", + "alias": "/^count/", "bars": false, "dashLength": 1, "dashes": true, @@ -14694,7 +14694,7 @@ "zindex": -3 }, { - "alias": "avg", + "alias": "/^avg/", "bars": false, "fill": 7, "fillBelowTo": null, @@ -14882,7 +14882,7 @@ "repeatDirection": null, "seriesOverrides": [ { - "alias": "count", + "alias": "/^count/", "bars": false, "dashLength": 1, "dashes": true, @@ -14895,7 +14895,7 @@ "zindex": -3 }, { - "alias": "avg", + "alias": "/^avg/", "bars": false, "fill": 7, "fillBelowTo": null, @@ -15083,7 +15083,7 @@ "repeatDirection": null, "seriesOverrides": [ { - "alias": "count", + "alias": "/^count/", "bars": false, "dashLength": 1, "dashes": true, @@ -15096,7 +15096,7 @@ "zindex": -3 }, { - "alias": "avg", + "alias": "/^avg/", "bars": false, "fill": 7, "fillBelowTo": null, @@ -15284,7 +15284,7 @@ "repeatDirection": null, "seriesOverrides": [ { - "alias": "count", + "alias": "/^count/", "bars": false, "dashLength": 1, "dashes": true, @@ -15297,7 +15297,7 @@ "zindex": -3 }, { - "alias": "avg", + "alias": "/^avg/", "bars": false, "fill": 7, "fillBelowTo": null, @@ -15485,7 +15485,7 @@ "repeatDirection": null, "seriesOverrides": [ { - "alias": "count", + "alias": "/^count/", "bars": false, "dashLength": 1, "dashes": true, @@ -15498,7 +15498,7 @@ "zindex": -3 }, { - "alias": "avg", + 
"alias": "/^avg/", "bars": false, "fill": 7, "fillBelowTo": null, @@ -15686,7 +15686,7 @@ "repeatDirection": null, "seriesOverrides": [ { - "alias": "count", + "alias": "/^count/", "bars": false, "dashLength": 1, "dashes": true, @@ -15699,7 +15699,7 @@ "zindex": -3 }, { - "alias": "avg", + "alias": "/^avg/", "bars": false, "fill": 7, "fillBelowTo": null, @@ -15887,7 +15887,7 @@ "repeatDirection": null, "seriesOverrides": [ { - "alias": "count", + "alias": "/^count/", "bars": false, "dashLength": 1, "dashes": true, @@ -15900,7 +15900,7 @@ "zindex": -3 }, { - "alias": "avg", + "alias": "/^avg/", "bars": false, "fill": 7, "fillBelowTo": null, @@ -16088,7 +16088,7 @@ "repeatDirection": null, "seriesOverrides": [ { - "alias": "count", + "alias": "/^count/", "bars": false, "dashLength": 1, "dashes": true, @@ -16101,7 +16101,7 @@ "zindex": -3 }, { - "alias": "avg", + "alias": "/^avg/", "bars": false, "fill": 7, "fillBelowTo": null, @@ -16289,7 +16289,7 @@ "repeatDirection": null, "seriesOverrides": [ { - "alias": "count", + "alias": "/^count/", "bars": false, "dashLength": 1, "dashes": true, @@ -16302,7 +16302,7 @@ "zindex": -3 }, { - "alias": "avg", + "alias": "/^avg/", "bars": false, "fill": 7, "fillBelowTo": null, @@ -16490,7 +16490,7 @@ "repeatDirection": null, "seriesOverrides": [ { - "alias": "count", + "alias": "/^count/", "bars": false, "dashLength": 1, "dashes": true, @@ -16503,7 +16503,7 @@ "zindex": -3 }, { - "alias": "avg", + "alias": "/^avg/", "bars": false, "fill": 7, "fillBelowTo": null, @@ -16691,7 +16691,7 @@ "repeatDirection": null, "seriesOverrides": [ { - "alias": "count", + "alias": "/^count/", "bars": false, "dashLength": 1, "dashes": true, @@ -16704,7 +16704,7 @@ "zindex": -3 }, { - "alias": "avg", + "alias": "/^avg/", "bars": false, "fill": 7, "fillBelowTo": null, @@ -16892,7 +16892,7 @@ "repeatDirection": null, "seriesOverrides": [ { - "alias": "count", + "alias": "/^count/", "bars": false, "dashLength": 1, "dashes": true, @@ -16905,7 +16905,7 @@ "zindex": -3 }, { - "alias": "avg", + "alias": "/^avg/", "bars": false, "fill": 7, "fillBelowTo": null, @@ -17240,7 +17240,7 @@ "repeatDirection": null, "seriesOverrides": [ { - "alias": "count", + "alias": "/^count/", "bars": false, "dashLength": 1, "dashes": true, @@ -17253,7 +17253,7 @@ "zindex": -3 }, { - "alias": "avg", + "alias": "/^avg/", "bars": false, "fill": 7, "fillBelowTo": null, @@ -17546,7 +17546,7 @@ "repeatDirection": null, "seriesOverrides": [ { - "alias": "count", + "alias": "/^count/", "bars": false, "dashLength": 1, "dashes": true, @@ -17559,7 +17559,7 @@ "zindex": -3 }, { - "alias": "avg", + "alias": "/^avg/", "bars": false, "fill": 7, "fillBelowTo": null, @@ -17852,7 +17852,7 @@ "repeatDirection": null, "seriesOverrides": [ { - "alias": "count", + "alias": "/^count/", "bars": false, "dashLength": 1, "dashes": true, @@ -17865,7 +17865,7 @@ "zindex": -3 }, { - "alias": "avg", + "alias": "/^avg/", "bars": false, "fill": 7, "fillBelowTo": null, @@ -18158,7 +18158,7 @@ "repeatDirection": null, "seriesOverrides": [ { - "alias": "count", + "alias": "/^count/", "bars": false, "dashLength": 1, "dashes": true, @@ -18171,7 +18171,7 @@ "zindex": -3 }, { - "alias": "avg", + "alias": "/^avg/", "bars": false, "fill": 7, "fillBelowTo": null, @@ -18464,7 +18464,7 @@ "repeatDirection": null, "seriesOverrides": [ { - "alias": "count", + "alias": "/^count/", "bars": false, "dashLength": 1, "dashes": true, @@ -18477,7 +18477,7 @@ "zindex": -3 }, { - "alias": "avg", + "alias": "/^avg/", "bars": false, "fill": 7, "fillBelowTo": 
null, @@ -18770,7 +18770,7 @@ "repeatDirection": null, "seriesOverrides": [ { - "alias": "count", + "alias": "/^count/", "bars": false, "dashLength": 1, "dashes": true, @@ -18783,7 +18783,7 @@ "zindex": -3 }, { - "alias": "avg", + "alias": "/^avg/", "bars": false, "fill": 7, "fillBelowTo": null, @@ -19931,7 +19931,7 @@ "repeatDirection": null, "seriesOverrides": [ { - "alias": "count", + "alias": "/^count/", "bars": false, "dashLength": 1, "dashes": true, @@ -19944,7 +19944,7 @@ "zindex": -3 }, { - "alias": "avg", + "alias": "/^avg/", "bars": false, "fill": 7, "fillBelowTo": null, @@ -20237,7 +20237,7 @@ "repeatDirection": null, "seriesOverrides": [ { - "alias": "count", + "alias": "/^count/", "bars": false, "dashLength": 1, "dashes": true, @@ -20250,7 +20250,7 @@ "zindex": -3 }, { - "alias": "avg", + "alias": "/^avg/", "bars": false, "fill": 7, "fillBelowTo": null, @@ -20543,7 +20543,7 @@ "repeatDirection": null, "seriesOverrides": [ { - "alias": "count", + "alias": "/^count/", "bars": false, "dashLength": 1, "dashes": true, @@ -20556,7 +20556,7 @@ "zindex": -3 }, { - "alias": "avg", + "alias": "/^avg/", "bars": false, "fill": 7, "fillBelowTo": null, @@ -24102,7 +24102,7 @@ "repeatDirection": null, "seriesOverrides": [ { - "alias": "count", + "alias": "/^count/", "bars": false, "dashLength": 1, "dashes": true, @@ -24115,7 +24115,7 @@ "zindex": -3 }, { - "alias": "avg", + "alias": "/^avg/", "bars": false, "fill": 7, "fillBelowTo": null, @@ -24408,7 +24408,7 @@ "repeatDirection": null, "seriesOverrides": [ { - "alias": "count", + "alias": "/^count/", "bars": false, "dashLength": 1, "dashes": true, @@ -24421,7 +24421,7 @@ "zindex": -3 }, { - "alias": "avg", + "alias": "/^avg/", "bars": false, "fill": 7, "fillBelowTo": null, @@ -27127,7 +27127,7 @@ "repeatDirection": null, "seriesOverrides": [ { - "alias": "count", + "alias": "/^count/", "bars": false, "dashLength": 1, "dashes": true, @@ -27140,7 +27140,7 @@ "zindex": -3 }, { - "alias": "avg", + "alias": "/^avg/", "bars": false, "fill": 7, "fillBelowTo": null, @@ -27639,7 +27639,7 @@ "repeatDirection": null, "seriesOverrides": [ { - "alias": "count", + "alias": "/^count/", "bars": false, "dashLength": 1, "dashes": true, @@ -27652,7 +27652,7 @@ "zindex": -3 }, { - "alias": "avg", + "alias": "/^avg/", "bars": false, "fill": 7, "fillBelowTo": null, @@ -28477,7 +28477,7 @@ "repeatDirection": null, "seriesOverrides": [ { - "alias": "count", + "alias": "/^count/", "bars": false, "dashLength": 1, "dashes": true, @@ -28490,7 +28490,7 @@ "zindex": -3 }, { - "alias": "avg", + "alias": "/^avg/", "bars": false, "fill": 7, "fillBelowTo": null, @@ -34129,7 +34129,7 @@ "repeatDirection": null, "seriesOverrides": [ { - "alias": "count", + "alias": "/^count/", "bars": false, "dashLength": 1, "dashes": true, @@ -34142,7 +34142,7 @@ "zindex": -3 }, { - "alias": "avg", + "alias": "/^avg/", "bars": false, "fill": 7, "fillBelowTo": null, @@ -39508,7 +39508,7 @@ "repeatDirection": null, "seriesOverrides": [ { - "alias": "count", + "alias": "/^count/", "bars": false, "dashLength": 1, "dashes": true, @@ -39521,7 +39521,7 @@ "zindex": -3 }, { - "alias": "avg", + "alias": "/^avg/", "bars": false, "fill": 7, "fillBelowTo": null, @@ -41193,7 +41193,7 @@ "repeatDirection": null, "seriesOverrides": [ { - "alias": "count", + "alias": "/^count/", "bars": false, "dashLength": 1, "dashes": true, @@ -41206,7 +41206,7 @@ "zindex": -3 }, { - "alias": "avg", + "alias": "/^avg/", "bars": false, "fill": 7, "fillBelowTo": null, @@ -41499,7 +41499,7 @@ "repeatDirection": null, 
"seriesOverrides": [ { - "alias": "count", + "alias": "/^count/", "bars": false, "dashLength": 1, "dashes": true, @@ -41512,7 +41512,7 @@ "zindex": -3 }, { - "alias": "avg", + "alias": "/^avg/", "bars": false, "fill": 7, "fillBelowTo": null, @@ -45588,7 +45588,7 @@ "repeatDirection": null, "seriesOverrides": [ { - "alias": "count", + "alias": "/^count/", "bars": false, "dashLength": 1, "dashes": true, @@ -45601,7 +45601,7 @@ "zindex": -3 }, { - "alias": "avg", + "alias": "/^avg/", "bars": false, "fill": 7, "fillBelowTo": null, @@ -45789,7 +45789,7 @@ "repeatDirection": null, "seriesOverrides": [ { - "alias": "count", + "alias": "/^count/", "bars": false, "dashLength": 1, "dashes": true, @@ -45802,7 +45802,7 @@ "zindex": -3 }, { - "alias": "avg", + "alias": "/^avg/", "bars": false, "fill": 7, "fillBelowTo": null, @@ -45990,7 +45990,7 @@ "repeatDirection": null, "seriesOverrides": [ { - "alias": "count", + "alias": "/^count/", "bars": false, "dashLength": 1, "dashes": true, @@ -46003,7 +46003,7 @@ "zindex": -3 }, { - "alias": "avg", + "alias": "/^avg/", "bars": false, "fill": 7, "fillBelowTo": null, @@ -46381,7 +46381,7 @@ "repeatDirection": null, "seriesOverrides": [ { - "alias": "count", + "alias": "/^count/", "bars": false, "dashLength": 1, "dashes": true, @@ -46394,7 +46394,7 @@ "zindex": -3 }, { - "alias": "avg", + "alias": "/^avg/", "bars": false, "fill": 7, "fillBelowTo": null, @@ -46582,7 +46582,7 @@ "repeatDirection": null, "seriesOverrides": [ { - "alias": "count", + "alias": "/^count/", "bars": false, "dashLength": 1, "dashes": true, @@ -46595,7 +46595,7 @@ "zindex": -3 }, { - "alias": "avg", + "alias": "/^avg/", "bars": false, "fill": 7, "fillBelowTo": null, @@ -46783,7 +46783,7 @@ "repeatDirection": null, "seriesOverrides": [ { - "alias": "count", + "alias": "/^count/", "bars": false, "dashLength": 1, "dashes": true, @@ -46796,7 +46796,7 @@ "zindex": -3 }, { - "alias": "avg", + "alias": "/^avg/", "bars": false, "fill": 7, "fillBelowTo": null, @@ -46984,7 +46984,7 @@ "repeatDirection": null, "seriesOverrides": [ { - "alias": "count", + "alias": "/^count/", "bars": false, "dashLength": 1, "dashes": true, @@ -46997,7 +46997,7 @@ "zindex": -3 }, { - "alias": "avg", + "alias": "/^avg/", "bars": false, "fill": 7, "fillBelowTo": null, @@ -47717,7 +47717,7 @@ "repeatDirection": null, "seriesOverrides": [ { - "alias": "count", + "alias": "/^count/", "bars": false, "dashLength": 1, "dashes": true, @@ -47730,7 +47730,7 @@ "zindex": -3 }, { - "alias": "avg", + "alias": "/^avg/", "bars": false, "fill": 7, "fillBelowTo": null, @@ -48170,7 +48170,7 @@ "repeatDirection": null, "seriesOverrides": [ { - "alias": "count", + "alias": "/^count/", "bars": false, "dashLength": 1, "dashes": true, @@ -48183,7 +48183,7 @@ "zindex": -3 }, { - "alias": "avg", + "alias": "/^avg/", "bars": false, "fill": 7, "fillBelowTo": null, @@ -48770,7 +48770,7 @@ "repeatDirection": null, "seriesOverrides": [ { - "alias": "count", + "alias": "/^count/", "bars": false, "dashLength": 1, "dashes": true, @@ -48783,7 +48783,7 @@ "zindex": -3 }, { - "alias": "avg", + "alias": "/^avg/", "bars": false, "fill": 7, "fillBelowTo": null, @@ -49412,7 +49412,7 @@ "repeatDirection": null, "seriesOverrides": [ { - "alias": "count", + "alias": "/^count/", "bars": false, "dashLength": 1, "dashes": true, @@ -49425,7 +49425,7 @@ "zindex": -3 }, { - "alias": "avg", + "alias": "/^avg/", "bars": false, "fill": 7, "fillBelowTo": null, @@ -49613,7 +49613,7 @@ "repeatDirection": null, "seriesOverrides": [ { - "alias": "count", + "alias": 
"/^count/", "bars": false, "dashLength": 1, "dashes": true, @@ -49626,7 +49626,7 @@ "zindex": -3 }, { - "alias": "avg", + "alias": "/^avg/", "bars": false, "fill": 7, "fillBelowTo": null, @@ -49814,7 +49814,7 @@ "repeatDirection": null, "seriesOverrides": [ { - "alias": "count", + "alias": "/^count/", "bars": false, "dashLength": 1, "dashes": true, @@ -49827,7 +49827,7 @@ "zindex": -3 }, { - "alias": "avg", + "alias": "/^avg/", "bars": false, "fill": 7, "fillBelowTo": null, @@ -50015,7 +50015,7 @@ "repeatDirection": null, "seriesOverrides": [ { - "alias": "count", + "alias": "/^count/", "bars": false, "dashLength": 1, "dashes": true, @@ -50028,7 +50028,7 @@ "zindex": -3 }, { - "alias": "avg", + "alias": "/^avg/", "bars": false, "fill": 7, "fillBelowTo": null, @@ -51119,7 +51119,7 @@ "repeatDirection": null, "seriesOverrides": [ { - "alias": "count", + "alias": "/^count/", "bars": false, "dashLength": 1, "dashes": true, @@ -51132,7 +51132,7 @@ "zindex": -3 }, { - "alias": "avg", + "alias": "/^avg/", "bars": false, "fill": 7, "fillBelowTo": null, @@ -51866,7 +51866,7 @@ "repeatDirection": null, "seriesOverrides": [ { - "alias": "count", + "alias": "/^count/", "bars": false, "dashLength": 1, "dashes": true, @@ -51879,7 +51879,7 @@ "zindex": -3 }, { - "alias": "avg", + "alias": "/^avg/", "bars": false, "fill": 7, "fillBelowTo": null, @@ -52067,7 +52067,7 @@ "repeatDirection": null, "seriesOverrides": [ { - "alias": "count", + "alias": "/^count/", "bars": false, "dashLength": 1, "dashes": true, @@ -52080,7 +52080,7 @@ "zindex": -3 }, { - "alias": "avg", + "alias": "/^avg/", "bars": false, "fill": 7, "fillBelowTo": null, @@ -52268,7 +52268,7 @@ "repeatDirection": null, "seriesOverrides": [ { - "alias": "count", + "alias": "/^count/", "bars": false, "dashLength": 1, "dashes": true, @@ -52281,7 +52281,7 @@ "zindex": -3 }, { - "alias": "avg", + "alias": "/^avg/", "bars": false, "fill": 7, "fillBelowTo": null, @@ -52689,7 +52689,7 @@ "repeatDirection": null, "seriesOverrides": [ { - "alias": "count", + "alias": "/^count/", "bars": false, "dashLength": 1, "dashes": true, @@ -52702,7 +52702,7 @@ "zindex": -3 }, { - "alias": "avg", + "alias": "/^avg/", "bars": false, "fill": 7, "fillBelowTo": null, @@ -54680,7 +54680,7 @@ "repeatDirection": null, "seriesOverrides": [ { - "alias": "count", + "alias": "/^count/", "bars": false, "dashLength": 1, "dashes": true, @@ -54693,7 +54693,7 @@ "zindex": -3 }, { - "alias": "avg", + "alias": "/^avg/", "bars": false, "fill": 7, "fillBelowTo": null, @@ -55029,7 +55029,7 @@ "repeatDirection": null, "seriesOverrides": [ { - "alias": "count", + "alias": "/^count/", "bars": false, "dashLength": 1, "dashes": true, @@ -55042,7 +55042,7 @@ "zindex": -3 }, { - "alias": "avg", + "alias": "/^avg/", "bars": false, "fill": 7, "fillBelowTo": null, @@ -56000,7 +56000,7 @@ "repeatDirection": null, "seriesOverrides": [ { - "alias": "count", + "alias": "/^count/", "bars": false, "dashLength": 1, "dashes": true, @@ -56013,7 +56013,7 @@ "zindex": -3 }, { - "alias": "avg", + "alias": "/^avg/", "bars": false, "fill": 7, "fillBelowTo": null, @@ -65773,7 +65773,7 @@ "repeatDirection": null, "seriesOverrides": [ { - "alias": "count", + "alias": "/^count/", "bars": false, "dashLength": 1, "dashes": true, @@ -65786,7 +65786,7 @@ "zindex": -3 }, { - "alias": "avg", + "alias": "/^avg/", "bars": false, "fill": 7, "fillBelowTo": null, @@ -65974,7 +65974,7 @@ "repeatDirection": null, "seriesOverrides": [ { - "alias": "count", + "alias": "/^count/", "bars": false, "dashLength": 1, "dashes": true, @@ 
-65987,7 +65987,7 @@ "zindex": -3 }, { - "alias": "avg", + "alias": "/^avg/", "bars": false, "fill": 7, "fillBelowTo": null, @@ -66737,7 +66737,7 @@ "repeatDirection": null, "seriesOverrides": [ { - "alias": "count", + "alias": "/^count/", "bars": false, "dashLength": 1, "dashes": true, @@ -66750,7 +66750,7 @@ "zindex": -3 }, { - "alias": "avg", + "alias": "/^avg/", "bars": false, "fill": 7, "fillBelowTo": null, @@ -75242,7 +75242,7 @@ "repeatDirection": null, "seriesOverrides": [ { - "alias": "count", + "alias": "/^count/", "bars": false, "dashLength": 1, "dashes": true, @@ -75255,7 +75255,7 @@ "zindex": -3 }, { - "alias": "avg", + "alias": "/^avg/", "bars": false, "fill": 7, "fillBelowTo": null, @@ -76298,7 +76298,7 @@ "repeatDirection": null, "seriesOverrides": [ { - "alias": "count", + "alias": "/^count/", "bars": false, "dashLength": 1, "dashes": true, @@ -76311,7 +76311,7 @@ "zindex": -3 }, { - "alias": "avg", + "alias": "/^avg/", "bars": false, "fill": 7, "fillBelowTo": null, @@ -77073,7 +77073,7 @@ "repeatDirection": null, "seriesOverrides": [ { - "alias": "count", + "alias": "/^count/", "bars": false, "dashLength": 1, "dashes": true, @@ -77086,7 +77086,7 @@ "zindex": -3 }, { - "alias": "avg", + "alias": "/^avg/", "bars": false, "fill": 7, "fillBelowTo": null, diff --git a/metrics/grafana/tikv_details.json.sha256 b/metrics/grafana/tikv_details.json.sha256 index 34c5c48ac6a..337e25b67ee 100644 --- a/metrics/grafana/tikv_details.json.sha256 +++ b/metrics/grafana/tikv_details.json.sha256 @@ -1 +1 @@ -fb206d6a1e887c038e752582478ff75d8b508d40f402392f7d152126891272a9 ./metrics/grafana/tikv_details.json +6f498305f4f2832c024c725cae0fc7c90236091f3dafbcf3d1eee2b0e16e7b45 ./metrics/grafana/tikv_details.json From 83feb16e101c72e60841cd61b4e23030a0348e54 Mon Sep 17 00:00:00 2001 From: ekexium Date: Fri, 27 Dec 2024 16:37:12 +0800 Subject: [PATCH 67/86] concurrency_manager: double check via PD TSO before reporting error of invalid max-ts update (#18057) close tikv/tikv#18055 concurrency_manager: double check via PD TSO before reporting error of invalid max-ts update Signed-off-by: ekexium --- Cargo.lock | 1 + components/concurrency_manager/Cargo.toml | 1 + .../benches/update_max_ts.rs | 2 + components/concurrency_manager/src/lib.rs | 134 +++++++++++++++++- components/server/src/server.rs | 9 +- components/server/src/server2.rs | 12 +- src/storage/config_manager.rs | 4 + 7 files changed, 144 insertions(+), 19 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 2f7aaeed907..b1d179ed5ac 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1563,6 +1563,7 @@ dependencies = [ "lazy_static", "online_config", "parking_lot 0.12.1", + "pd_client", "prometheus", "rand 0.8.5", "serde", diff --git a/components/concurrency_manager/Cargo.toml b/components/concurrency_manager/Cargo.toml index dc9cf1b558a..b98e721b593 100644 --- a/components/concurrency_manager/Cargo.toml +++ b/components/concurrency_manager/Cargo.toml @@ -13,6 +13,7 @@ kvproto = { workspace = true } lazy_static = "1.4.0" online_config = { workspace = true } parking_lot = "0.12" +pd_client = { workspace = true } prometheus = "0.13" serde = { version = "1.0.194", features = ["derive"] } slog = { workspace = true } diff --git a/components/concurrency_manager/benches/update_max_ts.rs b/components/concurrency_manager/benches/update_max_ts.rs index 2c8dfd58120..df2e61aeb42 100644 --- a/components/concurrency_manager/benches/update_max_ts.rs +++ b/components/concurrency_manager/benches/update_max_ts.rs @@ -13,6 +13,8 @@ fn benchmark_update_max_ts(c: &mut 
Criterion) { latest_ts, limit_valid_time, ActionOnInvalidMaxTs::Error, + None, + Duration::ZERO, ); cm.set_max_ts_limit(TimeStamp::new(4000)); diff --git a/components/concurrency_manager/src/lib.rs b/components/concurrency_manager/src/lib.rs index 20b2b39ba22..31c57693baa 100644 --- a/components/concurrency_manager/src/lib.rs +++ b/components/concurrency_manager/src/lib.rs @@ -29,9 +29,10 @@ use std::{ use crossbeam::atomic::AtomicCell; use lazy_static::lazy_static; +use pd_client::{PdClient, PdFuture}; use prometheus::{register_int_gauge, IntGauge}; use thiserror::Error; -use tikv_util::{error, time::Instant}; +use tikv_util::{error, future::block_on_timeout, time::Instant}; use txn_types::{Key, Lock, TimeStamp}; pub use self::{ @@ -58,12 +59,24 @@ const DEFAULT_LIMIT_VALID_DURATION: Duration = Duration::from_secs(60); // between TiKV and PD. pub const LIMIT_VALID_TIME_MULTIPLIER: u32 = 3; +const TSO_TIMEOUT: Duration = Duration::from_secs(5); + #[derive(Copy, Clone, PartialEq, Eq)] struct MaxTsLimit { limit: TimeStamp, update_time: Instant, } +pub trait TSOProvider: Send + Sync { + fn get_tso(&self) -> PdFuture; +} + +impl TSOProvider for T { + fn get_tso(&self) -> PdFuture { + PdClient::get_tso(self) + } +} + // Pay attention that the async functions of ConcurrencyManager should not hold // the mutex. #[derive(Clone)] @@ -82,6 +95,10 @@ pub struct ConcurrencyManager { limit_valid_duration: Duration, action_on_invalid_max_ts: Arc, + max_ts_drift_allowance_ms: Arc, + + tso: Option>, + time_provider: Arc, } @@ -91,6 +108,8 @@ impl ConcurrencyManager { latest_ts, DEFAULT_LIMIT_VALID_DURATION, ActionOnInvalidMaxTs::Panic, + None, + Duration::ZERO, ) } @@ -98,6 +117,8 @@ impl ConcurrencyManager { latest_ts: TimeStamp, limit_valid_duration: Duration, action_on_invalid_max_ts: ActionOnInvalidMaxTs, + tso: Option>, + max_ts_drift_allowance: Duration, ) -> Self { let initial_limit = MaxTsLimit { limit: TimeStamp::new(0), @@ -113,6 +134,10 @@ impl ConcurrencyManager { )), limit_valid_duration, time_provider: Arc::new(CoarseInstantTimeProvider), + tso, + max_ts_drift_allowance_ms: Arc::new(AtomicU64::new( + max_ts_drift_allowance.as_millis() as u64 + )), } } @@ -122,6 +147,8 @@ impl ConcurrencyManager { limit_valid_duration: Duration, action_on_invalid_max_ts: ActionOnInvalidMaxTs, time_provider: Arc, + tso: Option>, + max_ts_drift_allowance: Duration, ) -> Self { let initial_limit = MaxTsLimit { limit: TimeStamp::new(0), @@ -136,6 +163,10 @@ impl ConcurrencyManager { )), limit_valid_duration, time_provider, + tso, + max_ts_drift_allowance_ms: Arc::new(AtomicU64::new( + max_ts_drift_allowance.as_millis() as u64 + )), } } @@ -180,7 +211,7 @@ impl ConcurrencyManager { if duration_to_last_limit_update < self.limit_valid_duration { // limit is valid let source = source.into_error_source(); - self.report_error(new_ts, limit.limit, source, false)?; + self.double_check(new_ts, limit.limit, source, false)?; } else { // limit is stale // use an approximate limit to avoid false alerts caused by failed limit updates @@ -192,7 +223,7 @@ impl ConcurrencyManager { if new_ts > approximate_limit { let source = source.into_error_source(); - self.report_error(new_ts, approximate_limit, source, true)?; + self.double_check(new_ts, approximate_limit, source, true)?; } } } @@ -205,6 +236,42 @@ impl ConcurrencyManager { Ok(()) } + // new_ts is greater than limit, or the approximate limit. + // To avoid false positive and guarantee TiKV availability, we need to + // double-check the new_ts with PD TSO. 
+ fn double_check( + &self, + new_ts: TimeStamp, + limit: TimeStamp, + source: impl slog::Value + Display, + using_approximate: bool, + ) -> Result<(), crate::InvalidMaxTsUpdate> { + error!("possible invalid max-ts update; double checking"; + "attempted_ts" => new_ts, + "max_allowed" => limit.into_inner(), + "source" => &source, + "using_approximate" => using_approximate, + "TSO_TIMEOUT" => ?TSO_TIMEOUT, + ); + if let Some(tso) = &self.tso { + match block_on_timeout(tso.get_tso(), TSO_TIMEOUT) { + Ok(Ok(ts)) => { + self.set_max_ts_limit(ts); + } + Ok(Err(e)) => { + error!("failed to fetch from TSO for double checking"; "err" => ?e); + } + Err(()) => { + error!("timeout when fetching from TSO for double checking"; "timeout" => ?TSO_TIMEOUT); + } + } + } + if new_ts > self.max_ts_limit.load().limit { + self.report_error(new_ts, limit, source, using_approximate)?; + } + Ok(()) + } + fn report_error( &self, new_ts: TimeStamp, @@ -246,12 +313,17 @@ impl ConcurrencyManager { /// # Note /// If the new limit is smaller than the current limit, this operation will /// have no effect and return silently. - pub fn set_max_ts_limit(&self, limit: TimeStamp) { - if limit.is_max() { + pub fn set_max_ts_limit(&self, ts_from_tso: TimeStamp) { + if ts_from_tso.is_max() { error!("max_ts_limit cannot be set to u64::max"); return; } + let limit = TimeStamp::compose( + ts_from_tso.physical() + self.max_ts_drift_allowance_ms.load(Ordering::SeqCst), + ts_from_tso.logical(), + ); + loop { let current = self.max_ts_limit.load(); @@ -363,6 +435,11 @@ impl ConcurrencyManager { pub fn set_action_on_invalid_max_ts(&self, action: ActionOnInvalidMaxTs) { self.action_on_invalid_max_ts.store(action); } + + pub fn set_max_ts_drift_allowance(&self, allowance: Duration) { + self.max_ts_drift_allowance_ms + .store(allowance.as_millis() as u64, Ordering::SeqCst); + } } #[derive(Debug, Clone, Copy, PartialEq)] @@ -512,8 +589,9 @@ impl TimeProvider for CoarseInstantTimeProvider { #[cfg(test)] mod tests { - use std::sync::Mutex; + use std::{future::ready, sync::Mutex}; + use futures::FutureExt; use txn_types::LockType; use super::*; @@ -629,6 +707,8 @@ mod tests { TimeStamp::new(100), DEFAULT_LIMIT_VALID_DURATION, ActionOnInvalidMaxTs::Error, + None, + Duration::ZERO, ); // Initially limit should be 0 @@ -671,6 +751,8 @@ mod tests { TimeStamp::new(100), DEFAULT_LIMIT_VALID_DURATION, ActionOnInvalidMaxTs::Error, + None, + Duration::ZERO, ); // Set limit to 200 @@ -703,6 +785,8 @@ mod tests { Duration::from_secs(60), ActionOnInvalidMaxTs::Error, time_provider.clone(), + None, + Duration::ZERO, ); cm.set_max_ts_limit(TimeStamp::new(200)); @@ -726,6 +810,8 @@ mod tests { Duration::from_secs(60), ActionOnInvalidMaxTs::Error, time_provider.clone(), + None, + Duration::ZERO, ); cm.set_max_ts_limit(TimeStamp::new(200)); @@ -758,4 +844,40 @@ mod tests { .unwrap(); assert_eq!(cm.max_ts().into_inner(), 500); } + + struct MockPD { + tso: AtomicU64, + } + + impl MockPD { + fn new(ts: u64) -> Self { + Self { + tso: AtomicU64::new(ts), + } + } + } + + impl TSOProvider for MockPD { + fn get_tso(&self) -> PdFuture { + ready(Ok(TimeStamp::new(self.tso.fetch_add(1, Ordering::SeqCst)))).boxed() + } + } + + #[test] + fn test_pd_tso_jump_not_panic() { + let mock_pd = Arc::new(MockPD::new(100)); + let cm = ConcurrencyManager::new_with_config( + TimeStamp::new(100), + DEFAULT_LIMIT_VALID_DURATION, + ActionOnInvalidMaxTs::Panic, + Some(mock_pd.clone()), + Duration::ZERO, + ); + + cm.set_max_ts_limit(TimeStamp::new(200)); + // PD TSO jumps from 100 to 300 + 
mock_pd.tso.store(300, Ordering::SeqCst); + cm.update_max_ts(TimeStamp::new(300), "test_source".to_string()) + .unwrap(); + } } diff --git a/components/server/src/server.rs b/components/server/src/server.rs index 0f69c09ce73..b8ba53641ab 100644 --- a/components/server/src/server.rs +++ b/components/server/src/server.rs @@ -114,7 +114,7 @@ use tikv::{ config::EngineType, config_manager::StorageConfigManger, kv::LocalTablets, - mvcc::{MvccConsistencyCheckObserver, TimeStamp}, + mvcc::MvccConsistencyCheckObserver, txn::{ flow_controller::{EngineFlowController, FlowController}, txn_status_cache::TxnStatusCache, @@ -411,6 +411,8 @@ where .as_str() .try_into() .unwrap(), + Some(pd_client.clone()), + config.storage.max_ts_drift_allowance.0, ); // use different quota for front-end and back-end requests @@ -1170,19 +1172,16 @@ where let pd_client = self.pd_client.clone(); let max_ts_sync_interval = self.core.config.storage.max_ts_sync_interval.into(); - let cfg_controller = self.cfg_controller.as_ref().unwrap().clone(); self.core .background_worker .spawn_interval_async_task(max_ts_sync_interval, move || { let cm = cm.clone(); let pd_client = pd_client.clone(); - let allowance_ms = - cfg_controller.get_current().storage.max_ts_drift_allowance.as_millis(); async move { let pd_tso = pd_client.get_tso().await; if let Ok(ts) = pd_tso { - cm.set_max_ts_limit(TimeStamp::compose(ts.physical() + allowance_ms, 0)); + cm.set_max_ts_limit(ts); } else { warn!("failed to get tso from pd in background, the max_ts validity check could be skipped"); } diff --git a/components/server/src/server2.rs b/components/server/src/server2.rs index d2732dd42eb..5381a365de8 100644 --- a/components/server/src/server2.rs +++ b/components/server/src/server2.rs @@ -101,7 +101,7 @@ use tikv::{ config::EngineType, config_manager::StorageConfigManger, kv::LocalTablets, - mvcc::{MvccConsistencyCheckObserver, TimeStamp}, + mvcc::MvccConsistencyCheckObserver, txn::{ flow_controller::{FlowController, TabletFlowController}, txn_status_cache::TxnStatusCache, @@ -335,6 +335,8 @@ where .as_str() .try_into() .unwrap(), + Some(pd_client.clone()), + config.storage.max_ts_drift_allowance.0, ); // use different quota for front-end and back-end requests @@ -963,22 +965,16 @@ where let pd_client = self.pd_client.clone(); let max_ts_sync_interval = self.core.config.storage.max_ts_sync_interval.into(); - let cfg_controller = self.cfg_controller.as_ref().unwrap().clone(); self.core .background_worker .spawn_interval_async_task(max_ts_sync_interval, move || { let cm = cm.clone(); let pd_client = pd_client.clone(); - let allowance_ms = cfg_controller - .get_current() - .storage - .max_ts_drift_allowance - .as_millis(); async move { let pd_tso = pd_client.get_tso().await; if let Ok(ts) = pd_tso { - cm.set_max_ts_limit(TimeStamp::compose(ts.physical() + allowance_ms, 0)); + cm.set_max_ts_limit(ts); } else { warn!("failed to get tso from pd in background"); } diff --git a/src/storage/config_manager.rs b/src/storage/config_manager.rs index fac10a40161..c1676f96b04 100644 --- a/src/storage/config_manager.rs +++ b/src/storage/config_manager.rs @@ -116,6 +116,10 @@ impl ConfigManager self.concurrency_manager .set_action_on_invalid_max_ts(action); } + if let Some(v) = change.remove("max_ts_drift_allowance") { + let dur_v: ReadableDuration = v.into(); + self.concurrency_manager.set_max_ts_drift_allowance(dur_v.0); + } Ok(()) } } From 27acfb345a34f58b0238c2038ace88ffc6cd6aae Mon Sep 17 00:00:00 2001 From: CbcWestwolf <1004626265@qq.com> Date: Mon, 30 Dec 2024 
15:03:54 +0800 Subject: [PATCH 68/86] charset: fix the `cast` for gbk/gb18030 charset (#18067) close tikv/tikv#17618 Fix a bug that wrongly truncates the string when the charset is gbk/gb18030 Signed-off-by: cbcwestwolf <1004626265@qq.com> --- .../tidb_query_datatype/src/codec/convert.rs | 42 ++++++++++++++----- .../src/codec/mysql/charset.rs | 12 +++++- 2 files changed, 42 insertions(+), 12 deletions(-) diff --git a/components/tidb_query_datatype/src/codec/convert.rs b/components/tidb_query_datatype/src/codec/convert.rs index 75195bc3135..94fd1fbe253 100644 --- a/components/tidb_query_datatype/src/codec/convert.rs +++ b/components/tidb_query_datatype/src/codec/convert.rs @@ -5,7 +5,7 @@ use std::{borrow::Cow, fmt::Display}; use tipb::FieldType; use super::{ - mysql::{RoundMode, DEFAULT_FSP}, + mysql::{charset::MULTI_BYTES_CHARSETS, RoundMode, DEFAULT_FSP}, Error, Result, }; // use crate::{self, FieldTypeTp, UNSPECIFIED_LENGTH}; @@ -13,7 +13,7 @@ use crate::{ codec::{ data_type::*, error::ERR_DATA_OUT_OF_RANGE, - mysql::{charset, decimal::max_or_min_dec, Res}, + mysql::{decimal::max_or_min_dec, Res}, }, expr::{EvalContext, Flag}, Collation, FieldTypeAccessor, FieldTypeTp, UNSPECIFIED_LENGTH, @@ -713,9 +713,10 @@ pub fn produce_str_with_specified_tp<'a>( return Ok(s); } let flen = flen as usize; - // flen is the char length, not byte length, for UTF8 charset, we need to - // calculate the char count and truncate to flen chars if it is too long. - if chs == charset::CHARSET_UTF8 || chs == charset::CHARSET_UTF8MB4 { + // flen is the char length, not byte length, for UTF8 and GBK/GB18030 charset, + // we need to calculate the char count and truncate to flen chars if it is + // too long. + if MULTI_BYTES_CHARSETS.contains(chs) { let (char_count, truncate_pos) = { let s = &String::from_utf8_lossy(&s); let mut truncate_pos = 0; @@ -1142,7 +1143,7 @@ mod tests { ERR_DATA_OUT_OF_RANGE, ERR_M_BIGGER_THAN_D, ERR_TRUNCATE_WRONG_VALUE, WARN_DATA_TRUNCATED, }, - mysql::{Res, UNSPECIFIED_FSP}, + mysql::{charset, Res, UNSPECIFIED_FSP}, }, expr::{EvalConfig, EvalContext, Flag}, Collation, FieldTypeFlag, @@ -2221,6 +2222,19 @@ mod tests { ("世界,中国", 4, charset::CHARSET_ASCII), ("世界,中国", 5, charset::CHARSET_ASCII), ("世界,中国", 6, charset::CHARSET_ASCII), + // GBK/GB18030 + ("世界,中国", 1, charset::CHARSET_GBK), + ("世界,中国", 2, charset::CHARSET_GBK), + ("世界,中国", 3, charset::CHARSET_GBK), + ("世界,中国", 4, charset::CHARSET_GBK), + ("世界,中国", 5, charset::CHARSET_GBK), + ("世界,中国", 6, charset::CHARSET_GBK), + ("世界,中国", 1, charset::CHARSET_GB18030), + ("世界,中国", 2, charset::CHARSET_GB18030), + ("世界,中国", 3, charset::CHARSET_GB18030), + ("世界,中国", 4, charset::CHARSET_GB18030), + ("世界,中国", 5, charset::CHARSET_GB18030), + ("世界,中国", 6, charset::CHARSET_GB18030), ]; let cfg = EvalConfig::from_flag(Flag::TRUNCATE_AS_WARNING); @@ -2232,10 +2246,10 @@ mod tests { ft.set_flen(char_num); let bs = s.as_bytes(); let r = produce_str_with_specified_tp(&mut ctx, Cow::Borrowed(bs), &ft, false); - assert!(r.is_ok(), "{}, {}, {}", s, char_num, cs); + assert!(r.is_ok(), "{}, {}, {}, {}", s, char_num, cs, r.unwrap_err()); let p = r.unwrap(); - if cs == charset::CHARSET_UTF8MB4 || cs == charset::CHARSET_UTF8 { + if MULTI_BYTES_CHARSETS.contains(cs) { let ns: String = s.chars().take(char_num as usize).collect(); assert_eq!(p.as_ref(), ns.as_bytes(), "{}, {}, {}", s, char_num, cs); } else { @@ -2271,7 +2285,7 @@ mod tests { ft.set_flen(char_num); let bs = s.as_bytes(); let r = produce_str_with_specified_tp(&mut ctx, Cow::Borrowed(bs), &ft, true); - 
assert!(r.is_ok(), "{}, {}, {}", s, char_num, cs); + assert!(r.is_ok(), "{}, {}, {}, {}", s, char_num, cs, r.unwrap_err()); let p = r.unwrap(); assert_eq!(p.len(), char_num as usize, "{}, {}, {}", s, char_num, cs); @@ -2306,7 +2320,15 @@ ft.set_charset(cs.to_string()); ft.set_flen(char_num); let r = produce_str_with_specified_tp(&mut ctx, Cow::Borrowed(&s), &ft, true); - assert!(r.is_ok(), "{:?}, {}, {}, {:?}", &s, char_num, cs, result); + assert!( + r.is_ok(), + "{:?}, {}, {}, {:?}, {}", + &s, + char_num, + cs, + result, + r.unwrap_err() + ); let p = r.unwrap(); assert_eq!(p, result, "{:?}, {}, {}, {:?}", &s, char_num, cs, result); diff --git a/components/tidb_query_datatype/src/codec/mysql/charset.rs b/components/tidb_query_datatype/src/codec/mysql/charset.rs index cbdd5a01315..92876a510d3 100644 --- a/components/tidb_query_datatype/src/codec/mysql/charset.rs +++ b/components/tidb_query_datatype/src/codec/mysql/charset.rs @@ -1,5 +1,7 @@ // Copyright 2017 TiKV Project Authors. Licensed under Apache-2.0. +use lazy_static::*; + /// `CHARSET_BIN` is used for marking binary charset. pub const CHARSET_BIN: &str = "binary"; /// `CHARSET_UTF8` is the default charset for string types. @@ -15,6 +17,12 @@ pub const CHARSET_LATIN1: &str = "latin1"; pub const CHARSET_GBK: &str = "gbk"; /// `CHARSET_GB18030` is another Chinese character set containing GBK. pub const CHARSET_GB18030: &str = "gb18030"; +// For a new implemented multi-byte charset, add it to MULTI_BYTES_CHARSETS -/// All utf8 charsets. -pub const UTF8_CHARSETS: &[&str] = &[CHARSET_UTF8, CHARSET_UTF8MB4, CHARSET_ASCII]; +lazy_static! { + pub static ref MULTI_BYTES_CHARSETS: collections::HashSet<&'static str> = + [CHARSET_UTF8, CHARSET_UTF8MB4, CHARSET_GBK, CHARSET_GB18030,] + .iter() + .cloned() + .collect(); +} From 03a5c532604a761e47a1f7e2e6ae326b6e6da776 Mon Sep 17 00:00:00 2001 From: cfzjywxk Date: Mon, 30 Dec 2024 22:34:36 +0800 Subject: [PATCH 69/86] server: print more information when default not found error is encountered (#18066) close tikv/tikv#18065 Print more information in logs when default not found error is encountered.
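As a rough illustration (condensed and re-ordered from the failpoint test added in tests/failpoints/cases/test_coprocessor.rs below; the cluster setup and the construction of `client`, `cop_req`, and `get_req` are omitted here), the new fail points force a default-CF miss so the improved error reporting becomes observable:

    // Treat every value as non-short so it is stored in the default CF
    // (set before the record is written in the full test).
    fail::cfg("is_short_value_always_false", "return()").unwrap();
    // Coprocessor scan path: surfaces "default not found" as other_error.
    fail::cfg("near_load_data_by_write_default_not_found", "return()").unwrap();
    let cop_resp = client.coprocessor(&cop_req).unwrap();
    assert!(cop_resp.get_other_error().contains("default not found"));
    // Point-get path: surfaces a "DefaultNotFound" abort error.
    fail::cfg("load_data_from_default_cf_default_not_found", "return()").unwrap();
    let get_resp = client.kv_get(&get_req).unwrap();
    assert!(get_resp.get_error().get_abort().contains("DefaultNotFound"));
    fail::remove("is_short_value_always_false");
    fail::remove("near_load_data_by_write_default_not_found");
    fail::remove("load_data_from_default_cf_default_not_found");

The extra context (request context, key hint, and a backtrace) is emitted through the error! logs added in the coprocessor endpoint, storage get/batch_get, and default_not_found_error.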
Signed-off-by: cfzjywxk Co-authored-by: ti-chi-bot[bot] <108142056+ti-chi-bot[bot]@users.noreply.github.com> --- Cargo.lock | 1 + components/error_code/src/coprocessor.rs | 3 +- components/txn_types/Cargo.toml | 1 + components/txn_types/src/types.rs | 2 + src/coprocessor/endpoint.rs | 14 +++- src/coprocessor/error.rs | 7 ++ src/storage/mod.rs | 22 +++++ src/storage/mvcc/mod.rs | 2 + src/storage/mvcc/reader/point_getter.rs | 6 ++ src/storage/mvcc/reader/scanner/mod.rs | 6 ++ tests/failpoints/cases/test_coprocessor.rs | 94 +++++++++++++++++++++- 11 files changed, 153 insertions(+), 5 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index b1d179ed5ac..d025033adc0 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -7916,6 +7916,7 @@ dependencies = [ "codec", "collections", "error_code", + "fail", "farmhash", "kvproto", "log_wrappers", diff --git a/components/error_code/src/coprocessor.rs b/components/error_code/src/coprocessor.rs index dfa4749795f..259fd78f21a 100644 --- a/components/error_code/src/coprocessor.rs +++ b/components/error_code/src/coprocessor.rs @@ -18,5 +18,6 @@ define_error_codes!( STORAGE_ERROR => ("StorageError", "", ""), INVALID_CHARACTER_STRING => ("InvalidCharacterString", "", ""), - INVALID_MAX_TS_UPDATE => ("InvalidMaxTsUpdate", "", "") + INVALID_MAX_TS_UPDATE => ("InvalidMaxTsUpdate", "", ""), + DEFAULT_NOT_FOUND => ("DefaultNotFound", "", "") ); diff --git a/components/txn_types/Cargo.toml b/components/txn_types/Cargo.toml index 19eb2f99b30..f87d470c205 100644 --- a/components/txn_types/Cargo.toml +++ b/components/txn_types/Cargo.toml @@ -11,6 +11,7 @@ byteorder = "1.2" codec = { workspace = true } collections = { workspace = true } error_code = { workspace = true } +fail = "0.5" farmhash = "1.1.5" kvproto = { workspace = true } log_wrappers = { workspace = true } diff --git a/components/txn_types/src/types.rs b/components/txn_types/src/types.rs index e2cc91c0cd8..8766ccf96e9 100644 --- a/components/txn_types/src/types.rs +++ b/components/txn_types/src/types.rs @@ -5,6 +5,7 @@ use std::fmt::{self, Debug, Display, Formatter}; use bitflags::bitflags; use byteorder::{ByteOrder, NativeEndian}; use collections::HashMap; +use fail::fail_point; use kvproto::kvrpcpb::{self, Assertion}; use tikv_util::{ codec, @@ -23,6 +24,7 @@ pub const SHORT_VALUE_MAX_LEN: usize = 255; pub const SHORT_VALUE_PREFIX: u8 = b'v'; pub fn is_short_value(value: &[u8]) -> bool { + fail_point!("is_short_value_always_false", |_| { false }); value.len() <= SHORT_VALUE_MAX_LEN } diff --git a/src/coprocessor/endpoint.rs b/src/coprocessor/endpoint.rs index 40189b6f7b6..770df34d773 100644 --- a/src/coprocessor/endpoint.rs +++ b/src/coprocessor/endpoint.rs @@ -503,7 +503,15 @@ impl Endpoint { COPR_RESP_SIZE.inc_by(resp.data.len() as u64); resp } - Err(e) => make_error_response(e).into(), + Err(e) => { + if let Error::DefaultNotFound(errmsg) = &e { + error!("default not found in coprocessor request processing"; + "err" => errmsg, + "reqCtx" => ?&tracker.req_ctx, + ); + } + make_error_response(e).into() + } }; resp.set_exec_details(exec_details); resp.set_exec_details_v2(exec_details_v2); @@ -933,6 +941,10 @@ macro_rules! 
make_error_response_common { err.set_message(e.to_string()); $resp.set_region_error(err); } + Error::DefaultNotFound(_) => { + $tag = "default_not_found"; + $resp.set_other_error($e.to_string()); + } Error::Other(_) => { $tag = "other"; warn!("unexpected other error encountered processing coprocessor task"; diff --git a/src/coprocessor/error.rs b/src/coprocessor/error.rs index 1ecb4479da3..719b4522e9c 100644 --- a/src/coprocessor/error.rs +++ b/src/coprocessor/error.rs @@ -33,6 +33,9 @@ pub enum Error { #[error("{0}")] InvalidMaxTsUpdate(#[from] concurrency_manager::InvalidMaxTsUpdate), + #[error("{0}")] + DefaultNotFound(String), + #[error("{0}")] Other(String), } @@ -91,6 +94,9 @@ impl From for Error { match err { MvccError(box MvccErrorInner::KeyIsLocked(info)) => Error::Locked(info), MvccError(box MvccErrorInner::Kv(kv_error)) => Error::from(kv_error), + e @ MvccError(box MvccErrorInner::DefaultNotFound { .. }) => { + Error::DefaultNotFound(e.to_string()) + } e => Error::Other(e.to_string()), } } @@ -141,6 +147,7 @@ impl ErrorCodeExt for Error { Error::MaxPendingTasksExceeded => error_code::coprocessor::MAX_PENDING_TASKS_EXCEEDED, Error::MemoryQuotaExceeded => error_code::coprocessor::MEMORY_QUOTA_EXCEEDED, Error::InvalidMaxTsUpdate(_) => error_code::coprocessor::INVALID_MAX_TS_UPDATE, + Error::DefaultNotFound { .. } => error_code::coprocessor::DEFAULT_NOT_FOUND, Error::Other(_) => error_code::UNKNOWN, } } diff --git a/src/storage/mod.rs b/src/storage/mod.rs index 5af39078648..77ec505c060 100644 --- a/src/storage/mod.rs +++ b/src/storage/mod.rs @@ -700,6 +700,17 @@ impl Storage { r }) }); + if let Err( + e @ Error(box ErrorInner::Txn(TxnError(box TxnErrorInner::Mvcc( + mvcc::Error(box mvcc::ErrorInner::DefaultNotFound { .. }), + )))), + ) = &result + { + error!("default not found in storage get"; + "err" => ?e, + "RpcContext" => ?&ctx, + ); + } metrics::tls_collect_scan_details(CMD, &statistics); metrics::tls_collect_read_flow( ctx.get_region_id(), @@ -1312,6 +1323,17 @@ impl Storage { }); (result, stats) }); + if let Err( + e @ Error(box ErrorInner::Txn(TxnError(box TxnErrorInner::Mvcc( + mvcc::Error(box mvcc::ErrorInner::DefaultNotFound { .. }), + )))), + ) = &result + { + error!("default not found in storage batch_get"; + "err" => ?e, + "RpcContext" => ?&ctx, + ); + } metrics::tls_collect_scan_details(CMD, &stats); let now = Instant::now(); SCHED_PROCESSING_READ_HISTOGRAM_STATIC diff --git a/src/storage/mvcc/mod.rs b/src/storage/mvcc/mod.rs index e1d01f91562..f853d98b910 100644 --- a/src/storage/mvcc/mod.rs +++ b/src/storage/mvcc/mod.rs @@ -449,10 +449,12 @@ pub fn default_not_found_error(key: Vec, hint: &str) -> Error { hint, ); } else { + let bt = backtrace::Backtrace::new(); error!( "default value not found"; "key" => &log_wrappers::Value::key(&key), "hint" => hint, + "bt" => ?bt, ); Error::from(ErrorInner::DefaultNotFound { key }) } diff --git a/src/storage/mvcc/reader/point_getter.rs b/src/storage/mvcc/reader/point_getter.rs index 474c789a31d..7026e3fe8ab 100644 --- a/src/storage/mvcc/reader/point_getter.rs +++ b/src/storage/mvcc/reader/point_getter.rs @@ -365,6 +365,12 @@ impl PointGetter { write_start_ts: TimeStamp, user_key: &Key, ) -> Result { + fail_point!("load_data_from_default_cf_default_not_found", |_| Err( + default_not_found_error( + user_key.clone().append_ts(write_start_ts).into_encoded(), + "load_data_from_default_cf", + ) + )); self.statistics.data.get += 1; // TODO: We can avoid this clone. 
let value = self diff --git a/src/storage/mvcc/reader/scanner/mod.rs b/src/storage/mvcc/reader/scanner/mod.rs index 7f4fc664bb8..1cf80923d4a 100644 --- a/src/storage/mvcc/reader/scanner/mod.rs +++ b/src/storage/mvcc/reader/scanner/mod.rs @@ -365,6 +365,12 @@ pub fn near_load_data_by_write( where I: Iterator, { + fail_point!("near_load_data_by_write_default_not_found", |_| Err( + default_not_found_error( + user_key.clone().append_ts(write_start_ts).into_encoded(), + "near_load_data_by_write", + ) + )); let seek_key = user_key.clone().append_ts(write_start_ts); match statistics.load_data_hint() { LoadDataHint::NearSeek => default_cursor.near_seek(&seek_key, &mut statistics.data)?, diff --git a/tests/failpoints/cases/test_coprocessor.rs b/tests/failpoints/cases/test_coprocessor.rs index be9d978b23a..cbf9e14972a 100644 --- a/tests/failpoints/cases/test_coprocessor.rs +++ b/tests/failpoints/cases/test_coprocessor.rs @@ -5,18 +5,20 @@ use std::{sync::Arc, thread, time::Duration}; use futures::executor::block_on; use grpcio::{ChannelBuilder, Environment}; use kvproto::{ - coprocessor::Request, - kvrpcpb::{Context, IsolationLevel}, + coprocessor::{KeyRange, Request}, + kvrpcpb::{Context, GetRequest, IsolationLevel, Mutation, Op}, tikvpb::TikvClient, }; use more_asserts::{assert_ge, assert_le}; +use pd_client::PdClient; use protobuf::Message; use raftstore::store::Bucket; use test_coprocessor::*; +use test_raftstore::{must_kv_commit, must_kv_prewrite, must_new_cluster_and_kv_client}; use test_raftstore_macro::test_case; use test_storage::*; use tidb_query_datatype::{ - codec::{datum, Datum}, + codec::{datum, table::encode_row_key, Datum}, expr::EvalContext, }; use tikv_util::HandyRwLock; @@ -499,3 +501,89 @@ fn test_follower_buckets() { } fail::remove("skip_check_stale_read_safe"); } + +#[test] +fn test_default_not_found_log_info() { + let (mut cluster, _client, _ctx) = must_new_cluster_and_kv_client(); + let pd_client = Arc::clone(&cluster.pd_client); + pd_client.disable_default_operator(); + + let product = ProductTable::new(); + let row_key = encode_row_key(product.table_id(), 2); + + let r1 = cluster.get_region(row_key.as_slice()); + let region_id = r1.get_id(); + let leader = cluster.leader_of_region(region_id).unwrap(); + let epoch = cluster.get_region_epoch(region_id); + let mut ctx = Context::default(); + ctx.set_region_id(region_id); + ctx.set_peer(leader.clone()); + ctx.set_region_epoch(epoch); + let env = Arc::new(Environment::new(1)); + let channel = + ChannelBuilder::new(env).connect(&cluster.sim.rl().get_addr(leader.get_store_id())); + let client = TikvClient::new(channel); + + // Write record. 
+ fail::cfg("is_short_value_always_false", "return()").unwrap(); + let mut mutation = Mutation::default(); + let value = b"v2".to_vec(); + mutation.set_op(Op::Put); + mutation.set_key(row_key.clone()); + mutation.set_value(value.clone()); + let prewrite_ts = block_on(pd_client.get_tso()).unwrap().into_inner(); + must_kv_prewrite( + &client, + ctx.clone(), + vec![mutation], + row_key.clone(), + prewrite_ts, + ); + let commit_ts = block_on(pd_client.get_tso()).unwrap().into_inner(); + must_kv_commit( + &client, + ctx.clone(), + vec![row_key.clone()], + prewrite_ts, + commit_ts, + commit_ts, + ); + + let env = Arc::new(Environment::new(1)); + let channel = + ChannelBuilder::new(env).connect(&cluster.sim.rl().get_addr(leader.get_store_id())); + let client = TikvClient::new(channel); + + let mut ctx = Context::default(); + ctx.set_region_id(r1.get_id()); + ctx.set_region_epoch(r1.get_region_epoch().clone()); + ctx.set_peer(test_raftstore::new_peer(1, 1)); + + // Read with coprocessor request. + let read_ts = block_on(pd_client.get_tso()).unwrap().into_inner(); + let mut cop_req = DagSelect::from(&product).build(); + cop_req.set_context(ctx.clone()); + cop_req.set_start_ts(read_ts); + let mut key_range = KeyRange::new(); + let start_key = encode_row_key(product.table_id(), 1); + let end_key = encode_row_key(product.table_id(), 3); + key_range.set_start(start_key); + key_range.set_end(end_key); + cop_req.mut_ranges().clear(); + cop_req.mut_ranges().push(key_range); + fail::cfg("near_load_data_by_write_default_not_found", "return()").unwrap(); + let cop_resp = client.coprocessor(&cop_req).unwrap(); + assert!(cop_resp.get_other_error().contains("default not found")); + + // Read with get request. + let mut get_req = GetRequest::default(); + get_req.set_context(ctx); + get_req.set_key(row_key.clone()); + get_req.set_version(read_ts); + fail::cfg("load_data_from_default_cf_default_not_found", "return()").unwrap(); + let get_resp = client.kv_get(&get_req).unwrap(); + assert!(get_resp.get_error().get_abort().contains("DefaultNotFound")); + fail::remove("is_short_value_always_false"); + fail::remove("near_load_data_by_write_default_not_found"); + fail::remove("load_data_from_default_cf_default_not_found"); +} From 9e66ed291f964da430920c84e0eff804c202d4e0 Mon Sep 17 00:00:00 2001 From: Neil Shen Date: Mon, 6 Jan 2025 11:45:13 +0800 Subject: [PATCH 70/86] engine_rocks: export compactions and flushes metrics (#18034) ref tikv/tikv#15990 Export the number of currently running background jobs to help diagnose potential compaction bottlenecks. 
Signed-off-by: Neil Shen Co-authored-by: Bisheng Huang --- components/engine_rocks/src/rocks_metrics.rs | 34 ++++++++++++++ .../engine_rocks/src/rocks_metrics_defs.rs | 2 + metrics/grafana/tikv_details.dashboard.py | 32 +++++++++++++- metrics/grafana/tikv_details.json | 44 ++++++++++++++++++- metrics/grafana/tikv_details.json.sha256 | 2 +- 5 files changed, 109 insertions(+), 5 deletions(-) diff --git a/components/engine_rocks/src/rocks_metrics.rs b/components/engine_rocks/src/rocks_metrics.rs index b3f7fd5a221..7f7294f0188 100644 --- a/components/engine_rocks/src/rocks_metrics.rs +++ b/components/engine_rocks/src/rocks_metrics.rs @@ -906,6 +906,8 @@ struct DbStats { block_cache_size: Option, blob_cache_size: Option, stall_num: Option<[u64; ROCKSDB_IOSTALL_KEY.len()]>, + num_running_compactions: Option, + num_running_flushes: Option, } pub struct RocksStatisticsReporter { @@ -1037,6 +1039,17 @@ impl StatisticsReporter for RocksStatisticsReporter { } } + // For compaction and flushes. + if let Some(v) = db.get_property_int(ROCKSDB_NUM_RUNNING_COMPACTIONS) { + *self + .db_stats + .num_running_compactions + .get_or_insert_default() += v; + } + if let Some(v) = db.get_property_int(ROCKSDB_NUM_RUNNING_FLUSHES) { + *self.db_stats.num_running_flushes.get_or_insert_default() += v; + } + // For snapshot *self.db_stats.num_snapshots.get_or_insert_default() += db.get_property_int(ROCKSDB_NUM_SNAPSHOTS).unwrap_or(0); @@ -1170,6 +1183,17 @@ impl StatisticsReporter for RocksStatisticsReporter { } } + if let Some(v) = self.db_stats.num_running_compactions { + STORE_ENGINE_NUM_RUNNING_COMPACTIONS_GAUGE_VEC + .with_label_values(&[&self.name]) + .set(v as i64); + } + if let Some(v) = self.db_stats.num_running_flushes { + STORE_ENGINE_NUM_RUNNING_FLUSHES_GAUGE_VEC + .with_label_values(&[&self.name]) + .set(v as i64); + } + if let Some(v) = self.db_stats.num_snapshots { STORE_ENGINE_NUM_SNAPSHOTS_GAUGE_VEC .with_label_values(&[&self.name]) @@ -1266,6 +1290,16 @@ lazy_static! 
{ "Number of files at each level", &["db", "cf", "level"] ).unwrap(); + pub static ref STORE_ENGINE_NUM_RUNNING_COMPACTIONS_GAUGE_VEC: IntGaugeVec = register_int_gauge_vec!( + "tikv_engine_num_running_compactions", + "Number of currently running compactions", + &["db"] + ).unwrap(); + pub static ref STORE_ENGINE_NUM_RUNNING_FLUSHES_GAUGE_VEC: IntGaugeVec = register_int_gauge_vec!( + "tikv_engine_num_running_flushes", + "Number of currently running flushes", + &["db"] + ).unwrap(); pub static ref STORE_ENGINE_NUM_SNAPSHOTS_GAUGE_VEC: IntGaugeVec = register_int_gauge_vec!( "tikv_engine_num_snapshots", "Number of unreleased snapshots", diff --git a/components/engine_rocks/src/rocks_metrics_defs.rs b/components/engine_rocks/src/rocks_metrics_defs.rs index 1796886d7b9..21ab72bdd05 100644 --- a/components/engine_rocks/src/rocks_metrics_defs.rs +++ b/components/engine_rocks/src/rocks_metrics_defs.rs @@ -7,6 +7,8 @@ pub const ROCKSDB_TABLE_READERS_MEM: &str = "rocksdb.estimate-table-readers-mem" pub const ROCKSDB_CUR_SIZE_ALL_MEM_TABLES: &str = "rocksdb.cur-size-all-mem-tables"; pub const ROCKSDB_SIZE_ALL_MEM_TABLES: &str = "rocksdb.size-all-mem-tables"; pub const ROCKSDB_ESTIMATE_NUM_KEYS: &str = "rocksdb.estimate-num-keys"; +pub const ROCKSDB_NUM_RUNNING_FLUSHES: &str = "rocksdb.num-running-flushes"; +pub const ROCKSDB_NUM_RUNNING_COMPACTIONS: &str = "rocksdb.num-running-compactions"; pub const ROCKSDB_PENDING_COMPACTION_BYTES: &str = "rocksdb.\ estimate-pending-compaction-bytes"; pub const ROCKSDB_COMPRESSION_RATIO_AT_LEVEL: &str = "rocksdb.compression-ratio-at-level"; diff --git a/metrics/grafana/tikv_details.dashboard.py b/metrics/grafana/tikv_details.dashboard.py index f0d3e320627..f7a8b60791e 100644 --- a/metrics/grafana/tikv_details.dashboard.py +++ b/metrics/grafana/tikv_details.dashboard.py @@ -5471,8 +5471,8 @@ def RocksDB() -> RowPanel: [ graph_panel( title="Compaction operations", - description="The count of compaction and flush operations", - yaxes=yaxes(left_format=UNITS.OPS_PER_SEC), + description="The rate of completed compaction and flush operations (left axis) and the count of running operations (right axis).", + yaxes=yaxes(left_format=UNITS.OPS_PER_SEC, right_format=UNITS.SHORT), targets=[ target( expr=expr_sum_rate( @@ -5484,6 +5484,34 @@ def RocksDB() -> RowPanel: ), additional_groupby=True, ), + target( + expr=expr_sum( + "tikv_engine_num_running_compactions", + label_selectors=[ + 'db="$db"', + ], + by_labels=[], # override default by instance. + ), + legend_format="running-compactions", + additional_groupby=True, + ), + target( + expr=expr_sum( + "tikv_engine_num_running_flushes", + label_selectors=[ + 'db="$db"', + ], + by_labels=[], # override default by instance. 
+ ), + legend_format="running-flushes", + additional_groupby=True, + ), + ], + series_overrides=[ + series_override( + alias="/running-.*/", + yaxis=2, + ), ], ), graph_panel( diff --git a/metrics/grafana/tikv_details.json b/metrics/grafana/tikv_details.json index 3c6bdda0a41..259b309b14b 100644 --- a/metrics/grafana/tikv_details.json +++ b/metrics/grafana/tikv_details.json @@ -30064,7 +30064,7 @@ "bars": false, "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "description": "The count of compaction and flush operations", + "description": "The rate of completed compaction and flush operations (left axis) and the count of running operations (right axis).", "editable": true, "error": false, "fieldConfig": { @@ -30127,7 +30127,17 @@ "renderer": "flot", "repeat": null, "repeatDirection": null, - "seriesOverrides": [], + "seriesOverrides": [ + { + "alias": "/running-.*/", + "bars": false, + "fill": 1, + "fillBelowTo": null, + "lines": true, + "yaxis": 2, + "zindex": 0 + } + ], "span": null, "stack": false, "steppedLine": false, @@ -30146,6 +30156,36 @@ "refId": "", "step": 10, "target": "" + }, + { + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum((\n tikv_engine_num_running_compactions\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\"}\n \n)) by ($additional_groupby) ", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "running-compactions {{$additional_groupby}}", + "metric": "", + "query": "sum((\n tikv_engine_num_running_compactions\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\"}\n \n)) by ($additional_groupby) ", + "refId": "", + "step": 10, + "target": "" + }, + { + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum((\n tikv_engine_num_running_flushes\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\"}\n \n)) by ($additional_groupby) ", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "running-flushes {{$additional_groupby}}", + "metric": "", + "query": "sum((\n tikv_engine_num_running_flushes\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\"}\n \n)) by ($additional_groupby) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], diff --git a/metrics/grafana/tikv_details.json.sha256 b/metrics/grafana/tikv_details.json.sha256 index 337e25b67ee..91875a10527 100644 --- a/metrics/grafana/tikv_details.json.sha256 +++ b/metrics/grafana/tikv_details.json.sha256 @@ -1 +1 @@ -6f498305f4f2832c024c725cae0fc7c90236091f3dafbcf3d1eee2b0e16e7b45 ./metrics/grafana/tikv_details.json +ed83686c297ee26a37c2e57c15382ee810b399e4294006435a1d8e4bf63d5419 ./metrics/grafana/tikv_details.json From 6bb07ba19874815be2e14a5983bb05903e788ecb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=B1=B1=E5=B2=9A?= <36239017+YuJuncen@users.noreply.github.com> Date: Mon, 6 Jan 2025 14:43:43 +0800 Subject: [PATCH 71/86] compact_log_backup: record `min_input_ts` and `max_input_ts` in Compaction (#18085) close tikv/tikv#18084 `min_input_ts` and `max_input_ts` will present in a log files compaction. 
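Concretely, CompactionRunInfoBuilder now initializes input_min_ts to u64::MAX (the identity for min) and folds the bounds of every sub-compaction it records, roughly (taken from the diff below):

    self.compaction.input_min_ts = self.compaction.input_min_ts.min(c.origin.input_min_ts);
    self.compaction.input_max_ts = self.compaction.input_max_ts.max(c.origin.input_max_ts);

so the persisted LogFileCompaction carries the overall [input_min_ts, input_max_ts] range of the log files that were fed into it; the meta tests are extended to assert these bounds on the written migration.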
Signed-off-by: hillium Co-authored-by: ti-chi-bot[bot] <108142056+ti-chi-bot[bot]@users.noreply.github.com> --- Cargo.lock | 2 +- cmd/tikv-ctl/src/cmd.rs | 13 +++++++--- cmd/tikv-ctl/src/main.rs | 9 +++++-- .../compact-log-backup/src/compaction/meta.rs | 25 ++++++++++++++++++- .../compact-log-backup/src/execute/test.rs | 2 +- .../compact-log-backup/src/test_util.rs | 3 ++- 6 files changed, 44 insertions(+), 10 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index d025033adc0..d8dec4f7ba3 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3724,7 +3724,7 @@ dependencies = [ [[package]] name = "kvproto" version = "0.0.2" -source = "git+https://github.com/pingcap/kvproto.git#17bdaaa37b9fbc3231cf90a3dec9ecbfa3f27e4e" +source = "git+https://github.com/pingcap/kvproto.git#c35d2b41011503a386db9ae2b7f4bcec653dab61" dependencies = [ "futures 0.3.15", "grpcio", diff --git a/cmd/tikv-ctl/src/cmd.rs b/cmd/tikv-ctl/src/cmd.rs index d11c76c4090..ae562292b2f 100644 --- a/cmd/tikv-ctl/src/cmd.rs +++ b/cmd/tikv-ctl/src/cmd.rs @@ -640,14 +640,19 @@ pub enum Cmd { ) )] name: String, - #[structopt(long = "from", help("from when we need to compact."))] + #[structopt( + long = "from", + help( + "from when we need to include files into the compaction.\ + files contains any record within the [--from, --until) will be selected." + ) + )] from_ts: u64, #[structopt( long = "until", help( - "until when we need to compact. \ - Also note that records out of the [--from, --until) range may also be compacted \ - if their neighbour in the same file needs to be compacted." + "until when we need to include files into the compaction.\ + files contains any record within the [--from, --until) will be selected." ) )] until_ts: u64, diff --git a/cmd/tikv-ctl/src/main.rs b/cmd/tikv-ctl/src/main.rs index eeb8bb63f0a..9b8852c62b1 100644 --- a/cmd/tikv-ctl/src/main.rs +++ b/cmd/tikv-ctl/src/main.rs @@ -471,8 +471,13 @@ fn main() { ((log_to_term, checkpoint), with_status_server), (save_meta, with_lock), ); - exec.run(hooks) - .expect("failed to execute compact-log-backup") + match exec.run(hooks) { + Ok(()) => tikv_util::info!("Compact log backup successfully."), + Err(err) => { + tikv_util::error!("Failed to compact log backup."; "err" => %err, "err_verbose" => ?err); + std::process::exit(1); + } + } } // Commands below requires either the data dir or the host. cmd => { diff --git a/components/compact-log-backup/src/compaction/meta.rs b/components/compact-log-backup/src/compaction/meta.rs index 62d500528a7..30491b1095a 100644 --- a/components/compact-log-backup/src/compaction/meta.rs +++ b/components/compact-log-backup/src/compaction/meta.rs @@ -162,12 +162,23 @@ impl Ord for SortByOffset { /// Collecting metadata of subcomapctions. /// /// Finally, it calculates which files can be deleted. -#[derive(Default, Debug)] +#[derive(Debug)] pub struct CompactionRunInfoBuilder { files: HashMap, BTreeSet>, compaction: brpb::LogFileCompaction, } +impl Default for CompactionRunInfoBuilder { + fn default() -> Self { + let mut this = Self { + files: Default::default(), + compaction: Default::default(), + }; + this.compaction.input_min_ts = u64::MAX; + this + } +} + /// A set of deletable log files from the same metadata. 
pub struct ExpiringFilesOfMeta { meta_path: Arc, @@ -230,6 +241,8 @@ impl CompactionRunInfoBuilder { .insert(SortByOffset(file.id.clone())); } self.compaction.artifacts_hash ^= c.origin.crc64(); + self.compaction.input_min_ts = self.compaction.input_min_ts.min(c.origin.input_min_ts); + self.compaction.input_max_ts = self.compaction.input_max_ts.max(c.origin.input_max_ts); } pub fn mut_meta(&mut self) -> &mut brpb::LogFileCompaction { @@ -382,6 +395,11 @@ mod test { let mig = coll.mig(st.storage().as_ref()).await.unwrap(); assert_eq!(mig.edit_meta.len(), 1); assert!(mig.edit_meta[0].destruct_self); + + assert_eq!(mig.compactions.len(), 1); + let c = &mig.compactions[0]; + assert_eq!(c.input_min_ts, 10); + assert_eq!(c.input_max_ts, 25); } #[tokio::test] @@ -439,5 +457,10 @@ mod test { _ => unreachable!(), }; mig.edit_meta.iter().for_each(check); + + assert_eq!(mig.compactions.len(), 1); + let c = &mig.compactions[0]; + assert_eq!(c.input_min_ts, 10); + assert_eq!(c.input_max_ts, 20); } } diff --git a/components/compact-log-backup/src/execute/test.rs b/components/compact-log-backup/src/execute/test.rs index 78ab92ec0b6..68e21bbd477 100644 --- a/components/compact-log-backup/src/execute/test.rs +++ b/components/compact-log-backup/src/execute/test.rs @@ -209,7 +209,7 @@ async fn load_locks(storage: &dyn ExternalStorage) -> Vec { } #[tokio::test(flavor = "multi_thread", worker_threads = 2)] -async fn test_consistency_guard_() { +async fn test_consistency_guard() { let st = TmpStorage::create(); let strg = st.storage().as_ref(); put_checkpoint(strg, 1, 42).await; diff --git a/components/compact-log-backup/src/test_util.rs b/components/compact-log-backup/src/test_util.rs index 7f3763ceed8..08d32a61bbb 100644 --- a/components/compact-log-backup/src/test_util.rs +++ b/components/compact-log-backup/src/test_util.rs @@ -6,6 +6,7 @@ use std::{ ops::Not, path::{Path, PathBuf}, sync::{Arc, Mutex}, + u64, }; use engine_rocks::RocksEngine; @@ -253,7 +254,7 @@ impl LogFileBuilder { is_meta: false, content: zstd::Encoder::new(Cursor::new(vec![]), 3).unwrap(), - min_ts: 0, + min_ts: u64::MAX, max_ts: 0, min_key: vec![], max_key: vec![], From 12d98b94c7231214ed06a89df6a1f54a6ef9ef50 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=B1=B1=E5=B2=9A?= <36239017+YuJuncen@users.noreply.github.com> Date: Mon, 6 Jan 2025 16:06:31 +0800 Subject: [PATCH 72/86] compact_log_backup: fix typo (#18090) ref tikv/tikv#15990 Fixed a typo: `Migartion` -> `Migration`. 
Signed-off-by: hillium --- components/compact-log-backup/src/compaction/meta.rs | 4 ++-- components/compact-log-backup/src/storage.rs | 12 ++++++------ 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/components/compact-log-backup/src/compaction/meta.rs b/components/compact-log-backup/src/compaction/meta.rs index 30491b1095a..1f23556c4c9 100644 --- a/components/compact-log-backup/src/compaction/meta.rs +++ b/components/compact-log-backup/src/compaction/meta.rs @@ -15,7 +15,7 @@ use super::{ use crate::{ errors::Result, storage::{ - LoadFromExt, LogFile, LogFileId, MetaFile, MigartionStorageWrapper, PhysicalLogFile, + LoadFromExt, LogFile, LogFileId, MetaFile, MigrationStorageWrapper, PhysicalLogFile, StreamMetaStorage, }, }; @@ -251,7 +251,7 @@ impl CompactionRunInfoBuilder { pub async fn write_migration(&self, s: &dyn ExternalStorage) -> Result<()> { let migration = self.migration_of(self.find_expiring_files(s).await?); - let wrapped_storage = MigartionStorageWrapper::new(s); + let wrapped_storage = MigrationStorageWrapper::new(s); wrapped_storage.write(migration.into()).await?; Ok(()) } diff --git a/components/compact-log-backup/src/storage.rs b/components/compact-log-backup/src/storage.rs index 2e55a831e42..dbb8fde0b77 100644 --- a/components/compact-log-backup/src/storage.rs +++ b/components/compact-log-backup/src/storage.rs @@ -558,16 +558,16 @@ impl Default for VersionedMigration { } } -pub struct MigartionStorageWrapper<'a> { +pub struct MigrationStorageWrapper<'a> { storage: &'a dyn ExternalStorage, - migartions_prefix: &'a str, + migrations_prefix: &'a str, } -impl<'a> MigartionStorageWrapper<'a> { +impl<'a> MigrationStorageWrapper<'a> { pub fn new(storage: &'a dyn ExternalStorage) -> Self { Self { storage, - migartions_prefix: MIGRATION_PREFIX, + migrations_prefix: MIGRATION_PREFIX, } } @@ -583,7 +583,7 @@ impl<'a> MigartionStorageWrapper<'a> { retry_expr!( self.storage .write( - &format!("{}/{}", self.migartions_prefix, name), + &format!("{}/{}", self.migrations_prefix, name), UnpinReader(Box::new(Cursor::new(&bytes))), bytes.len() as u64 ) @@ -596,7 +596,7 @@ impl<'a> MigartionStorageWrapper<'a> { pub async fn largest_id(&self) -> Result { self.storage - .iter_prefix(self.migartions_prefix) + .iter_prefix(self.migrations_prefix) .err_into::() .map(|v| { v.and_then(|v| match id_of_migration(&v.key) { From ae6009117bc6fada7b8dad94bbb1ed050f4a53c9 Mon Sep 17 00:00:00 2001 From: ekexium Date: Mon, 6 Jan 2025 20:36:13 +0800 Subject: [PATCH 73/86] concurrency_manager: make max-ts checker more robust (#18080) ref tikv/tikv#18055 When validating max-ts updates, do not report error or panic unless confirmed by PD TSO. This reduces both false positive and false negative cases. 
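Condensed from the report_error change below, the action is only escalated once a fresh PD TSO fetch has confirmed that the limit is indeed violated; otherwise the event is logged as a warning and the update proceeds:

    match self.action_on_invalid_max_ts.load() {
        ActionOnInvalidMaxTs::Panic if tso_confirmed => panic!(
            "invalid max_ts update: {} exceeds the limit {}, source={}",
            new_ts,
            limit.limit.into_inner(),
            source
        ),
        ActionOnInvalidMaxTs::Error if tso_confirmed => Err(InvalidMaxTsUpdate {
            attempted_ts: new_ts,
            limit: limit.limit,
        }),
        // Log-only action, or the TSO could not confirm the violation.
        _ => Ok(()),
    }

The default limit-valid duration is also shortened from 60s to 45s, and an error is logged at construction time when limit_valid_duration >= max_ts_drift_allowance, flagging the improper configuration.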
Signed-off-by: ekexium Co-authored-by: ti-chi-bot[bot] <108142056+ti-chi-bot[bot]@users.noreply.github.com> --- Cargo.lock | 77 ++++++++++ components/concurrency_manager/Cargo.toml | 1 + components/concurrency_manager/src/lib.rs | 177 ++++++++++++++-------- 3 files changed, 193 insertions(+), 62 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index d8dec4f7ba3..940225bd459 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -112,6 +112,12 @@ dependencies = [ "winapi 0.3.9", ] +[[package]] +name = "anstyle" +version = "1.0.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "55cc3b69f167a1ef2e161439aa98aed94e6028e5f9a59be9a6ffb47aef1651f9" + [[package]] name = "anyhow" version = "1.0.75" @@ -1561,6 +1567,7 @@ dependencies = [ "futures 0.3.15", "kvproto", "lazy_static", + "mockall", "online_config", "parking_lot 0.12.1", "pd_client", @@ -2037,6 +2044,12 @@ dependencies = [ "syn 2.0.79", ] +[[package]] +name = "downcast" +version = "0.11.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1435fa1053d8b2fbbe9be7e97eca7f33d37b28409959813daefc1446a14247f1" + [[package]] name = "dyn-clone" version = "1.0.4" @@ -2578,6 +2591,12 @@ dependencies = [ "percent-encoding", ] +[[package]] +name = "fragile" +version = "2.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6c2141d6d6c8512188a7891b4b01590a45f6dac67afb4f255c4124dbb86d4eaa" + [[package]] name = "fs2" version = "0.4.3" @@ -4117,6 +4136,32 @@ dependencies = [ "tempdir", ] +[[package]] +name = "mockall" +version = "0.13.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "39a6bfcc6c8c7eed5ee98b9c3e33adc726054389233e201c95dab2d41a3839d2" +dependencies = [ + "cfg-if 1.0.0", + "downcast", + "fragile", + "mockall_derive", + "predicates", + "predicates-tree", +] + +[[package]] +name = "mockall_derive" +version = "0.13.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "25ca3004c2efe9011bd4e461bd8256445052b9615405b4f7ea43fc8ca5c20898" +dependencies = [ + "cfg-if 1.0.0", + "proc-macro2", + "quote", + "syn 2.0.79", +] + [[package]] name = "more-asserts" version = "0.2.1" @@ -4877,6 +4922,32 @@ version = "0.2.10" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ac74c624d6b2d21f425f752262f42188365d7b8ff1aff74c82e45136510a4857" +[[package]] +name = "predicates" +version = "3.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a5d19ee57562043d37e82899fade9a22ebab7be9cef5026b07fda9cdd4293573" +dependencies = [ + "anstyle", + "predicates-core", +] + +[[package]] +name = "predicates-core" +version = "1.0.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "727e462b119fe9c93fd0eb1429a5f7647394014cf3c04ab2c0350eeb09095ffa" + +[[package]] +name = "predicates-tree" +version = "1.0.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "72dd2d6d381dfb73a193c7fca536518d7caee39fc8503f74e7dc0be0531b425c" +dependencies = [ + "predicates-core", + "termtree", +] + [[package]] name = "pretty_assertions" version = "1.4.0" @@ -6703,6 +6774,12 @@ dependencies = [ "winapi-util", ] +[[package]] +name = "termtree" +version = "0.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8f50febec83f5ee1df3015341d8bd429f2d1cc62bcba7ea2076759d315084683" + [[package]] name = "test_backup" version = "0.0.1" diff --git a/components/concurrency_manager/Cargo.toml 
b/components/concurrency_manager/Cargo.toml index b98e721b593..39ba558c469 100644 --- a/components/concurrency_manager/Cargo.toml +++ b/components/concurrency_manager/Cargo.toml @@ -11,6 +11,7 @@ crossbeam-skiplist = { workspace = true } fail = "0.5" kvproto = { workspace = true } lazy_static = "1.4.0" +mockall = "0.13" online_config = { workspace = true } parking_lot = "0.12" pd_client = { workspace = true } diff --git a/components/concurrency_manager/src/lib.rs b/components/concurrency_manager/src/lib.rs index 31c57693baa..3e4e2dd34fc 100644 --- a/components/concurrency_manager/src/lib.rs +++ b/components/concurrency_manager/src/lib.rs @@ -29,10 +29,11 @@ use std::{ use crossbeam::atomic::AtomicCell; use lazy_static::lazy_static; +use mockall::automock; use pd_client::{PdClient, PdFuture}; use prometheus::{register_int_gauge, IntGauge}; use thiserror::Error; -use tikv_util::{error, future::block_on_timeout, time::Instant}; +use tikv_util::{error, future::block_on_timeout, time::Instant, warn}; use txn_types::{Key, Lock, TimeStamp}; pub use self::{ @@ -50,7 +51,7 @@ lazy_static! { register_int_gauge!("tikv_concurrency_manager_max_ts", "Current value of max_ts").unwrap(); } -const DEFAULT_LIMIT_VALID_DURATION: Duration = Duration::from_secs(60); +const DEFAULT_LIMIT_VALID_DURATION: Duration = Duration::from_secs(45); // It is suggested that limit_valid_duration = sync_interval * // LIMIT_VALID_TIME_MULTIPLIER, to balance between @@ -67,6 +68,7 @@ struct MaxTsLimit { update_time: Instant, } +#[automock] pub trait TSOProvider: Send + Sync { fn get_tso(&self) -> PdFuture; } @@ -125,6 +127,14 @@ impl ConcurrencyManager { update_time: Instant::now(), }; + if limit_valid_duration >= max_ts_drift_allowance { + error!("improper setting: limit_valid_duration >= max_ts_drift_allowance; \ + consider increasing max-ts-drift-allowance or decreasing max-ts-sync-interval"; + "limit_valid_duration" => ?limit_valid_duration, + "max_ts_drift_allowance" => ?max_ts_drift_allowance, + ); + } + ConcurrencyManager { max_ts: Arc::new(AtomicU64::new(latest_ts.into_inner())), max_ts_limit: Arc::new(AtomicCell::new(initial_limit)), @@ -197,12 +207,6 @@ impl ConcurrencyManager { // check that new_ts is less than or equal to the limit if !limit.limit.is_zero() && new_ts > limit.limit { - // NOTE: `limit` and `last_update` are read non-atomically as a whole, so they - // can be inconsistent, i.e. they may not be from the same event of - // setting the limit. The consequence is that we may mistakenly - // treat an "invalid" limit as a "valid" one. This is acceptable - // because the limit is just an assertion, and the inconsistency - // is not harmful. 
let last_update = limit.update_time; let now = self.time_provider.now(); assert!(now >= last_update); @@ -246,17 +250,19 @@ impl ConcurrencyManager { source: impl slog::Value + Display, using_approximate: bool, ) -> Result<(), crate::InvalidMaxTsUpdate> { - error!("possible invalid max-ts update; double checking"; + warn!("possible invalid max-ts update; double checking"; "attempted_ts" => new_ts, - "max_allowed" => limit.into_inner(), + "limit" => limit.into_inner(), "source" => &source, "using_approximate" => using_approximate, "TSO_TIMEOUT" => ?TSO_TIMEOUT, ); + let mut tso_confirmed = false; if let Some(tso) = &self.tso { match block_on_timeout(tso.get_tso(), TSO_TIMEOUT) { Ok(Ok(ts)) => { self.set_max_ts_limit(ts); + tso_confirmed = true; } Ok(Err(e)) => { error!("failed to fetch from TSO for double checking"; "err" => ?e); @@ -266,8 +272,9 @@ impl ConcurrencyManager { } } } - if new_ts > self.max_ts_limit.load().limit { - self.report_error(new_ts, limit, source, using_approximate)?; + let new_limit = self.max_ts_limit.load(); + if new_ts > new_limit.limit { + self.report_error(new_ts, new_limit, source, using_approximate, tso_confirmed)?; } Ok(()) } @@ -275,32 +282,43 @@ impl ConcurrencyManager { fn report_error( &self, new_ts: TimeStamp, - limit: TimeStamp, + limit: MaxTsLimit, source: impl slog::Value + Display, using_approximate: bool, + tso_confirmed: bool, ) -> Result<(), InvalidMaxTsUpdate> { - let can_panic = !using_approximate; - error!("invalid max_ts update"; - "attempted_ts" => new_ts, - "max_allowed" => limit.into_inner(), - "source" => &source, - "using_approximate" => using_approximate, - ); + if tso_confirmed { + error!("invalid max_ts update"; + "attempted_ts" => new_ts, + "limit" => limit.limit.into_inner(), + "limit_update_time" => ?limit.update_time, + "source" => &source, + "using_approximate" => using_approximate, + ); + } else { + warn!("possible invalid max_ts update"; + "attempted_ts" => new_ts, + "limit" => limit.limit.into_inner(), + "limit_update_time" => ?limit.update_time, + "source" => &source, + "using_approximate" => using_approximate, + ); + } + match self.action_on_invalid_max_ts.load() { - ActionOnInvalidMaxTs::Panic if can_panic => { + ActionOnInvalidMaxTs::Panic if tso_confirmed => { panic!( "invalid max_ts update: {} exceeds the limit {}, source={}", new_ts, - limit.into_inner(), + limit.limit.into_inner(), source ); } - ActionOnInvalidMaxTs::Error => Err(InvalidMaxTsUpdate { + ActionOnInvalidMaxTs::Error if tso_confirmed => Err(InvalidMaxTsUpdate { attempted_ts: new_ts, - max_allowed: limit, + limit: limit.limit, }), - ActionOnInvalidMaxTs::Log => Ok(()), - ActionOnInvalidMaxTs::Panic => Ok(()), + _ => Ok(()), } } @@ -518,10 +536,10 @@ impl AtomicActionOnInvalidMaxTs { } #[derive(Debug, Error, Clone)] -#[error("invalid max_ts update: {attempted_ts} exceeds the limit {max_allowed}")] +#[error("invalid max_ts update: {attempted_ts} exceeds the limit {limit}")] pub struct InvalidMaxTsUpdate { pub attempted_ts: TimeStamp, - pub max_allowed: TimeStamp, + pub limit: TimeStamp, } pub trait ValueDisplay: slog::Value + Display {} @@ -597,14 +615,14 @@ mod tests { use super::*; #[derive(Clone)] - struct MockTimeProvider { + struct StubTimeProvider { current_time: Arc>, } - impl MockTimeProvider { + impl StubTimeProvider { /// Creates a new MockTimeProvider initialized with the given instant. 
fn new(start_time: Instant) -> Self { - MockTimeProvider { + StubTimeProvider { current_time: Arc::new(Mutex::new(start_time)), } } @@ -618,7 +636,7 @@ mod tests { } } - impl TimeProvider for MockTimeProvider { + impl TimeProvider for StubTimeProvider { fn now(&self) -> Instant { let time = self.current_time.lock().unwrap(); *time @@ -703,11 +721,16 @@ mod tests { #[test] fn test_max_ts_limit() { + let mut stub_pd = MockTSOProvider::new(); + stub_pd + .expect_get_tso() + .return_once(|| ready(Ok(160.into())).boxed()); + let stub_pd = Arc::new(stub_pd); let cm = ConcurrencyManager::new_with_config( TimeStamp::new(100), DEFAULT_LIMIT_VALID_DURATION, ActionOnInvalidMaxTs::Error, - None, + Some(stub_pd), Duration::ZERO, ); @@ -747,11 +770,17 @@ mod tests { #[test] fn test_max_ts_updates_with_monotonic_limit() { + let mut stub_pd = MockTSOProvider::new(); + // Assertion: should fail to update max_ts to 250 and query PD + stub_pd + .expect_get_tso() + .return_once(|| ready(Ok(201.into())).boxed()); + let stub_pd = Arc::new(stub_pd); let cm = ConcurrencyManager::new_with_config( TimeStamp::new(100), DEFAULT_LIMIT_VALID_DURATION, ActionOnInvalidMaxTs::Error, - None, + Some(stub_pd), Duration::ZERO, ); @@ -770,22 +799,27 @@ mod tests { assert!(result.is_err()); if let Err(e) = result { assert_eq!(e.attempted_ts, TimeStamp::new(250)); - assert_eq!(e.max_allowed, TimeStamp::new(200)); + assert_eq!(e.limit, TimeStamp::new(201)); } } #[test] fn test_limit_valid_duration_boundary() { let start_time = Instant::now(); - let mock_time = MockTimeProvider::new(start_time); - let time_provider = Arc::new(mock_time.clone()); + let stub_time = StubTimeProvider::new(start_time); + let time_provider = Arc::new(stub_time.clone()); + let mut stub_pd = MockTSOProvider::new(); + stub_pd + .expect_get_tso() + .return_once(|| ready(Ok(200.into())).boxed()); + let stub_pd = Arc::new(stub_pd); let cm = ConcurrencyManager::new_with_time_provider( TimeStamp::new(100), Duration::from_secs(60), ActionOnInvalidMaxTs::Error, time_provider.clone(), - None, + Some(stub_pd), Duration::ZERO, ); @@ -802,8 +836,8 @@ mod tests { #[test] fn test_max_ts_limit_expired_allows_update() { let start_time = Instant::now(); - let mock_time = MockTimeProvider::new(start_time); - let time_provider = Arc::new(mock_time.clone()); + let stub_time = StubTimeProvider::new(start_time); + let time_provider = Arc::new(stub_time.clone()); let cm = ConcurrencyManager::new_with_time_provider( TimeStamp::new(100), @@ -816,7 +850,7 @@ mod tests { cm.set_max_ts_limit(TimeStamp::new(200)); - mock_time.advance(Duration::from_secs(61)); + stub_time.advance(Duration::from_secs(61)); // Updating to 250 should be allowed, since the limit should be invalidated cm.update_max_ts(TimeStamp::new(250), "test_source".to_string()) @@ -827,7 +861,18 @@ mod tests { #[test] #[should_panic(expected = "invalid max_ts update")] fn test_panic_on_invalid_max_ts_enabled() { - let cm = ConcurrencyManager::new(TimeStamp::new(100)); + let mut stub_pd = MockTSOProvider::new(); + stub_pd + .expect_get_tso() + .return_once(|| ready(Ok(201.into())).boxed()); + let stub_pd = Arc::new(stub_pd); + let cm = ConcurrencyManager::new_with_config( + TimeStamp::new(100), + DEFAULT_LIMIT_VALID_DURATION, + ActionOnInvalidMaxTs::Panic, + Some(stub_pd), + Duration::ZERO, + ); cm.set_max_ts_limit(TimeStamp::new(200)); @@ -845,38 +890,46 @@ mod tests { assert_eq!(cm.max_ts().into_inner(), 500); } - struct MockPD { - tso: AtomicU64, - } - - impl MockPD { - fn new(ts: u64) -> Self { - Self { - tso: 
AtomicU64::new(ts), - } - } - } + #[test] + fn test_pd_tso_jump_not_panic() { + let mut stub_pd = MockTSOProvider::new(); + // The double check procedure gets latest_ts=300 from TSO + stub_pd + .expect_get_tso() + .return_once(|| ready(Ok(300.into())).boxed()); + let stub_pd = Arc::new(stub_pd); + let cm = ConcurrencyManager::new_with_config( + TimeStamp::new(100), + DEFAULT_LIMIT_VALID_DURATION, + ActionOnInvalidMaxTs::Panic, + Some(stub_pd.clone()), + Duration::ZERO, + ); - impl TSOProvider for MockPD { - fn get_tso(&self) -> PdFuture { - ready(Ok(TimeStamp::new(self.tso.fetch_add(1, Ordering::SeqCst)))).boxed() - } + cm.set_max_ts_limit(TimeStamp::new(200)); + // PD TSO jumps from 100 to 300 + cm.update_max_ts(TimeStamp::new(300), "test_source".to_string()) + .unwrap(); } #[test] - fn test_pd_tso_jump_not_panic() { - let mock_pd = Arc::new(MockPD::new(100)); + fn test_do_not_panic_under_pd_tso_jump_and_network_partition() { + let mut stub_pd = MockTSOProvider::new(); + // Network partition between PD and TiKV + stub_pd + .expect_get_tso() + .return_once(|| std::future::pending().boxed()); + let stub_pd = Arc::new(stub_pd); let cm = ConcurrencyManager::new_with_config( TimeStamp::new(100), DEFAULT_LIMIT_VALID_DURATION, ActionOnInvalidMaxTs::Panic, - Some(mock_pd.clone()), + Some(stub_pd.clone()), Duration::ZERO, ); + cm.set_max_ts_limit(200.into()); - cm.set_max_ts_limit(TimeStamp::new(200)); // PD TSO jumps from 100 to 300 - mock_pd.tso.store(300, Ordering::SeqCst); cm.update_max_ts(TimeStamp::new(300), "test_source".to_string()) .unwrap(); } From 02ae1f6d49017f434883ba05ba837bc32f5688fd Mon Sep 17 00:00:00 2001 From: wuhuizuo Date: Wed, 8 Jan 2025 07:50:01 +0800 Subject: [PATCH 74/86] build: update docker files (#17908) close tikv/tikv#17894 build: update Dockerfile for build and test Signed-off-by: wuhuizuo Co-authored-by: Ti Chi Robot --- Dockerfile | 145 +++++++++++++----------------------------------- Dockerfile.test | 62 ++++++--------------- Makefile | 2 +- 3 files changed, 59 insertions(+), 150 deletions(-) diff --git a/Dockerfile b/Dockerfile index 7f1c691bede..c2536f13ad1 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,115 +1,50 @@ # This Docker image contains a minimal build environment for TiKV # -# It contains all the tools necessary to reproduce official production builds of TiKV +# It contains all the tools necessary to reproduce develop builds of TiKV +# This file may be outdated, you can check the latest version at: +# https://github.com/PingCAP-QE/artifacts/blob/main/dockerfiles/cd/builders/tikv/Dockerfile -# We need to use CentOS 7 because many of our users choose this as their deploy machine. -# Since the glibc it uses (2.17) is from 2012 (https://sourceware.org/glibc/wiki/Glibc%20Timeline) -# it is our lowest common denominator in terms of distro support. - -# Some commands in this script are structured in order to reduce the number of layers Docker -# generates. Unfortunately Docker is limited to only 125 layers: -# https://github.com/moby/moby/blob/a9507c6f76627fdc092edc542d5a7ef4a6df5eec/layer/layer.go#L50-L53 - -# We require epel packages, so enable the fedora EPEL repo then install dependencies. 
-# Install the system dependencies -# Attempt to clean and rebuild the cache to avoid 404s - -# To avoid rebuilds we first install all Cargo dependencies - - -# The prepare image avoid ruining the cache of the builder -FROM centos:7.6.1810 as prepare -WORKDIR /tikv - -# This step will always ruin the cache -# There isn't a way with docker to wildcard COPY and preserve the directory structure -COPY . . -RUN mkdir /output -RUN for component in $(find . -type f -name 'Cargo.toml' -exec dirname {} \; | sort -u); do \ - mkdir -p "/output/${component}/src" \ - && touch "/output/${component}/src/lib.rs" \ - && cp "${component}/Cargo.toml" "/output/${component}/Cargo.toml" \ - ; done - - -FROM centos:7.6.1810 as builder - -RUN yum install -y epel-release && \ - yum clean all && \ - yum makecache - -RUN yum install -y centos-release-scl && \ - yum install -y \ - devtoolset-8 \ - perl cmake3 && \ - yum clean all - -# CentOS gives cmake 3 a weird binary name, so we link it to something more normal -# This is required by many build scripts, including ours. -RUN ln -s /usr/bin/cmake3 /usr/bin/cmake -ENV LIBRARY_PATH /usr/local/lib:$LIBRARY_PATH -ENV LD_LIBRARY_PATH /usr/local/lib:$LD_LIBRARY_PATH - -# Install protoc -RUN curl -LO "https://github.com/protocolbuffers/protobuf/releases/download/v3.15.8/protoc-3.15.8-linux-x86_64.zip" -RUN unzip protoc-3.15.8-linux-x86_64.zip -d /usr/local/ -ENV PATH /usr/local/bin/:$PATH - -# Install Rustup -RUN curl https://sh.rustup.rs -sSf | sh -s -- --no-modify-path --default-toolchain none -y +# build requires: +# - docker >= v20.10 +# +# build steps: +# - git clone --recurse-submodules https://github.com/tikv/tikv.git tikv +# - cd tikv +# - docker build -t tikv -f Dockerfile . + +########### stage: builder +FROM quay.io/rockylinux/rockylinux:8.10.20240528-ubi as builder + +# install packages. +RUN --mount=type=cache,target=/var/cache/dnf \ + dnf upgrade-minimal -y && \ + dnf --enablerepo=powertools install -y \ + dwz make git findutils gcc gcc-c++ cmake curl openssl-devel perl python3 \ + libstdc++-static + +# install protoc. +# renovate: datasource=github-release depName=protocolbuffers/protobuf +ARG PROTOBUF_VER=v3.15.8 +RUN FILE=$([ "$(arch)" = "aarch64" ] && echo "protoc-${PROTOBUF_VER#?}-linux-aarch_64.zip" || echo "protoc-${PROTOBUF_VER#?}-linux-$(arch).zip"); \ + curl -LO "https://github.com/protocolbuffers/protobuf/releases/download/${PROTOBUF_VER}/${FILE}" && unzip "$FILE" -d /usr/local/ && rm -f "$FILE" + +# install rust toolchain +RUN curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s - -y --default-toolchain none ENV PATH /root/.cargo/bin/:$PATH -RUN rustup self update - -WORKDIR /tikv -COPY rust-toolchain.toml ./ - -# For cargo -COPY scripts ./scripts -COPY etc ./etc -COPY Cargo.lock ./Cargo.lock - -COPY --from=prepare /output/ ./ -RUN mkdir -p ./cmd/tikv-ctl/src ./cmd/tikv-server/src && \ - echo 'fn main() {}' > ./cmd/tikv-ctl/src/main.rs && \ - echo 'fn main() {}' > ./cmd/tikv-server/src/main.rs && \ - for cargotoml in $(find . -type f -name "Cargo.toml"); do \ - sed -i '/fuzz/d' ${cargotoml} ; \ - done +########### stage: building +FROM builder as building +COPY . /tikv +RUN --mount=type=cache,target=/tikv/target \ + ROCKSDB_SYS_STATIC=1 make dist_release -C /tikv +RUN /tikv/bin/tikv-server --version -COPY Makefile ./ -RUN source /opt/rh/devtoolset-8/enable && make build_dist_release +########### stage: Final image +FROM ghcr.io/pingcap-qe/bases/tikv-base:v1.9.2 -# Remove fingerprints for when we build the real binaries. 
-RUN rm -rf ./target/release/.fingerprint/tikv-* && \ - for i in $(find . -type f -name 'Cargo.toml' -exec dirname {} \; | sort -u); do \ - rm -rf ./target/release/.fingerprint/$(basename ${i})-*; \ - done - -# Add full source code -COPY cmd/ ./cmd/ -COPY components/ ./components/ -COPY src/ ./src/ - -# Build real binaries now -ARG GIT_FALLBACK="Unknown (no git or not git repo)" -ARG GIT_HASH=${GIT_FALLBACK} -ARG GIT_TAG=${GIT_FALLBACK} -ARG GIT_BRANCH=${GIT_FALLBACK} -ENV TIKV_BUILD_GIT_HASH=${GIT_HASH} -ENV TIKV_BUILD_GIT_TAG=${GIT_TAG} -ENV TIKV_BUILD_GIT_BRANCH=${GIT_BRANCH} -RUN source /opt/rh/devtoolset-8/enable && make build_dist_release - -# Export to a clean image -FROM pingcap/alpine-glibc -COPY --from=builder /tikv/target/release/tikv-server /tikv-server -COPY --from=builder /tikv/target/release/tikv-ctl /tikv-ctl - -# FIXME: Figure out why libstdc++ is not staticly linked. -RUN apk add --no-cache \ - curl libstdc++ +ENV MALLOC_CONF="prof:true,prof_active:false" +COPY --from=building /tikv/bin/tikv-server /tikv-server +COPY --from=building /tikv/bin/tikv-ctl /tikv-ctl EXPOSE 20160 20180 - ENTRYPOINT ["/tikv-server"] diff --git a/Dockerfile.test b/Dockerfile.test index 56f16662c08..fc8cc9b509c 100644 --- a/Dockerfile.test +++ b/Dockerfile.test @@ -1,54 +1,28 @@ # This Docker image contains a minimal build environment for TiKV # -# It contains all the tools necessary to reproduce official production builds of TiKV +# It contains all the tools necessary to prepare the unit test env of TiKV -# We need to use CentOS 7 because many of our users choose this as their deploy machine. -# Since the glibc it uses (2.17) is from 2012 (https://sourceware.org/glibc/wiki/Glibc%20Timeline) -# it is our lowest common denominator in terms of distro support. +########### stage: builder +FROM quay.io/rockylinux/rockylinux:8.10.20240528-ubi -# Some commands in this script are structured in order to reduce the number of layers Docker -# generates. Unfortunately Docker is limited to only 125 layers: -# https://github.com/moby/moby/blob/a9507c6f76627fdc092edc542d5a7ef4a6df5eec/layer/layer.go#L50-L53 +# install packages. +RUN --mount=type=cache,target=/var/cache/dnf \ + dnf upgrade-minimal -y && \ + dnf --enablerepo=powertools install -y \ + dwz make git findutils gcc gcc-c++ cmake curl openssl-devel perl python3 \ + libstdc++-static -# We require epel packages, so enable the fedora EPEL repo then install dependencies. -# Install the system dependencies -# Attempt to clean and rebuild the cache to avoid 404s +# install protoc. +# renovate: datasource=github-release depName=protocolbuffers/protobuf +ARG PROTOBUF_VER=v3.15.8 +RUN FILE=$([ "$(arch)" = "aarch64" ] && echo "protoc-${PROTOBUF_VER#?}-linux-aarch_64.zip" || echo "protoc-${PROTOBUF_VER#?}-linux-$(arch).zip"); \ + curl -LO "https://github.com/protocolbuffers/protobuf/releases/download/${PROTOBUF_VER}/${FILE}" && unzip "$FILE" -d /usr/local/ && rm -f "$FILE" -# To avoid rebuilds we first install all Cargo dependencies - - -# The prepare image avoid ruining the cache of the builder -FROM centos:7.6.1810 as builder - -RUN yum install -y epel-release && \ - yum clean all && \ - yum makecache - -RUN yum install -y centos-release-scl && \ - yum install -y \ - devtoolset-8 \ - perl cmake3 && \ - yum clean all - -# CentOS gives cmake 3 a weird binary name, so we link it to something more normal -# This is required by many build scripts, including ours. 
-RUN ln -s /usr/bin/cmake3 /usr/bin/cmake -ENV LIBRARY_PATH /usr/local/lib:$LIBRARY_PATH -ENV LD_LIBRARY_PATH /usr/local/lib:$LD_LIBRARY_PATH - -# Install protoc -RUN curl -LO "https://github.com/protocolbuffers/protobuf/releases/download/v3.15.8/protoc-3.15.8-linux-x86_64.zip" -RUN unzip protoc-3.15.8-linux-x86_64.zip -d /usr/local/ -ENV PATH /usr/local/bin/:$PATH - -# Install Rustup -RUN curl https://sh.rustup.rs -sSf | sh -s -- --no-modify-path --default-toolchain none -y -ENV PATH /root/.cargo/bin/:$PATH -RUN rustup self update +# install rust toolchain +RUN curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s - -y --default-toolchain none +ENV PATH /root/.cargo/bin:$PATH WORKDIR /tikv COPY rust-toolchain.toml ./ -RUN cargo install cargo-nextest --locked - -ENTRYPOINT ["sh", "-c", "source /opt/rh/devtoolset-8/enable && \"$@\"", "-s"] +RUN cargo install cargo-nextest@0.9.85 --locked diff --git a/Makefile b/Makefile index 0d02893a141..02c8e2f5bfd 100644 --- a/Makefile +++ b/Makefile @@ -427,7 +427,7 @@ docker_shell: . docker run -it -v $(shell pwd):/tikv \ ${DEV_DOCKER_IMAGE_NAME}:${DOCKER_IMAGE_TAG} \ - /bin/bash + bash ## The driver for script/run-cargo.sh ## ---------------------------------- From 222019fbfd919afd60ad2179b6be3f2ebd47eff5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=B1=B1=E5=B2=9A?= <36239017+YuJuncen@users.noreply.github.com> Date: Wed, 8 Jan 2025 18:07:55 +0800 Subject: [PATCH 75/86] log-backup: added manual flush RPCs (#18027) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit close tikv/tikv#18026 Added a new RPC endpoint `flush_now` for the service `LogBackup`. Signed-off-by: 山岚 <36239017+YuJuncen@users.noreply.github.com> Signed-off-by: hillium Co-authored-by: ti-chi-bot[bot] <108142056+ti-chi-bot[bot]@users.noreply.github.com> --- .../backup-stream/src/checkpoint_manager.rs | 4 +- components/backup-stream/src/endpoint.rs | 104 ++++++++++++------ components/backup-stream/src/router.rs | 20 ++++ components/backup-stream/src/service.rs | 51 ++++++++- .../backup-stream/tests/integration/mod.rs | 53 +++++++-- components/backup-stream/tests/suite.rs | 29 ++++- 6 files changed, 207 insertions(+), 54 deletions(-) diff --git a/components/backup-stream/src/checkpoint_manager.rs b/components/backup-stream/src/checkpoint_manager.rs index 602b8b43bec..63f7a472bf7 100644 --- a/components/backup-stream/src/checkpoint_manager.rs +++ b/components/backup-stream/src/checkpoint_manager.rs @@ -896,8 +896,8 @@ pub mod tests { let r = flush_observer.after(&task, rts).await; assert_eq!(r.is_ok(), true); - let serivce_id = format!("backup-stream-{}-{}", task, store_id); - let r = pd_cli.get_service_safe_point(serivce_id).unwrap(); + let service_id = format!("backup-stream-{}-{}", task, store_id); + let r = pd_cli.get_service_safe_point(service_id).unwrap(); assert_eq!(r.into_inner(), rts - 1); } } diff --git a/components/backup-stream/src/endpoint.rs b/components/backup-stream/src/endpoint.rs index 4dd35e926b0..af8f3a99de8 100644 --- a/components/backup-stream/src/endpoint.rs +++ b/components/backup-stream/src/endpoint.rs @@ -5,6 +5,7 @@ use std::{ collections::HashSet, fmt, marker::PhantomData, + mem::ManuallyDrop, sync::{Arc, Mutex}, time::Duration, }; @@ -64,7 +65,7 @@ use crate::{ metadata::{store::MetaStore, MetadataClient, MetadataEvent, StreamTask}, metrics::{self, TaskStatus}, observer::BackupStreamObserver, - router::{self, ApplyEvents, FlushContext, Router, TaskSelector}, + router::{self, ApplyEvents, FlushContext, Router, 
TaskSelector, TaskSelectorRef}, subscription_manager::{RegionSubscriptionManager, ResolvedRegions}, subscription_track::{Ref, RefMut, ResolveResult, SubscriptionTracer}, try_send, @@ -92,7 +93,7 @@ pub struct Endpoint { // Note: some of the fields are public so test cases are able to access them. pub range_router: Router, observer: BackupStreamObserver, - pool: Runtime, + pool: ManuallyDrop, region_operator: Sender, failover_time: Option, config: BackupStreamConfig, @@ -106,6 +107,13 @@ pub struct Endpoint { pub initial_scan_semaphore: Arc, } +impl Drop for Endpoint { + fn drop(&mut self) { + // SAFETY: won't access thread pool after dropping. + unsafe { ManuallyDrop::take(&mut self.pool).shutdown_background() } + } +} + impl Endpoint where R: RegionInfoProvider + 'static + Clone, @@ -190,7 +198,7 @@ where range_router, scheduler, observer, - pool, + pool: ManuallyDrop::new(pool), store_id, regions: accessor, engine: PhantomData, @@ -857,20 +865,36 @@ where } } - pub fn on_force_flush(&self, task: String) { + pub fn on_force_flush(&self, task: TaskSelectorRef<'_>, sender: Sender) { self.pool.block_on(async move { - let handler_res = self.range_router.get_task_handler(&task); - // This should only happen in testing, it would be to unwrap... - let _ = handler_res.unwrap().set_flushing_status_cas(false, true); - let mts = self.prepare_min_ts().await; - let sched = self.scheduler.clone(); - self.region_op(ObserveOp::ResolveRegions { - callback: Box::new(move |res| { - try_send!(sched, Task::ExecFlush(task, res)); - }), - min_ts: mts, - }) - .await; + info!("Triggering force flush."; "selector" => ?task); + let handlers = self.range_router.select_task_handler(task); + for hnd in handlers { + let mts = self.prepare_min_ts().await; + let sched = self.scheduler.clone(); + let sender = sender.clone(); + match hnd.set_flushing_status_cas(false, true) { + Ok(_) => { + self.region_op(ObserveOp::ResolveRegions { + callback: Box::new(move |res| { + try_send!( + sched, + Task::ExecFlush(hnd.task.info.name.to_owned(), res, sender) + ); + }), + min_ts: mts, + }) + .await; + } + Err(_) => { + let res = FlushResult { + task: hnd.task.info.name.to_owned(), + error: Some(Error::Other(box_err!("task is flushing"))), + }; + let _ = sender.send(res).await; + } + } + } }); } @@ -879,9 +903,10 @@ where let mts = self.prepare_min_ts().await; let sched = self.scheduler.clone(); info!("min_ts prepared for flushing"; "min_ts" => %mts); + let (tx, _) = tokio::sync::mpsc::channel(1); self.region_op(ObserveOp::ResolveRegions { callback: Box::new(move |res| { - try_send!(sched, Task::ExecFlush(task, res)); + try_send!(sched, Task::ExecFlush(task, res, tx)); }), min_ts: mts, }) @@ -889,14 +914,20 @@ where }) } - fn on_exec_flush(&mut self, task: String, resolved: ResolvedRegions) { + fn on_exec_flush(&mut self, task: String, resolved: ResolvedRegions, cb: Sender) { self.checkpoint_mgr.freeze(); - self.pool - .spawn(root!("flush"; self.do_flush(task, resolved).map(|r| { - if let Err(err) = r { - err.report("during updating flush status") - } - }))); + let fut = self.do_flush(task.clone(), resolved); + self.pool.spawn(root!("flush"; async move { + let res = fut.await; + if let Err(ref err) = &res { + err.report("during updating flush status") + } + // If nobody waits us, it is no need to construct the result. 
+ if !cb.is_closed() { + let flush_res = FlushResult { task, error: res.err() }; + let _ = cb.send(flush_res).await; + } + })); } fn update_global_checkpoint(&self, task: String) -> future![()] { @@ -1024,7 +1055,7 @@ where Task::BatchEvent(events) => self.do_backup(events), Task::Flush(task) => self.on_flush(task), Task::ModifyObserve(op) => self.on_modify_observe(op), - Task::ForceFlush(task) => self.on_force_flush(task), + Task::ForceFlush(sel, cb) => self.on_force_flush(sel.reference(), cb), Task::FatalError(task, err) => self.on_fatal_error(task, err), Task::ChangeConfig(cfg) => { self.on_update_change_config(cfg); @@ -1041,7 +1072,7 @@ where } } Task::MarkFailover(t) => self.failover_time = Some(t), - Task::ExecFlush(task, min_ts) => self.on_exec_flush(task, min_ts), + Task::ExecFlush(task, min_ts, cb) => self.on_exec_flush(task, min_ts, cb), Task::RegionCheckpointsOp(s) => self.handle_region_checkpoints_op(s), Task::UpdateGlobalCheckpoint(task) => self.on_update_global_checkpoint(task), } @@ -1239,6 +1270,12 @@ impl fmt::Debug for RegionCheckpointOperation { } } +#[derive(Debug)] +pub struct FlushResult { + pub task: String, + pub error: Option, +} + pub enum Task { WatchTask(TaskOp), BatchEvent(Vec), @@ -1246,19 +1283,16 @@ pub enum Task { /// Change the observe status of some region. ModifyObserve(ObserveOp), /// Convert status of some task into `flushing` and do flush then. - ForceFlush(String), + ForceFlush(TaskSelector, Sender), /// FatalError pauses the task and set the error. FatalError(TaskSelector, Box), - /// Run the callback when see this message. Only for test usage. - /// NOTE: Those messages for testing are not guarded by `#[cfg(test)]` for - /// now, because the integration test would not enable test config when - /// compiling (why?) + /// Run the callback when see this message. Sync( // Run the closure if ... Box, // This returns `true`. // The argument should be `self`, but there are too many generic argument for `self`... - // So let the caller in test cases downcast this to the type they need manually... + // So let the caller downcast this to the type they need manually... Box bool + Send>, ), /// Mark the store as a failover store. @@ -1271,7 +1305,7 @@ pub enum Task { Flush(String), /// Execute the flush with the calculated resolved result. /// This is an internal command only issued by the `Flush` task. - ExecFlush(String, ResolvedRegions), + ExecFlush(String, ResolvedRegions, Sender), /// The command for getting region checkpoints. RegionCheckpointsOp(RegionCheckpointOperation), /// update global-checkpoint-ts to storage. 
@@ -1377,7 +1411,7 @@ impl fmt::Debug for Task { Self::ChangeConfig(arg0) => f.debug_tuple("ChangeConfig").field(arg0).finish(), Self::Flush(arg0) => f.debug_tuple("Flush").field(arg0).finish(), Self::ModifyObserve(op) => f.debug_tuple("ModifyObserve").field(op).finish(), - Self::ForceFlush(arg0) => f.debug_tuple("ForceFlush").field(arg0).finish(), + Self::ForceFlush(sel, _) => f.debug_tuple("ForceFlush").field(sel).finish(), Self::FatalError(task, err) => { f.debug_tuple("FatalError").field(task).field(err).finish() } @@ -1386,7 +1420,7 @@ impl fmt::Debug for Task { .debug_tuple("MarkFailover") .field(&format_args!("{:?} ago", t.saturating_elapsed())) .finish(), - Self::ExecFlush(arg0, arg1) => f + Self::ExecFlush(arg0, arg1, _) => f .debug_tuple("ExecFlush") .field(arg0) .field(&arg1.global_checkpoint()) diff --git a/components/backup-stream/src/router.rs b/components/backup-stream/src/router.rs index ce16776d9be..ed307871a46 100644 --- a/components/backup-stream/src/router.rs +++ b/components/backup-stream/src/router.rs @@ -619,6 +619,26 @@ impl RouterInner { r.get_value_by_point(key).cloned() } + pub fn select_task_handler( + &self, + selector: TaskSelectorRef<'_>, + ) -> impl Iterator> { + self.tasks + .iter() + .filter(|entry| { + let (name, info) = entry.pair(); + selector.matches( + name.as_str(), + info.ranges + .iter() + .map(|(s, e)| (s.as_slice(), e.as_slice())), + ) + }) + .map(|entry| entry.value().clone()) + .collect::>() + .into_iter() + } + pub fn select_task(&self, selector: TaskSelectorRef<'_>) -> Vec { self.tasks .iter() diff --git a/components/backup-stream/src/service.rs b/components/backup-stream/src/service.rs index e639f44a731..3e1db6f1d0d 100644 --- a/components/backup-stream/src/service.rs +++ b/components/backup-stream/src/service.rs @@ -2,13 +2,15 @@ use std::collections::HashSet; -use grpcio::RpcContext; +use futures::future::FutureExt; +use grpcio::{RpcContext, RpcStatus, RpcStatusCode}; use kvproto::{logbackuppb::*, metapb::Region}; -use tikv_util::{warn, worker::Scheduler}; +use tikv_util::{info, warn, worker::Scheduler}; use crate::{ checkpoint_manager::{GetCheckpointResult, RegionIdWithVersion}, endpoint::{RegionCheckpointOperation, RegionSet}, + router::TaskSelector, try_send, Task, }; @@ -40,6 +42,51 @@ impl From for RegionIdentity { } impl LogBackup for BackupStreamGrpcService { + fn flush_now( + &mut self, + ctx: grpcio::RpcContext<'_>, + _req: FlushNowRequest, + sink: grpcio::UnarySink, + ) { + info!("Client requests force flush."; "cli" => %ctx.peer()); + let mut resp = FlushNowResponse::new(); + let (tx, mut rx) = tokio::sync::mpsc::channel(1); + let task = Task::ForceFlush(TaskSelector::All, tx); + if let Err(err) = self.endpoint.schedule(task) { + ctx.spawn( + sink.fail(RpcStatus::with_message( + RpcStatusCode::INTERNAL, + format!( + "failed to schedule the command, maybe busy or shutting down: {}", + err + ), + )) + .map(|res| { + if let Err(err) = res { + warn!("flush_now: failed to send an error response to client"; "err" => %err) + } + }), + ); + return; + }; + + ctx.spawn(async move { + while let Some(item) = rx.recv().await { + let mut res = FlushResult::new(); + res.set_success(item.error.is_none()); + if let Some(err) = item.error { + res.set_error_message(err.to_string()); + } + res.set_task_name(item.task); + resp.results.push(res); + } + + if let Err(err) = sink.success(resp.clone()).await { + warn!("flush_now: failed to send success response to client"; "err" => %err, "resp" => ?resp); + } + }) + } + fn get_last_flush_ts_of_region( 
&mut self, _ctx: RpcContext<'_>, diff --git a/components/backup-stream/tests/integration/mod.rs b/components/backup-stream/tests/integration/mod.rs index ed161c68047..8b33a46558f 100644 --- a/components/backup-stream/tests/integration/mod.rs +++ b/components/backup-stream/tests/integration/mod.rs @@ -190,12 +190,10 @@ mod all { suite.write_records(0, 128, 1).await; let ts = suite.just_async_commit_prewrite(256, 1); suite.write_records(258, 128, 1).await; - suite.force_flush_files("test_async_commit"); - std::thread::sleep(Duration::from_secs(4)); + suite.force_flush_files_and_wait("test_async_commit").await; assert_eq!(suite.global_checkpoint(), 256); suite.just_commit_a_key(make_record_key(1, 256), TimeStamp::new(256), ts); - suite.force_flush_files("test_async_commit"); - suite.wait_for_flush(); + suite.force_flush_files_and_wait("test_async_commit").await; let cp = suite.global_checkpoint(); assert!(cp > 256, "it is {:?}", cp); }); @@ -425,22 +423,22 @@ mod all { let leader = suite.cluster.leader_of_region(1).unwrap(); suite.must_shuffle_leader(1); let round2 = run_async_test(suite.write_records(256, 128, 1)); + let (tx, mut rx) = tokio::sync::mpsc::channel(1); suite .endpoints .get(&leader.store_id) .unwrap() .scheduler() - .schedule(Task::ForceFlush("r".to_owned())) + .schedule(Task::ForceFlush(TaskSelector::All, tx)) .unwrap(); - suite.sync(); - std::thread::sleep(Duration::from_secs(2)); + while rx.blocking_recv().is_some() {} + suite.check_for_write_records( suite.flushed_files.path(), round1.iter().map(|x| x.as_slice()), ); assert!(suite.global_checkpoint() > 256); - suite.force_flush_files("r"); - suite.wait_for_flush(); + run_async_test(suite.force_flush_files_and_wait("r")); assert!(suite.global_checkpoint() > 512); suite.check_for_write_records( suite.flushed_files.path(), @@ -495,12 +493,11 @@ mod all { #[test] fn update_config() { - let suite = SuiteBuilder::new_named("network_partition") - .nodes(1) - .build(); + let suite = SuiteBuilder::new_named("update_config").nodes(1).build(); let mut basic_config = BackupStreamConfig::default(); basic_config.initial_scan_concurrency = 4; suite.run(|| Task::ChangeConfig(basic_config.clone())); + suite.sync(); suite.wait_with(|e| { assert_eq!(e.initial_scan_semaphore.available_permits(), 4,); true @@ -513,4 +510,36 @@ mod all { true }); } + + #[test] + fn force_flush() { + let mut suite = SuiteBuilder::new_named("force_flush").nodes(1).build(); + suite.must_register_task(1, "force_flush"); + let recs = run_async_test(suite.write_records(0, 128, 1)); + let mut strm = suite.flush_stream(true); + let tso = suite.tso(); + + suite.for_each_log_backup_cli(|_id, c| { + let res = c.flush_now(Default::default()).unwrap(); + assert_eq!(res.results.len(), 1); + assert!(res.results[0].error_message.is_empty(), "{:?}", res); + assert!(res.results[0].success, "{:?}", res); + }); + + let Some((_, resp)) = run_async_test(strm.next()) else { + panic!("subscribe stream close early") + }; + assert_eq!(resp.events.len(), 1, "{:?}", resp.events); + assert!( + resp.events[0].checkpoint > tso.into_inner(), + "{:?}, {}", + resp.events[0], + tso + ); + + suite.check_for_write_records( + suite.flushed_files.path(), + recs.iter().map(|v| v.as_slice()), + ) + } } diff --git a/components/backup-stream/tests/suite.rs b/components/backup-stream/tests/suite.rs index e3254f16acc..a828ddee83c 100644 --- a/components/backup-stream/tests/suite.rs +++ b/components/backup-stream/tests/suite.rs @@ -595,10 +595,33 @@ impl Suite { ts } + pub fn for_each_log_backup_cli(&self, 
mut cb: impl FnMut(u64, &LogBackupClient)) { + for (k, v) in self.log_backup_cli.iter() { + cb(*k, v) + } + } + pub fn force_flush_files(&self, task: &str) { - // TODO: use the callback to make the test more stable. - self.run(|| Task::ForceFlush(task.to_owned())); - self.sync(); + // Force flush but not wait... + // Then the case may use `wait_flush`... + let _ = self.force_flush_files_and_wait(task); + } + + pub fn force_flush_files_and_wait(&self, task: &str) -> impl Future + '_ { + let (tx, mut rx) = tokio::sync::mpsc::channel(1); + self.run(|| Task::ForceFlush(TaskSelector::ByName(task.to_owned()), tx.clone())); + drop(tx); + + async move { + while let Some(res) = tokio::time::timeout(Duration::from_secs(30), rx.recv()) + .await + .expect("flush not finish after 30s") + { + if let Some(ref err) = res.error { + panic!("failed to flush: {}", err) + } + } + } } pub fn run(&self, mut t: impl FnMut() -> Task) { From 2f10d3f6ce98e5a06956605cf6c8a7035eb68785 Mon Sep 17 00:00:00 2001 From: 3pointer Date: Thu, 9 Jan 2025 22:48:37 +0800 Subject: [PATCH 76/86] support ignore keys by ts during download (#17951) close tikv/tikv#18105, ref pingcap/tidb#58238 Adapt ignore rules to make the download can skip some keys larger then specify timestamp Signed-off-by: 3pointer Co-authored-by: ti-chi-bot[bot] <108142056+ti-chi-bot[bot]@users.noreply.github.com> --- Cargo.lock | 2 +- components/sst_importer/src/metrics.rs | 7 +- components/sst_importer/src/sst_importer.rs | 240 +++++++++++++++++++- 3 files changed, 246 insertions(+), 3 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 940225bd459..1e5f2dcbd1a 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3743,7 +3743,7 @@ dependencies = [ [[package]] name = "kvproto" version = "0.0.2" -source = "git+https://github.com/pingcap/kvproto.git#c35d2b41011503a386db9ae2b7f4bcec653dab61" +source = "git+https://github.com/pingcap/kvproto.git#3b77f2c65c635567751d27afd6c58c7958645785" dependencies = [ "futures 0.3.15", "grpcio", diff --git a/components/sst_importer/src/metrics.rs b/components/sst_importer/src/metrics.rs index 0b7b1f4ff70..f8785f721b2 100644 --- a/components/sst_importer/src/metrics.rs +++ b/components/sst_importer/src/metrics.rs @@ -104,7 +104,12 @@ lazy_static! 
{ .unwrap(); pub static ref INPORTER_APPLY_COUNT: IntCounterVec = register_int_counter_vec!( "tikv_import_apply_count", - "Bucketed histogram of importer apply count", + "The operations of importer apply keys", + &["type"] + ).unwrap(); + pub static ref INPORTER_DOWNLOAD_COMPACT_KEYS_COUNT: IntCounterVec = register_int_counter_vec!( + "tikv_import_download_compact_keys_count", + "The operations of importer download keys from compacted SST files", &["type"] ).unwrap(); pub static ref EXT_STORAGE_CACHE_COUNT: IntCounterVec = register_int_counter_vec!( diff --git a/components/sst_importer/src/sst_importer.rs b/components/sst_importer/src/sst_importer.rs index b40bf442aea..bde5a205f3f 100644 --- a/components/sst_importer/src/sst_importer.rs +++ b/components/sst_importer/src/sst_importer.rs @@ -1231,6 +1231,8 @@ impl SstImporter { let direct_retval = (|| -> Result> { if rewrite_rule.old_key_prefix != rewrite_rule.new_key_prefix || rewrite_rule.new_timestamp != 0 + || rewrite_rule.ignore_after_timestamp != 0 + || rewrite_rule.ignore_before_timestamp != 0 { // must iterate if we perform key rewrite return Ok(None); @@ -1357,6 +1359,29 @@ impl SstImporter { let mut value = Cow::Borrowed(iter.value()); + if rewrite_rule.ignore_after_timestamp != 0 { + let ts = Key::decode_ts_from(iter.key())?; + if ts > TimeStamp::new(rewrite_rule.ignore_after_timestamp) { + iter.next()?; + INPORTER_DOWNLOAD_COMPACT_KEYS_COUNT + .with_label_values(&["after"]) + .inc(); + continue; + } + } + if rewrite_rule.ignore_before_timestamp != 0 { + // Let the client decide the ts here for default/write CF. + // Normally the ts in default CF is less than the ts in write CF. + let ts = Key::decode_ts_from(iter.key())?; + if ts < TimeStamp::new(rewrite_rule.ignore_before_timestamp) { + iter.next()?; + INPORTER_DOWNLOAD_COMPACT_KEYS_COUNT + .with_label_values(&["before"]) + .inc(); + continue; + } + } + if rewrite_rule.new_timestamp != 0 { data_key = Key::from_encoded(data_key) .truncate_ts() @@ -1369,7 +1394,7 @@ impl SstImporter { })? .append_ts(TimeStamp::new(rewrite_rule.new_timestamp)) .into_encoded(); - if meta.get_cf_name() == CF_WRITE { + if cf_name == CF_WRITE { let mut write = WriteRef::parse(iter.value()).map_err(|e| { Error::BadFormat(format!( "write {}: {}", @@ -2140,6 +2165,19 @@ mod tests { Ok((ext_sst_dir, backend, meta)) } + fn new_compacted_file_rewrite_rule( + old_key_prefix: &[u8], + new_key_prefix: &[u8], + new_timestamp: u64, + ignore_before_timestamp: u64, + ignore_after_timestamp: u64, + ) -> RewriteRule { + let mut rule = new_rewrite_rule(old_key_prefix, new_key_prefix, new_timestamp); + rule.ignore_before_timestamp = ignore_before_timestamp; + rule.ignore_after_timestamp = ignore_after_timestamp; + rule + } + fn new_rewrite_rule( old_key_prefix: &[u8], new_key_prefix: &[u8], @@ -2852,6 +2890,206 @@ mod tests { ] ); } + #[test] + fn test_download_compacted_sst_with_key_rewrite_ts_default() { + // performs the download. + let importer_dir = tempfile::tempdir().unwrap(); + let cfg = Config::default(); + let importer = + SstImporter::::new(&cfg, &importer_dir, None, ApiVersion::V1, false) + .unwrap(); + + // creates a sample SST file. 
+ let (_ext_sst_dir, backend, meta) = create_sample_external_sst_file_txn_default().unwrap(); + let db = create_sst_test_engine().unwrap(); + let downloads = vec![ + ( + // no filter + new_compacted_file_rewrite_rule(b"t123", b"t567", 0, 0, 0), + vec![ + (get_encoded_key(b"t567_r01", 1), b"abc".to_vec()), + (get_encoded_key(b"t567_r04", 3), b"xyz".to_vec()), + (get_encoded_key(b"t567_r07", 7), b"pqrst".to_vec()), + ], + ), + ( + // filter key between ts range [2, 6], + new_compacted_file_rewrite_rule(b"t123", b"t123", 0, 2, 6), + vec![(get_encoded_key(b"t123_r04", 3), b"xyz".to_vec())], + ), + ( + // filter key between ts range [7, 18] + new_compacted_file_rewrite_rule(b"t123", b"t567", 0, 7, 18), + vec![(get_encoded_key(b"t567_r07", 7), b"pqrst".to_vec())], + ), + ]; + for case in downloads { + let _ = importer + .download( + &meta, + &backend, + "sample_default.sst", + &case.0, + None, + Limiter::new(f64::INFINITY), + db.clone(), + ) + .unwrap() + .unwrap(); + + // verifies that the file is saved to the correct place. + // (the file size may be changed, so not going to check the file size) + let sst_file_path = importer.dir.join_for_read(&meta).unwrap().save; + assert!(sst_file_path.is_file()); + + // verifies the SST content is correct. + let sst_reader = new_sst_reader(sst_file_path.to_str().unwrap(), None); + sst_reader.verify_checksum().unwrap(); + let mut iter = sst_reader.iter(IterOptions::default()).unwrap(); + iter.seek_to_first().unwrap(); + assert_eq!(collect(iter), case.1); + } + } + + #[test] + fn test_download_compacted_sst_with_key_rewrite_ts_write() { + // performs the download. + let importer_dir = tempfile::tempdir().unwrap(); + let cfg = Config::default(); + let importer = + SstImporter::::new(&cfg, &importer_dir, None, ApiVersion::V1, false) + .unwrap(); + + // creates a sample SST file. 
+ let (_ext_sst_dir, backend, meta) = create_sample_external_sst_file_txn_write().unwrap(); + let db = create_sst_test_engine().unwrap(); + let downloads = vec![ + ( + // filter key between ts range [4, 8] + new_compacted_file_rewrite_rule(b"t123", b"t567", 0, 4, 8), + vec![ + ( + get_encoded_key(b"t567_r01", 5), + get_write_value(WriteType::Put, 1, None), + ), + ( + get_encoded_key(b"t567_r02", 5), + get_write_value(WriteType::Delete, 1, None), + ), + ( + get_encoded_key(b"t567_r04", 4), + get_write_value(WriteType::Put, 3, None), + ), + ( + get_encoded_key(b"t567_r07", 8), + get_write_value(WriteType::Put, 7, None), + ), + ( + get_encoded_key(b"t567_r13", 8), + get_write_value(WriteType::Put, 7, Some(b"www".to_vec())), + ), + ], + ), + ( + // filter key between ts range [5, 6] + new_compacted_file_rewrite_rule(b"t123", b"t567", 0, 5, 6), + vec![ + ( + get_encoded_key(b"t567_r01", 5), + get_write_value(WriteType::Put, 1, None), + ), + ( + get_encoded_key(b"t567_r02", 5), + get_write_value(WriteType::Delete, 1, None), + ), + ], + ), + ( + // filter key between ts range [4, 5] + new_compacted_file_rewrite_rule(b"t123", b"t567", 0, 4, 5), + vec![ + ( + get_encoded_key(b"t567_r01", 5), + get_write_value(WriteType::Put, 1, None), + ), + ( + get_encoded_key(b"t567_r02", 5), + get_write_value(WriteType::Delete, 1, None), + ), + ( + get_encoded_key(b"t567_r04", 4), + get_write_value(WriteType::Put, 3, None), + ), + ], + ), + ( + // no filter + new_compacted_file_rewrite_rule(b"t123", b"t567", 0, 0, 0), + vec![ + ( + get_encoded_key(b"t567_r01", 5), + get_write_value(WriteType::Put, 1, None), + ), + ( + get_encoded_key(b"t567_r02", 5), + get_write_value(WriteType::Delete, 1, None), + ), + ( + get_encoded_key(b"t567_r04", 4), + get_write_value(WriteType::Put, 3, None), + ), + ( + get_encoded_key(b"t567_r07", 8), + get_write_value(WriteType::Put, 7, None), + ), + ( + get_encoded_key(b"t567_r13", 8), + get_write_value(WriteType::Put, 7, Some(b"www".to_vec())), + ), + ], + ), + ( + // no rewrite rule, but has filter ts range [5, 5] + new_compacted_file_rewrite_rule(b"t123", b"t123", 0, 5, 5), + vec![ + ( + get_encoded_key(b"t123_r01", 5), + get_write_value(WriteType::Put, 1, None), + ), + ( + get_encoded_key(b"t123_r02", 5), + get_write_value(WriteType::Delete, 1, None), + ), + ], + ), + ]; + for case in downloads { + let _ = importer + .download( + &meta, + &backend, + "sample_write.sst", + &case.0, + None, + Limiter::new(f64::INFINITY), + db.clone(), + ) + .unwrap() + .unwrap(); + + // verifies that the file is saved to the correct place. + // (the file size may be changed, so not going to check the file size) + let sst_file_path = importer.dir.join_for_read(&meta).unwrap().save; + assert!(sst_file_path.is_file()); + + // verifies the SST content is correct. 
+ let sst_reader = new_sst_reader(sst_file_path.to_str().unwrap(), None); + sst_reader.verify_checksum().unwrap(); + let mut iter = sst_reader.iter(IterOptions::default()).unwrap(); + iter.seek_to_first().unwrap(); + assert_eq!(collect(iter), case.1); + } + } #[test] fn test_download_sst_with_key_rewrite_ts_write() { From 7a5a4a1e42998a7fa64428b8f6ac1235488236af Mon Sep 17 00:00:00 2001 From: MyonKeminta <9948422+MyonKeminta@users.noreply.github.com> Date: Mon, 13 Jan 2025 10:48:34 +0800 Subject: [PATCH 77/86] gc_worker: Do not do delete_files_in_range on lock cf which has potential risk to affect data correctness (#18092) close tikv/tikv#18091 gc_worker: Do not do delete_files_in_range on lock cf which has potential risk to affect data correctness Signed-off-by: MyonKeminta Co-authored-by: ti-chi-bot[bot] <108142056+ti-chi-bot[bot]@users.noreply.github.com> --- src/server/gc_worker/gc_worker.rs | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/server/gc_worker/gc_worker.rs b/src/server/gc_worker/gc_worker.rs index 8689b958e3d..3fff47fab42 100644 --- a/src/server/gc_worker/gc_worker.rs +++ b/src/server/gc_worker/gc_worker.rs @@ -755,11 +755,14 @@ impl GcRunnerCore { let start_data_key = keys::data_key(start_key.as_encoded()); let end_data_key = keys::data_end_key(end_key.as_encoded()); + let unsafe_delete_cfs = &[CF_DEFAULT, CF_WRITE]; let cfs = &[CF_LOCK, CF_DEFAULT, CF_WRITE]; // First, use DeleteStrategy::DeleteFiles to free as much disk space as possible + // CF_LOCK is not proper to be handled in DeleteFiles mode because it might make + // some deleted locks show up again, and has risk to affect data correctness. let delete_files_start_time = Instant::now(); - for cf in cfs { + for cf in unsafe_delete_cfs { local_storage .delete_ranges_cf( &WriteOptions::default(), From 2f3f32d9a0de2ebdb987c00b2419761cfbda4556 Mon Sep 17 00:00:00 2001 From: ekexium Date: Mon, 13 Jan 2025 12:10:28 +0800 Subject: [PATCH 78/86] resolved-ts: skip updating last_pd_tso if tso fetch fails (#17990) close tikv/tikv#17989 If tso fetch fails, skip updating last_pd_tso. Signed-off-by: ekexium Co-authored-by: ti-chi-bot[bot] <108142056+ti-chi-bot[bot]@users.noreply.github.com> --- components/resolved_ts/src/advance.rs | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/components/resolved_ts/src/advance.rs b/components/resolved_ts/src/advance.rs index 9b7def6d8d8..a20e5c6b80d 100644 --- a/components/resolved_ts/src/advance.rs +++ b/components/resolved_ts/src/advance.rs @@ -106,10 +106,12 @@ impl AdvanceTsWorker { let last_pd_tso = self.last_pd_tso.clone(); let fut = async move { - // Ignore get tso errors since we will retry every `advdance_ts_interval`. + // Ignore get tso errors since we will retry every `advance_ts_interval`. let mut min_ts = pd_client.get_tso().await.unwrap_or_default(); if let Ok(mut last_pd_tso) = last_pd_tso.try_lock() { - *last_pd_tso = Some((min_ts, Instant::now())); + if !min_ts.is_zero() { + *last_pd_tso = Some((min_ts, Instant::now())); + } } let mut ts_source = TsSource::PdTso; From 8fc44fe4f93c6989e69c6d96e496a49bdf4da3f6 Mon Sep 17 00:00:00 2001 From: cfzjywxk Date: Mon, 13 Jan 2025 18:13:25 +0800 Subject: [PATCH 79/86] txn: fix duplicate locks returned in scan locks (#18115) close tikv/tikv#16818 Fix duplicated keys returned scanning locks. 
Signed-off-by: cfzjywxk --- components/raftstore/src/store/fsm/peer.rs | 4 + components/test_raftstore/src/cluster.rs | 11 +++ components/test_raftstore/src/util.rs | 9 +- src/storage/mvcc/reader/reader.rs | 22 +++-- tests/failpoints/cases/test_kv_service.rs | 108 ++++++++++++++++++++- 5 files changed, 142 insertions(+), 12 deletions(-) diff --git a/components/raftstore/src/store/fsm/peer.rs b/components/raftstore/src/store/fsm/peer.rs index 54cef0ab58f..1f09e67edad 100644 --- a/components/raftstore/src/store/fsm/peer.rs +++ b/components/raftstore/src/store/fsm/peer.rs @@ -3820,6 +3820,10 @@ where None => { self.propose_pending_batch_raft_command(); if self.propose_locks_before_transfer_leader(msg) { + fail_point!( + "finish_proposing_transfer_cmd_after_proposing_locks", + |_| {} + ); // If some pessimistic locks are just proposed, we propose another // TransferLeader command instead of transferring leader immediately. info!("propose transfer leader command"; diff --git a/components/test_raftstore/src/cluster.rs b/components/test_raftstore/src/cluster.rs index 0a165ec0528..374533b427c 100644 --- a/components/test_raftstore/src/cluster.rs +++ b/components/test_raftstore/src/cluster.rs @@ -1465,6 +1465,17 @@ impl Cluster { .unwrap() } + pub fn try_transfer_leader_with_timeout( + &mut self, + region_id: u64, + leader: metapb::Peer, + timeout: Duration, + ) -> Result { + let epoch = self.get_region_epoch(region_id); + let transfer_leader = new_admin_request(region_id, &epoch, new_transfer_leader_cmd(leader)); + self.call_command_on_leader(transfer_leader, timeout) + } + pub fn get_snap_dir(&self, node_id: u64) -> String { self.sim.rl().get_snap_dir(node_id) } diff --git a/components/test_raftstore/src/util.rs b/components/test_raftstore/src/util.rs index f6f513b767b..1cd4a17d730 100644 --- a/components/test_raftstore/src/util.rs +++ b/components/test_raftstore/src/util.rs @@ -1305,8 +1305,13 @@ pub fn kv_pessimistic_lock_with_ttl( } pub fn must_kv_pessimistic_lock(client: &TikvClient, ctx: Context, key: Vec, ts: u64) { - let resp = kv_pessimistic_lock(client, ctx, vec![key], ts, ts, false); - assert!(!resp.has_region_error(), "{:?}", resp.get_region_error()); + let resp = kv_pessimistic_lock(client, ctx.clone(), vec![key], ts, ts, false); + assert!( + !resp.has_region_error(), + "{:?}, ctx:{:?}", + resp.get_region_error(), + ctx + ); assert!(resp.errors.is_empty(), "{:?}", resp.get_errors()); } diff --git a/src/storage/mvcc/reader/reader.rs b/src/storage/mvcc/reader/reader.rs index 454250737be..5e26f964521 100644 --- a/src/storage/mvcc/reader/reader.rs +++ b/src/storage/mvcc/reader/reader.rs @@ -350,30 +350,40 @@ impl MvccReader { Ok(None) }; - let mut locks = Vec::with_capacity(limit.min(memory_locks.len())); + let mut locks: Vec<(Key, Lock)> = Vec::with_capacity(limit.min(memory_locks.len())); let mut memory_iter = memory_locks.into_iter(); let mut memory_pair = memory_iter.next(); let mut storage_pair = next_pair_from_storage()?; let has_remain = loop { - match (memory_pair.as_ref(), storage_pair.as_ref()) { + let next_key = match (memory_pair.as_ref(), storage_pair.as_ref()) { (Some((memory_key, _)), Some((storage_key, _))) => { if storage_key <= memory_key { - locks.push(storage_pair.take().unwrap()); + let next_key = storage_pair.take().unwrap(); storage_pair = next_pair_from_storage()?; + next_key } else { - locks.push(memory_pair.take().unwrap()); + let next_key = memory_pair.take().unwrap(); memory_pair = memory_iter.next(); + next_key } } (Some(_), None) => { - 
locks.push(memory_pair.take().unwrap()); + let next_key = memory_pair.take().unwrap(); memory_pair = memory_iter.next(); + next_key } (None, Some(_)) => { - locks.push(storage_pair.take().unwrap()); + let next_key = storage_pair.take().unwrap(); storage_pair = next_pair_from_storage()?; + next_key } (None, None) => break memory_has_remain, + }; + // The same key could exist in both memory and storage when there is ongoing + // leader transfer, split or merge on this region. In this case, duplicated + // keys should be ignored. + if locks.is_empty() || locks.last().unwrap().0 != next_key.0 { + locks.push(next_key); } if limit > 0 && locks.len() >= limit { break memory_pair.is_some() || storage_pair.is_some() || memory_has_remain; diff --git a/tests/failpoints/cases/test_kv_service.rs b/tests/failpoints/cases/test_kv_service.rs index c8777282787..c51b6232ba6 100644 --- a/tests/failpoints/cases/test_kv_service.rs +++ b/tests/failpoints/cases/test_kv_service.rs @@ -2,18 +2,20 @@ use std::{sync::Arc, time::Duration}; +use engine_traits::{Peekable, CF_LOCK}; use grpcio::{ChannelBuilder, Environment}; use kvproto::{ kvrpcpb::{PrewriteRequestPessimisticAction::SkipPessimisticCheck, *}, tikvpb::TikvClient, }; use test_raftstore::{ - configure_for_lease_read, must_kv_commit, must_kv_have_locks, must_kv_prewrite, - must_kv_prewrite_with, must_new_cluster_mul, new_server_cluster, try_kv_prewrite_with, - try_kv_prewrite_with_impl, + configure_for_lease_read, must_kv_commit, must_kv_have_locks, must_kv_pessimistic_lock, + must_kv_prewrite, must_kv_prewrite_with, must_new_cluster_and_kv_client_mul, + must_new_cluster_mul, new_server_cluster, try_kv_prewrite_with, try_kv_prewrite_with_impl, }; use test_raftstore_macro::test_case; -use tikv_util::{config::ReadableDuration, HandyRwLock}; +use tikv_util::{config::ReadableDuration, store::new_peer, HandyRwLock}; +use txn_types::Key; #[test_case(test_raftstore::must_new_cluster_and_kv_client)] #[test_case(test_raftstore_v2::must_new_cluster_and_kv_client)] @@ -270,3 +272,101 @@ fn test_storage_do_not_update_txn_status_cache_on_write_error() { must_kv_have_locks(&client, ctx, 29, b"k2", b"k3", &[(b"k2", Op::Put, 20, 20)]); fail::remove(cache_hit_fp); } + +#[test] +fn test_scan_locks_with_in_progress_transfer_leader() { + let (mut cluster, _, mut ctx) = must_new_cluster_and_kv_client_mul(3); + cluster.pd_client.disable_default_operator(); + + cluster.must_transfer_leader(1, new_peer(1, 1)); + let leader_peer = cluster.leader_of_region(1).unwrap(); + ctx.set_peer(leader_peer.clone()); + let k1 = b"k1"; + let k2 = b"k2"; + let leader_region = cluster.get_region(k1); + ctx.set_region_epoch(leader_region.get_region_epoch().clone()); + let env = Arc::new(Environment::new(1)); + let channel = + ChannelBuilder::new(env).connect(&cluster.sim.rl().get_addr(leader_peer.get_store_id())); + let client = TikvClient::new(channel); + + // Create both pessimistic locks. + let start_ts = 10; + must_kv_pessimistic_lock(&client, ctx.clone(), k1.to_vec(), start_ts); + must_kv_pessimistic_lock(&client, ctx.clone(), k2.to_vec(), start_ts); + + // Ensure the pessimistic locks are written to the memory but not the storage. 
+ let engine = cluster.get_engine(leader_peer.get_store_id()); + let cf_res = engine + .get_value_cf( + CF_LOCK, + keys::data_key(Key::from_raw(k1).as_encoded()).as_slice(), + ) + .unwrap(); + assert!(cf_res.is_none()); + let cf_res = engine + .get_value_cf( + CF_LOCK, + keys::data_key(Key::from_raw(k2).as_encoded()).as_slice(), + ) + .unwrap(); + assert!(cf_res.is_none()); + + let mut scan_lock_req = ScanLockRequest::default(); + scan_lock_req.set_context(ctx.clone()); + scan_lock_req.max_version = start_ts + 10; + scan_lock_req.limit = 256; + let scan_lock_resp = client.kv_scan_lock(&scan_lock_req.clone()).unwrap(); + assert!(!scan_lock_resp.has_region_error()); + assert_eq!(scan_lock_resp.get_locks().len(), 2); + assert_eq!(scan_lock_resp.locks.to_vec()[0].lock_version, start_ts); + assert_eq!(scan_lock_resp.locks.to_vec()[0].key, k1); + assert_eq!(scan_lock_resp.locks.to_vec()[1].lock_version, start_ts); + assert_eq!(scan_lock_resp.locks.to_vec()[1].key, k2); + + // Propose the transfer leader command but only trigger proposing pessimistic + // locks. + fail::cfg( + "finish_proposing_transfer_cmd_after_proposing_locks", + "return", + ) + .unwrap(); + let _ = cluster.try_transfer_leader_with_timeout(1, new_peer(2, 2), Duration::from_secs(1)); + + // Verify locks exist both in memory and storage. + let timer = tikv_util::time::Instant::now(); + let timeout = Duration::from_secs(5); + loop { + let cf_res = engine + .get_value_cf( + CF_LOCK, + keys::data_key(Key::from_raw(k1).as_encoded()).as_slice(), + ) + .unwrap(); + if cf_res.is_some() { + break; + } + std::thread::sleep(Duration::from_secs(1)); + if timer.saturating_elapsed() >= timeout { + assert!(cf_res.is_some()); + break; + } + } + let snapshot = cluster.must_get_snapshot_of_region(1); + let txn_ext = snapshot.txn_ext.unwrap(); + let guard = txn_ext.pessimistic_locks.read(); + assert!(guard.get(&Key::from_raw(k1)).is_some()); + assert!(guard.get(&Key::from_raw(k2)).is_some()); + drop(guard); + + fail::remove("finish_proposing_transfer_cmd_after_proposing_locks"); + + // Verify there should be no duplicate locks returned. + let scan_lock_resp = client.kv_scan_lock(&scan_lock_req.clone()).unwrap(); + assert!(!scan_lock_resp.has_region_error()); + assert_eq!(scan_lock_resp.locks.len(), 2); + assert_eq!(scan_lock_resp.locks.to_vec()[0].lock_version, start_ts); + assert_eq!(scan_lock_resp.locks.to_vec()[0].key, k1); + assert_eq!(scan_lock_resp.locks.to_vec()[1].lock_version, start_ts); + assert_eq!(scan_lock_resp.locks.to_vec()[1].key, k2); +} From 85d583c835d71459f996309eeb10d8282f5d1df1 Mon Sep 17 00:00:00 2001 From: zyguan Date: Tue, 14 Jan 2025 14:32:19 +0800 Subject: [PATCH 80/86] txn: prevent 1pc locks from being skipped when reading with max-ts (#18095) close tikv/tikv#18117 Introduce a new field `use_one_pc` to the `Lock` struct to indicate whether the txn uses 1pc, and use it to prevent locks from being skipped when reading with max-ts. 
Signed-off-by: zyguan --- components/txn_types/src/lock.rs | 12 ++++- src/storage/txn/actions/prewrite.rs | 66 +++++++++++++++++++++++++++- src/storage/txn/commands/prewrite.rs | 3 +- 3 files changed, 78 insertions(+), 3 deletions(-) diff --git a/components/txn_types/src/lock.rs b/components/txn_types/src/lock.rs index 26dc493262e..98650f1a8e4 100644 --- a/components/txn_types/src/lock.rs +++ b/components/txn_types/src/lock.rs @@ -84,6 +84,10 @@ pub struct Lock { pub txn_size: u64, pub min_commit_ts: TimeStamp, pub use_async_commit: bool, + // This field is only valid for in-memory locks and does not need to be persisted because: + // 1. the lock should be converted to a write directly when 1pc succeeds. + // 2. the field should be reverted to false (default value) when 1pc fails. + pub use_one_pc: bool, // Only valid when `use_async_commit` is true, and the lock is primary. Do not set // `secondaries` for secondaries. pub secondaries: Vec>, @@ -171,6 +175,7 @@ impl Lock { txn_size, min_commit_ts, use_async_commit: false, + use_one_pc: false, secondaries: Vec::default(), rollback_ts: Vec::default(), last_change: LastChange::default(), @@ -489,7 +494,11 @@ impl Lock { ))); } - if ts == TimeStamp::max() && raw_key == lock.primary && !lock.use_async_commit { + if ts == TimeStamp::max() + && raw_key == lock.primary + && !lock.use_async_commit + && !lock.use_one_pc + { // When `ts == TimeStamp::max()` (which means to get latest committed version // for primary key), and current key is the primary key, we ignore // this lock. @@ -1326,6 +1335,7 @@ mod tests { txn_size: 0, min_commit_ts: 20.into(), use_async_commit: false, + use_one_pc: false, secondaries: vec![], rollback_ts: vec![], last_change: LastChange::make_exist(8.into(), 2), diff --git a/src/storage/txn/actions/prewrite.rs b/src/storage/txn/actions/prewrite.rs index 9e95a3ef63f..b618f63892b 100644 --- a/src/storage/txn/actions/prewrite.rs +++ b/src/storage/txn/actions/prewrite.rs @@ -618,6 +618,10 @@ impl<'a> PrewriteMutation<'a> { if let Some(secondary_keys) = self.secondary_keys { lock.use_async_commit = true; lock.secondaries = secondary_keys.to_owned(); + } else if try_one_pc { + // Set `use_one_pc` to true to prevent the in-memory lock from being skipped + // when reading with max-ts. + lock.use_one_pc = true; } let final_min_commit_ts = if lock.use_async_commit || try_one_pc { @@ -632,6 +636,7 @@ impl<'a> PrewriteMutation<'a> { fail_point!("after_calculate_min_commit_ts"); if let Err(Error(box ErrorInner::CommitTsTooLarge { .. 
})) = &res { try_one_pc = false; + lock.use_one_pc = false; lock.use_async_commit = false; lock.secondaries = Vec::new(); } @@ -916,6 +921,8 @@ fn amend_pessimistic_lock( } pub mod tests { + #[cfg(test)] + use std::borrow::Cow; #[cfg(test)] use std::sync::Arc; @@ -926,7 +933,7 @@ pub mod tests { #[cfg(test)] use tikv_kv::RocksEngine; #[cfg(test)] - use txn_types::OldValue; + use txn_types::{OldValue, TsSet}; use super::*; #[cfg(test)] @@ -1275,6 +1282,8 @@ pub mod tests { // success 1pc prewrite needs to be transformed to locks assert!(!must_locked(&mut engine, b"k1", 10).use_async_commit); assert!(!must_locked(&mut engine, b"k2", 10).use_async_commit); + assert!(!must_locked(&mut engine, b"k1", 10).use_one_pc); + assert!(!must_locked(&mut engine, b"k2", 10).use_one_pc); } pub fn try_pessimistic_prewrite_check_not_exists( @@ -2757,4 +2766,59 @@ pub mod tests { prewrite_err(&mut engine, key, value, key, 120, 130, Some(130)); must_unlocked(&mut engine, key); } + + #[test] + fn test_1pc_set_lock_use_one_pc() { + let mut engine = crate::storage::TestEngineBuilder::new().build().unwrap(); + let cm = ConcurrencyManager::new(42.into()); + + let snapshot = engine.snapshot(Default::default()).unwrap(); + + let mut txn = MvccTxn::new(10.into(), cm.clone()); + let mut reader = SnapshotReader::new(10.into(), snapshot, false); + + let k1 = b"k1"; + let k2 = b"k2"; + + prewrite( + &mut txn, + &mut reader, + &optimistic_async_props(k1, 10.into(), 50.into(), 2, true), + Mutation::make_put(Key::from_raw(k1), b"v1".to_vec()), + &None, + SkipPessimisticCheck, + None, + ) + .unwrap(); + prewrite( + &mut txn, + &mut reader, + &optimistic_async_props(k1, 10.into(), 50.into(), 1, true), + Mutation::make_put(Key::from_raw(k2), b"v2".to_vec()), + &None, + SkipPessimisticCheck, + None, + ) + .unwrap(); + + // lock.use_one_pc should be set to true when using 1pc. + assert_eq!(txn.guards.len(), 2); + txn.guards[0].with_lock(|l| assert!(l.as_ref().unwrap().use_one_pc)); + txn.guards[1].with_lock(|l| assert!(l.as_ref().unwrap().use_one_pc)); + + // read with max_ts should be blocked by the lock. + for &key in &[k1, k2] { + let k = Key::from_raw(key); + let res = cm.read_key_check(&k, |l| { + Lock::check_ts_conflict( + Cow::Borrowed(l), + &k, + TimeStamp::max(), + &TsSet::Empty, + crate::storage::IsolationLevel::Si, + ) + }); + assert!(res.is_err()); + } + } } diff --git a/src/storage/txn/commands/prewrite.rs b/src/storage/txn/commands/prewrite.rs index 861083fb117..d9c7a65921b 100644 --- a/src/storage/txn/commands/prewrite.rs +++ b/src/storage/txn/commands/prewrite.rs @@ -988,7 +988,8 @@ fn handle_1pc_locks(txn: &mut MvccTxn, commit_ts: TimeStamp) -> ReleasedLocks { /// Change all 1pc locks in txn to 2pc locks. 
pub(in crate::storage::txn) fn fallback_1pc_locks(txn: &mut MvccTxn) { - for (key, lock, remove_pessimistic_lock) in std::mem::take(&mut txn.locks_for_1pc) { + for (key, mut lock, remove_pessimistic_lock) in std::mem::take(&mut txn.locks_for_1pc) { + lock.use_one_pc = false; let is_new_lock = !remove_pessimistic_lock; txn.put_lock(key, &lock, is_new_lock); } From c9c91b6f0767003d7354eb51398bea3151d6b6be Mon Sep 17 00:00:00 2001 From: Neil Shen Date: Tue, 14 Jan 2025 16:16:58 +0800 Subject: [PATCH 81/86] *: extend task wait metrics and add configuration description (#18099) ref tikv/tikv#15990 * Increase task wait metrics upper limit from 2.5s to 42s to capture long task wait records that are crucial for investigating high latency issues * Add description for end-point-memory-quota configuration Signed-off-by: Neil Shen Co-authored-by: ti-chi-bot[bot] <108142056+ti-chi-bot[bot]@users.noreply.github.com> --- components/resource_control/src/metrics.rs | 2 +- components/tikv_util/src/yatp_pool/metrics.rs | 2 +- etc/config-template.toml | 12 ++++++++---- src/config/mod.rs | 1 + src/server/config.rs | 3 ++- 5 files changed, 13 insertions(+), 7 deletions(-) diff --git a/components/resource_control/src/metrics.rs b/components/resource_control/src/metrics.rs index 594c6af486a..69ae0542a26 100644 --- a/components/resource_control/src/metrics.rs +++ b/components/resource_control/src/metrics.rs @@ -38,7 +38,7 @@ lazy_static! { "tikv_resource_control_priority_wait_duration", "Histogram of wait duration cause by priority quota limiter", &["priority"], - exponential_buckets(1e-5, 2.0, 18).unwrap() // 10us ~ 2.5s + exponential_buckets(1e-5, 2.0, 22).unwrap() // 10us ~ 42s ) .unwrap(); diff --git a/components/tikv_util/src/yatp_pool/metrics.rs b/components/tikv_util/src/yatp_pool/metrics.rs index a3e68b260db..53ab05e86b5 100644 --- a/components/tikv_util/src/yatp_pool/metrics.rs +++ b/components/tikv_util/src/yatp_pool/metrics.rs @@ -20,7 +20,7 @@ lazy_static! { "tikv_yatp_pool_schedule_wait_duration", "Histogram of yatp pool schedule wait duration.", &["name", "priority"], - exponential_buckets(1e-5, 2.0, 18).unwrap() // 10us ~ 2.5s + exponential_buckets(1e-5, 2.0, 22).unwrap() // 10us ~ 42s ) .unwrap(); } diff --git a/etc/config-template.toml b/etc/config-template.toml index 78aa3c31b77..f6c8427af75 100644 --- a/etc/config-template.toml +++ b/etc/config-template.toml @@ -237,9 +237,13 @@ ## Max time to handle Coprocessor requests before timeout. # end-point-request-max-handle-duration = "60s" +## Memory usage limit for TiKV handling coprocessor requests. +## By default, it will be set to 12.5% of the available memory of TiKV. +# end-point-memory-quota = "0B" + ## Max bytes that snapshot can interact with disk in one second. It should be ## set based on your disk performance. Only write flow is considered, if -## partiioned-raft-kv is used, read flow is also considered and it will be estimated +## partitioned-raft-kv is used, read flow is also considered and it will be estimated ## as read_size * 0.5 to get around errors from page cache. # snap-io-max-bytes-per-sec = "100MB" @@ -971,7 +975,7 @@ ## default: 0 # zstd-dict-size = 0 -## Whether to share blob cache with block cache. If set to true, Titan would use the shared block +## Whether to share blob cache with block cache. If set to true, Titan would use the shared block ## cache configured in `storage.block_cache` and ignore the setting of `blob-cache-size`. 
## default: true # shared-blob-cache = true @@ -1220,13 +1224,13 @@ # ## Avoid outputing data (e.g. user keys) to info log. It currently does not avoid printing ## user data altogether, but greatly reduce those logs. -## +## ## Candidates: ## true | "on": Avoid outputing raw data to info log, raw data will be replaced with "?". ## false | "off": Output raw data to info log. ## "marker": Encapsulate the raw data with "‹..›" to info log. ## -## Default is false. +## Default is false. # redact-info-log = false ## Configurations for encryption at rest. Experimental. diff --git a/src/config/mod.rs b/src/config/mod.rs index b848dcefc22..60de30b2f83 100644 --- a/src/config/mod.rs +++ b/src/config/mod.rs @@ -7005,6 +7005,7 @@ mod tests { cfg.server.grpc_memory_pool_quota = default_cfg.server.grpc_memory_pool_quota; cfg.server.background_thread_count = default_cfg.server.background_thread_count; cfg.server.end_point_max_concurrency = default_cfg.server.end_point_max_concurrency; + cfg.server.end_point_memory_quota = default_cfg.server.end_point_memory_quota; cfg.storage.scheduler_worker_pool_size = default_cfg.storage.scheduler_worker_pool_size; cfg.rocksdb.max_background_jobs = default_cfg.rocksdb.max_background_jobs; cfg.rocksdb.max_background_flushes = default_cfg.rocksdb.max_background_flushes; diff --git a/src/server/config.rs b/src/server/config.rs index feba21a09f2..deb5676f9e9 100644 --- a/src/server/config.rs +++ b/src/server/config.rs @@ -40,7 +40,8 @@ const DEFAULT_ENDPOINT_REQUEST_MAX_HANDLE_SECS: u64 = 60; // Number of rows in each chunk for streaming coprocessor. const DEFAULT_ENDPOINT_STREAM_BATCH_ROW_LIMIT: usize = 128; -// By default, endpoint memory quota will be set to 12.5% of system memory. +// By default, endpoint memory quota will be set to 12.5% of the available +// memory of TiKV. // // TPCC check test shows that: // * The actual endpoint memory usage is about 3 times to memory quota. From a863bdb49b6d9fcf3b6f1a8c9d57eabff54937e8 Mon Sep 17 00:00:00 2001 From: cfzjywxk Date: Thu, 16 Jan 2025 13:51:43 +0800 Subject: [PATCH 82/86] coprocessor: fix the check logic of external request (#18128) ref tikv/tikv#14474 Fix the request source check logic for external or internal Signed-off-by: cfzjywxk --- components/tracker/src/lib.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/components/tracker/src/lib.rs b/components/tracker/src/lib.rs index d03cd9502b2..5fbc9216048 100644 --- a/components/tracker/src/lib.rs +++ b/components/tracker/src/lib.rs @@ -107,7 +107,7 @@ impl RequestInfo { resource_group_tag: ctx.get_resource_group_tag().to_vec(), request_type, cid: 0, - is_external_req: ctx.get_request_source().starts_with("external"), + is_external_req: ctx.get_request_source().contains("external"), } } } From d4bc987ace0072862295ddfa3342c35b04d0f3ab Mon Sep 17 00:00:00 2001 From: lucasliang Date: Fri, 17 Jan 2025 14:19:55 +0800 Subject: [PATCH 83/86] rocksdb: resolve flow control issues caused by clock-skew problems. (#18102) close tikv/tikv#17995 Address clock-skew issues. 
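
The fix itself is carried by the rust-rocksdb revision bump below; the sketch here only illustrates the general principle that flow-control accounting should be driven by a monotonic clock, since wall-clock time can jump backwards under clock skew (the helper and its names are hypothetical, not part of this patch):

    use std::time::{Duration, Instant};

    // Hypothetical pacing helper: Instant is monotonic, so elapsed time can
    // never go negative even if the system clock is stepped backwards.
    struct WritePacer {
        last: Instant,
    }

    impl WritePacer {
        fn observe(&mut self, bytes: u64) -> f64 {
            let now = Instant::now();
            let elapsed = now
                .saturating_duration_since(self.last)
                .max(Duration::from_millis(1)); // avoid divide-by-zero spikes
            self.last = now;
            bytes as f64 / elapsed.as_secs_f64()
        }
    }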
Signed-off-by: lucasliang --- Cargo.lock | 6 +++--- Cargo.toml | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 1e5f2dcbd1a..2474f8093b7 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3814,7 +3814,7 @@ dependencies = [ [[package]] name = "librocksdb_sys" version = "0.1.0" -source = "git+https://github.com/tikv/rust-rocksdb.git#2bb1e4e32b9e45cf3fd8210766a9db38eacd5e4d" +source = "git+https://github.com/tikv/rust-rocksdb.git#19eeae6dc7734af12475fbcf9d368168a0314085" dependencies = [ "bindgen 0.65.1", "bzip2-sys", @@ -3833,7 +3833,7 @@ dependencies = [ [[package]] name = "libtitan_sys" version = "0.0.1" -source = "git+https://github.com/tikv/rust-rocksdb.git#2bb1e4e32b9e45cf3fd8210766a9db38eacd5e4d" +source = "git+https://github.com/tikv/rust-rocksdb.git#19eeae6dc7734af12475fbcf9d368168a0314085" dependencies = [ "bzip2-sys", "cc", @@ -5791,7 +5791,7 @@ dependencies = [ [[package]] name = "rocksdb" version = "0.3.0" -source = "git+https://github.com/tikv/rust-rocksdb.git#2bb1e4e32b9e45cf3fd8210766a9db38eacd5e4d" +source = "git+https://github.com/tikv/rust-rocksdb.git#19eeae6dc7734af12475fbcf9d368168a0314085" dependencies = [ "libc 0.2.151", "librocksdb_sys", diff --git a/Cargo.toml b/Cargo.toml index f3dd3734a05..db48dfefca4 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -36,7 +36,7 @@ testexport = [ "engine_rocks/testexport", "engine_panic/testexport", "encryption/testexport", - "file_system/testexport" + "file_system/testexport", ] test-engine-kv-rocksdb = ["engine_test/test-engine-kv-rocksdb"] test-engine-raft-raft-engine = ["engine_test/test-engine-raft-raft-engine"] From 18f44195c66bd75027c787fc746c5306db7bc64a Mon Sep 17 00:00:00 2001 From: Connor Date: Sat, 18 Jan 2025 08:23:58 +0800 Subject: [PATCH 84/86] *: Fix incorrect mapped allocation per thread metric (#18126) close tikv/tikv#18125 Fix incorrect mapped allocation per thread metric Not all thread builders are hooked by `thread_allocate_exclusive_arena`, so some threads are using shared arena, causing incorrect per thread allocation. Signed-off-by: Connor1996 --- components/batch-system/src/batch.rs | 1 - components/server/src/server.rs | 6 ++++-- components/server/src/server2.rs | 5 ++++- components/tikv_util/src/sys/thread.rs | 15 ++++++++------- 4 files changed, 16 insertions(+), 11 deletions(-) diff --git a/components/batch-system/src/batch.rs b/components/batch-system/src/batch.rs index 8cc873bd4b6..0b7f9f45489 100644 --- a/components/batch-system/src/batch.rs +++ b/components/batch-system/src/batch.rs @@ -583,7 +583,6 @@ where let t = thread::Builder::new() .name(name) .spawn_wrapper(move || { - tikv_alloc::thread_allocate_exclusive_arena().unwrap(); tikv_util::thread_group::set_properties(props); set_io_type(IoType::ForegroundWrite); poller.poll(); diff --git a/components/server/src/server.rs b/components/server/src/server.rs index b8ba53641ab..5cdc21609a4 100644 --- a/components/server/src/server.rs +++ b/components/server/src/server.rs @@ -122,7 +122,9 @@ use tikv::{ Engine, Storage, }, }; -use tikv_alloc::{add_thread_memory_accessor, remove_thread_memory_accessor}; +use tikv_alloc::{ + add_thread_memory_accessor, remove_thread_memory_accessor, thread_allocate_exclusive_arena, +}; use tikv_util::{ check_environment_variables, config::VersionTrack, @@ -330,7 +332,7 @@ where // SAFETY: we will call `remove_thread_memory_accessor` at before_stop. 
unsafe { add_thread_memory_accessor() }; - tikv_alloc::thread_allocate_exclusive_arena().unwrap(); + thread_allocate_exclusive_arena().unwrap(); }) .before_stop(|| { remove_thread_memory_accessor(); diff --git a/components/server/src/server2.rs b/components/server/src/server2.rs index 5381a365de8..5b85a1c6842 100644 --- a/components/server/src/server2.rs +++ b/components/server/src/server2.rs @@ -109,7 +109,9 @@ use tikv::{ Engine, Storage, }, }; -use tikv_alloc::{add_thread_memory_accessor, remove_thread_memory_accessor}; +use tikv_alloc::{ + add_thread_memory_accessor, remove_thread_memory_accessor, thread_allocate_exclusive_arena, +}; use tikv_util::{ check_environment_variables, config::VersionTrack, @@ -301,6 +303,7 @@ where // SAFETY: we will call `remove_thread_memory_accessor` at before_stop. unsafe { add_thread_memory_accessor() }; + thread_allocate_exclusive_arena().unwrap(); }) .before_stop(|| { remove_thread_memory_accessor(); diff --git a/components/tikv_util/src/sys/thread.rs b/components/tikv_util/src/sys/thread.rs index 818d8795b31..9d119cc09c8 100644 --- a/components/tikv_util/src/sys/thread.rs +++ b/components/tikv_util/src/sys/thread.rs @@ -7,7 +7,9 @@ use std::{io, io::Result, sync::Mutex, thread}; use collections::HashMap; -use tikv_alloc::{add_thread_memory_accessor, remove_thread_memory_accessor}; +use tikv_alloc::{ + add_thread_memory_accessor, remove_thread_memory_accessor, thread_allocate_exclusive_arena, +}; /// A cross-platform CPU statistics data structure. #[derive(Debug, Copy, Clone, Default, PartialEq)] @@ -430,6 +432,7 @@ impl StdThreadBuildWrapper for std::thread::Builder { call_thread_start_hooks(); // SAFETY: we will call `remove_thread_memory_accessor` at defer. unsafe { add_thread_memory_accessor() }; + thread_allocate_exclusive_arena().unwrap(); add_thread_name_to_map(); defer! {{ remove_thread_name_from_map(); @@ -452,9 +455,8 @@ impl ThreadBuildWrapper for tokio::runtime::Builder { // SAFETY: we will call `remove_thread_memory_accessor` at // `before-stop_wrapper`. // FIXME: What if the user only calls `after_start_wrapper`? - unsafe { - add_thread_memory_accessor(); - } + unsafe { add_thread_memory_accessor() }; + thread_allocate_exclusive_arena().unwrap(); add_thread_name_to_map(); start(); }) @@ -478,9 +480,8 @@ impl ThreadBuildWrapper for futures::executor::ThreadPoolBuilder { // SAFETY: we will call `remove_thread_memory_accessor` at // `before-stop_wrapper`. // FIXME: What if the user only calls `after_start_wrapper`? 
- unsafe { - add_thread_memory_accessor(); - } + unsafe { add_thread_memory_accessor() }; + thread_allocate_exclusive_arena().unwrap(); add_thread_name_to_map(); start(); }) From d43fea7acb639f16d87d6498449631a71071544e Mon Sep 17 00:00:00 2001 From: Shenghui Wu <793703860@qq.com> Date: Sat, 18 Jan 2025 09:17:19 +0800 Subject: [PATCH 85/86] expr: support scalar function from_unixtime in tikv (#18112) close tikv/tikv#18111 Support scalar function from_unixtime in tikv Signed-off-by: wshwsh12 <793703860@qq.com> Co-authored-by: ti-chi-bot[bot] <108142056+ti-chi-bot[bot]@users.noreply.github.com> --- .../src/codec/mysql/time/mod.rs | 13 ++ components/tidb_query_expr/src/impl_time.rs | 158 ++++++++++++++++++ components/tidb_query_expr/src/lib.rs | 2 + 3 files changed, 173 insertions(+) diff --git a/components/tidb_query_datatype/src/codec/mysql/time/mod.rs b/components/tidb_query_datatype/src/codec/mysql/time/mod.rs index 97f756bc1fe..a537a6e7abe 100644 --- a/components/tidb_query_datatype/src/codec/mysql/time/mod.rs +++ b/components/tidb_query_datatype/src/codec/mysql/time/mod.rs @@ -1484,6 +1484,19 @@ impl Time { Time::try_from_chrono_datetime(ctx, timestamp.naive_local(), time_type, fsp as i8) } + pub fn from_unixtime( + ctx: &mut EvalContext, + seconds: i64, + nanos: u32, + time_type: TimeType, + fsp: i8, + ) -> Result { + let timestamp = Utc.timestamp(seconds, nanos); + let timestamp = ctx.cfg.tz.from_utc_datetime(×tamp.naive_utc()); + let timestamp = timestamp.round_subsecs(fsp as u16); + Time::try_from_chrono_datetime(ctx, timestamp.naive_local(), time_type, fsp) + } + pub fn from_year( ctx: &mut EvalContext, year: u32, diff --git a/components/tidb_query_expr/src/impl_time.rs b/components/tidb_query_expr/src/impl_time.rs index 078cd2a85dd..8f226bc7900 100644 --- a/components/tidb_query_expr/src/impl_time.rs +++ b/components/tidb_query_expr/src/impl_time.rs @@ -1518,6 +1518,88 @@ pub fn sub_date_time_duration_interval_any_as_duration< ) } +#[rpn_fn(capture = [ctx, extra])] +#[inline] +pub fn from_unixtime_1_arg( + ctx: &mut EvalContext, + extra: &RpnFnCallExtra, + arg0: &Decimal, +) -> Result> { + eval_from_unixtime(ctx, extra.ret_field_type.get_decimal() as i8, *arg0) +} + +#[rpn_fn(capture = [ctx, extra])] +#[inline] +pub fn from_unixtime_2_arg( + ctx: &mut EvalContext, + extra: &RpnFnCallExtra, + arg0: &Decimal, + arg1: BytesRef, +) -> Result> { + let t = eval_from_unixtime(ctx, extra.ret_field_type.get_decimal() as i8, *arg0)?; + match t { + Some(t) => { + let res = t.date_format(std::str::from_utf8(arg1).map_err(Error::Encoding)?)?; + Ok(Some(res.into())) + } + None => Ok(None), + } +} + +// Port from TiDB's evalFromUnixTime +pub fn eval_from_unixtime( + ctx: &mut EvalContext, + mut fsp: i8, + unix_timestamp: Decimal, +) -> Result> { + // 0 <= unixTimeStamp <= 32536771199.999999 + if unix_timestamp.is_negative() { + return Ok(None); + } + let integral_part = unix_timestamp.as_i64().unwrap(); // Ignore Truncated error and Overflow error + // The max integralPart should not be larger than 32536771199. + // Refer to https://dev.mysql.com/doc/relnotes/mysql/8.0/en/news-8-0-28.html + if integral_part > 32536771199 { + return Ok(None); + } + // Split the integral part and fractional part of a decimal timestamp. + // e.g. for timestamp 12345.678, + // first get the integral part 12345, + // then (12345.678 - 12345) * (10^9) to get the decimal part and convert it to + // nanosecond precision. 
+ let integer_decimal_tp = Decimal::from(integral_part); + let frac_decimal_tp = &unix_timestamp - &integer_decimal_tp; + if !frac_decimal_tp.is_ok() { + return Ok(None); + } + let frac_decimal_tp = frac_decimal_tp.unwrap(); + let nano = Decimal::from(NANOS_PER_SEC); + let x = &frac_decimal_tp * &nano; + if x.is_overflow() { + return Err(Error::overflow("DECIMAL", "").into()); + } + if x.is_truncated() { + return Err(Error::truncated().into()); + } + let x = x.unwrap(); + let fractional_part = x.as_i64(); // here fractionalPart is result multiplying the original fractional part by 10^9. + if fractional_part.is_overflow() { + return Err(Error::overflow("DECIMAL", "").into()); + } + let fractional_part = fractional_part.unwrap(); + if fsp < 0 { + fsp = MAX_FSP; + } + let tmp = DateTime::from_unixtime( + ctx, + integral_part, + fractional_part as u32, + TimeType::DateTime, + fsp, + )?; + Ok(Some(tmp)) +} + #[cfg(test)] mod tests { use std::{str::FromStr, sync::Arc}; @@ -3843,4 +3925,80 @@ mod tests { } } } + + #[test] + fn test_from_unixtime_1_arg() { + let cases = vec![ + (1451606400.0, 0, Some("2016-01-01 00:00:00")), + (1451606400.123456, 6, Some("2016-01-01 00:00:00.123456")), + (1451606400.999999, 6, Some("2016-01-01 00:00:00.999999")), + (1451606400.9999999, 6, Some("2016-01-01 00:00:01.000000")), + (1451606400.9999995, 6, Some("2016-01-01 00:00:01.000000")), + (1451606400.9999994, 6, Some("2016-01-01 00:00:00.999999")), + (1451606400.123, 3, Some("2016-01-01 00:00:00.123")), + (5000000000.0, 0, Some("2128-06-11 08:53:20")), + (32536771199.99999, 6, Some("3001-01-18 23:59:59.999990")), + (0.0, 6, Some("1970-01-01 00:00:00.000000")), + (-1.0, 6, None), + (32536771200.0, 6, None), + ]; + let mut ctx = EvalContext::default(); + for (datetime, fsp, expected) in cases { + let decimal = Decimal::from_f64(datetime).unwrap(); + let mut result_field_type: FieldType = FieldTypeTp::DateTime.into(); + result_field_type.set_decimal(fsp as i32); + + let (result, _) = RpnFnScalarEvaluator::new() + .push_param(decimal) + .evaluate_raw(result_field_type, ScalarFuncSig::FromUnixTime1Arg); + let output: Option = result.unwrap().into(); + + let expected = + expected.map(|arg1| DateTime::parse_datetime(&mut ctx, arg1, fsp, false).unwrap()); + assert_eq!(output, expected); + } + } + + #[test] + fn test_from_unixtime_2_arg() { + let cases = vec![ + ( + 1451606400.0, + "%Y %D %M %h:%i:%s %x", + 0, + Some("2016 1st January 12:00:00 2015"), + ), + ( + 1451606400.123456, + "%Y %D %M %h:%i:%s %x", + 6, + Some("2016 1st January 12:00:00 2015"), + ), + ( + 1451606400.999999, + "%Y %D %M %h:%i:%s %x", + 6, + Some("2016 1st January 12:00:00 2015"), + ), + ( + 1451606400.9999999, + "%Y %D %M %h:%i:%s %x", + 6, + Some("2016 1st January 12:00:01 2015"), + ), + ]; + for (datetime, format, fsp, expected) in cases { + let decimal = Decimal::from_f64(datetime).unwrap(); + let mut result_field_type: FieldType = FieldTypeTp::String.into(); + result_field_type.set_decimal(fsp); + let (result, _) = RpnFnScalarEvaluator::new() + .push_param(decimal) + .push_param(format) + .evaluate_raw(result_field_type, ScalarFuncSig::FromUnixTime2Arg); + let output: Option = result.unwrap().into(); + + let expected = expected.map(|str| str.as_bytes().to_vec()); + assert_eq!(output, expected); + } + } } diff --git a/components/tidb_query_expr/src/lib.rs b/components/tidb_query_expr/src/lib.rs index 55cb4601f54..b47b0bdf63b 100644 --- a/components/tidb_query_expr/src/lib.rs +++ b/components/tidb_query_expr/src/lib.rs @@ -931,6 +931,8 @@ fn 
map_expr_node_to_rpn_func(expr: &Expr) -> Result { ScalarFuncSig::SubDateDurationRealDatetime => sub_date_time_duration_interval_any_as_datetime_fn_meta::(), ScalarFuncSig::AddDateDurationDecimalDatetime => add_date_time_duration_interval_any_as_datetime_fn_meta::(), ScalarFuncSig::SubDateDurationDecimalDatetime => sub_date_time_duration_interval_any_as_datetime_fn_meta::(), + ScalarFuncSig::FromUnixTime1Arg => from_unixtime_1_arg_fn_meta(), + ScalarFuncSig::FromUnixTime2Arg => from_unixtime_2_arg_fn_meta(), _ => return Err(other_err!( "ScalarFunction {:?} is not supported in batch mode", value From c88260c276cc84029028726b60002c8898e856cc Mon Sep 17 00:00:00 2001 From: Calvin Neo Date: Mon, 20 Jan 2025 13:52:50 +0800 Subject: [PATCH 86/86] Support customized raft message rejection logic (#18114) close tikv/tikv#18113 Support customized raft message rejection logic Signed-off-by: Calvin Neo Signed-off-by: Calvin Neo Co-authored-by: ti-chi-bot[bot] <108142056+ti-chi-bot[bot]@users.noreply.github.com> Co-authored-by: glorv --- components/server/src/server.rs | 5 +- components/server/src/server2.rs | 5 +- components/test_raftstore-v2/src/server.rs | 5 +- components/test_raftstore/src/server.rs | 5 +- src/server/metrics.rs | 5 ++ src/server/server.rs | 3 + src/server/service/kv.rs | 67 +++++++++++++++++++--- src/server/service/mod.rs | 4 +- tests/failpoints/cases/test_server.rs | 44 ++++++++++++++ 9 files changed, 128 insertions(+), 15 deletions(-) diff --git a/components/server/src/server.rs b/components/server/src/server.rs index 5cdc21609a4..a0ed481b92a 100644 --- a/components/server/src/server.rs +++ b/components/server/src/server.rs @@ -102,7 +102,7 @@ use tikv::{ lock_manager::LockManager, raftkv::ReplicaReadLockChecker, resolve, - service::{DebugService, DiagnosticsService}, + service::{DebugService, DefaultGrpcMessageFilter, DiagnosticsService}, status_server::StatusServer, tablet_snap::NoSnapshotCache, ttl::TtlChecker, @@ -891,6 +891,9 @@ where debug_thread_pool, health_controller, self.resource_manager.clone(), + Arc::new(DefaultGrpcMessageFilter::new( + server_config.value().reject_messages_on_memory_ratio, + )), ) .unwrap_or_else(|e| fatal!("failed to create server: {}", e)); cfg_controller.register( diff --git a/components/server/src/server2.rs b/components/server/src/server2.rs index 5b85a1c6842..4c86db45845 100644 --- a/components/server/src/server2.rs +++ b/components/server/src/server2.rs @@ -91,7 +91,7 @@ use tikv::{ lock_manager::LockManager, raftkv::ReplicaReadLockChecker, resolve, - service::{DebugService, DiagnosticsService}, + service::{DebugService, DefaultGrpcMessageFilter, DiagnosticsService}, status_server::StatusServer, KvEngineFactoryBuilder, NodeV2, RaftKv2, Server, CPU_CORES_QUOTA_GAUGE, GRPC_THREAD_PREFIX, MEMORY_LIMIT_GAUGE, @@ -829,6 +829,9 @@ where debug_thread_pool, health_controller, self.resource_manager.clone(), + Arc::new(DefaultGrpcMessageFilter::new( + server_config.value().reject_messages_on_memory_ratio, + )), ) .unwrap_or_else(|e| fatal!("failed to create server: {}", e)); cfg_controller.register( diff --git a/components/test_raftstore-v2/src/server.rs b/components/test_raftstore-v2/src/server.rs index 47830c77730..4c2906a9be9 100644 --- a/components/test_raftstore-v2/src/server.rs +++ b/components/test_raftstore-v2/src/server.rs @@ -61,7 +61,7 @@ use tikv::{ lock_manager::LockManager, raftkv::ReplicaReadLockChecker, resolve, - service::{DebugService, DiagnosticsService}, + service::{DebugService, DefaultGrpcMessageFilter, DiagnosticsService}, 
ConnectionBuilder, Error, Extension, NodeV2, PdStoreAddrResolver, RaftClient, RaftKv2, Result as ServerResult, Server, ServerTransport, }, @@ -644,6 +644,9 @@ impl ServerCluster { debug_thread_pool.clone(), health_controller.clone(), resource_manager.clone(), + Arc::new(DefaultGrpcMessageFilter::new( + server_cfg.value().reject_messages_on_memory_ratio, + )), ) .unwrap(); svr.register_service(create_diagnostics(diag_service.clone())); diff --git a/components/test_raftstore/src/server.rs b/components/test_raftstore/src/server.rs index a50e226f640..a023f1541a2 100644 --- a/components/test_raftstore/src/server.rs +++ b/components/test_raftstore/src/server.rs @@ -67,7 +67,7 @@ use tikv::{ lock_manager::LockManager, raftkv::ReplicaReadLockChecker, resolve::{self, StoreAddrResolver}, - service::DebugService, + service::{DebugService, DefaultGrpcMessageFilter}, tablet_snap::NoSnapshotCache, ConnectionBuilder, Error, MultiRaftServer, PdStoreAddrResolver, RaftClient, RaftKv, Result as ServerResult, Server, ServerTransport, @@ -617,6 +617,9 @@ impl ServerCluster { debug_thread_pool.clone(), health_controller.clone(), resource_manager.clone(), + Arc::new(DefaultGrpcMessageFilter::new( + server_cfg.value().reject_messages_on_memory_ratio, + )), ) .unwrap(); svr.register_service(create_import_sst(import_service.clone())); diff --git a/src/server/metrics.rs b/src/server/metrics.rs index 11ee6512831..3f3aa025f46 100644 --- a/src/server/metrics.rs +++ b/src/server/metrics.rs @@ -486,6 +486,11 @@ lazy_static! { "Count for rejected Raft append messages" ) .unwrap(); + pub static ref RAFT_SNAPSHOT_REJECTS: IntCounter = register_int_counter!( + "tikv_server_raft_snapshot_rejects", + "Count for rejected Raft snapshot messages" + ) + .unwrap(); pub static ref SNAP_LIMIT_TRANSPORT_BYTES_COUNTER: IntCounterVec = register_int_counter_vec!( "tikv_snapshot_limit_transport_bytes", "Total snapshot limit transport used", diff --git a/src/server/server.rs b/src/server/server.rs index e96fba10afd..ec88b776f4e 100644 --- a/src/server/server.rs +++ b/src/server/server.rs @@ -166,6 +166,7 @@ where debug_thread_pool: Arc, health_controller: HealthController, resource_manager: Option>, + raft_message_filter: Arc, ) -> Result { // A helper thread (or pool) for transport layer. let stats_pool = if cfg.value().stats_concurrency > 0 { @@ -211,6 +212,7 @@ where resource_manager, health_controller.clone(), health_feedback_interval, + raft_message_filter, ); let builder_factory = Box::new(BuilderFactory::new( kv_service, @@ -683,6 +685,7 @@ mod tests { debug_thread_pool, HealthController::new(), None, + Arc::new(DefaultGrpcMessageFilter::new(0.2)), ) .unwrap(); diff --git a/src/server/service/kv.rs b/src/server/service/kv.rs index 4dc65cec1b1..c6d1a3b2b8e 100644 --- a/src/server/service/kv.rs +++ b/src/server/service/kv.rs @@ -71,6 +71,41 @@ use crate::{ const GRPC_MSG_MAX_BATCH_SIZE: usize = 128; const GRPC_MSG_NOTIFY_SIZE: usize = 8; +pub trait RaftGrpcMessageFilter: Send + Sync { + fn should_reject_raft_message(&self, _: &RaftMessage) -> bool; + fn should_reject_snapshot(&self) -> bool; +} + +// The default filter is exported for other engines as reference. 
+#[derive(Clone)] +pub struct DefaultGrpcMessageFilter { + reject_messages_on_memory_ratio: f64, +} + +impl DefaultGrpcMessageFilter { + pub fn new(reject_messages_on_memory_ratio: f64) -> Self { + Self { + reject_messages_on_memory_ratio, + } + } +} + +impl RaftGrpcMessageFilter for DefaultGrpcMessageFilter { + fn should_reject_raft_message(&self, msg: &RaftMessage) -> bool { + fail::fail_point!("force_reject_raft_append_message", |_| true); + if msg.get_message().get_msg_type() == MessageType::MsgAppend { + needs_reject_raft_append(self.reject_messages_on_memory_ratio) + } else { + false + } + } + + fn should_reject_snapshot(&self) -> bool { + fail::fail_point!("force_reject_raft_snapshot_message", |_| true); + false + } +} + /// Service handles the RPC messages for the `Tikv` service. pub struct Service { cluster_id: u64, @@ -103,6 +138,8 @@ pub struct Service { health_controller: HealthController, health_feedback_interval: Option, health_feedback_seq: Arc, + + raft_message_filter: Arc, } impl Drop for Service { @@ -130,6 +167,7 @@ impl Clone for Service Service { resource_manager: Option>, health_controller: HealthController, health_feedback_interval: Option, + raft_message_filter: Arc, ) -> Self { let now_unix = std::time::SystemTime::now() .duration_since(std::time::UNIX_EPOCH) @@ -174,6 +213,7 @@ impl Service { health_controller, health_feedback_interval, health_feedback_seq: Arc::new(AtomicU64::new(now_unix)), + raft_message_filter, } } @@ -181,7 +221,7 @@ impl Service { store_id: u64, ch: &E::RaftExtension, msg: RaftMessage, - reject: bool, + raft_msg_filter: &Arc, ) -> RaftStoreResult<()> { let to_store_id = msg.get_to_peer().get_store_id(); if to_store_id != store_id { @@ -190,8 +230,11 @@ impl Service { my_store_id: store_id, }); } - if reject && msg.get_message().get_msg_type() == MessageType::MsgAppend { - RAFT_APPEND_REJECTS.inc(); + + if raft_msg_filter.should_reject_raft_message(&msg) { + if msg.get_message().get_msg_type() == MessageType::MsgAppend { + RAFT_APPEND_REJECTS.inc(); + } let id = msg.get_region_id(); let peer_id = msg.get_message().get_from(); ch.report_reject_message(id, peer_id); @@ -753,16 +796,15 @@ impl Tikv for Service { let store_id = self.store_id; let ch = self.storage.get_engine().raft_extension(); - let reject_messages_on_memory_ratio = self.reject_messages_on_memory_ratio; + let ob = self.raft_message_filter.clone(); let res = async move { let mut stream = stream.map_err(Error::from); while let Some(msg) = stream.try_next().await? { RAFT_MESSAGE_RECV_COUNTER.inc(); - let reject = needs_reject_raft_append(reject_messages_on_memory_ratio); if let Err(err @ RaftStoreError::StoreNotMatch { .. 
}) = - Self::handle_raft_message(store_id, &ch, msg, reject) + Self::handle_raft_message(store_id, &ch, msg, &ob) { // Return an error here will break the connection, only do that for // `StoreNotMatch` to let tikv to resolve a correct address from PD @@ -807,7 +849,7 @@ impl Tikv for Service { let store_id = self.store_id; let ch = self.storage.get_engine().raft_extension(); - let reject_messages_on_memory_ratio = self.reject_messages_on_memory_ratio; + let ob = self.raft_message_filter.clone(); let res = async move { let mut stream = stream.map_err(Error::from); @@ -822,10 +864,10 @@ impl Tikv for Service { let len = batch_msg.get_msgs().len(); RAFT_MESSAGE_RECV_COUNTER.inc_by(len as u64); RAFT_MESSAGE_BATCH_SIZE.observe(len as f64); - let reject = needs_reject_raft_append(reject_messages_on_memory_ratio); + for msg in batch_msg.take_msgs().into_iter() { if let Err(err @ RaftStoreError::StoreNotMatch { .. }) = - Self::handle_raft_message(store_id, &ch, msg, reject) + Self::handle_raft_message(store_id, &ch, msg, &ob) { // Return an error here will break the connection, only do that for // `StoreNotMatch` to let tikv to resolve a correct address from PD @@ -862,6 +904,13 @@ impl Tikv for Service { stream: RequestStream, sink: ClientStreamingSink, ) { + if self.raft_message_filter.should_reject_snapshot() { + RAFT_SNAPSHOT_REJECTS.inc(); + let status = + RpcStatus::with_message(RpcStatusCode::UNAVAILABLE, "rejected by peer".to_string()); + ctx.spawn(sink.fail(status).map(|_| ())); + return; + }; let task = SnapTask::Recv { stream, sink }; if let Err(e) = self.snap_scheduler.schedule(task) { let err_msg = format!("{}", e); diff --git a/src/server/service/mod.rs b/src/server/service/mod.rs index 00369a4ceae..dc2c254afe2 100644 --- a/src/server/service/mod.rs +++ b/src/server/service/mod.rs @@ -10,8 +10,8 @@ pub use self::{ diagnostics::Service as DiagnosticsService, kv::{ batch_commands_request, batch_commands_response, future_flashback_to_version, - future_prepare_flashback_to_version, GrpcRequestDuration, MeasuredBatchResponse, - MeasuredSingleResponse, Service as KvService, + future_prepare_flashback_to_version, DefaultGrpcMessageFilter, GrpcRequestDuration, + MeasuredBatchResponse, MeasuredSingleResponse, RaftGrpcMessageFilter, Service as KvService, }, }; diff --git a/tests/failpoints/cases/test_server.rs b/tests/failpoints/cases/test_server.rs index dfbb883179c..c745442501e 100644 --- a/tests/failpoints/cases/test_server.rs +++ b/tests/failpoints/cases/test_server.rs @@ -156,3 +156,47 @@ fn test_serving_status() { thread::sleep(Duration::from_millis(200)); assert_eq!(check(), ServingStatus::Serving); } + +#[test] +fn test_raft_message_observer() { + let mut cluster = new_server_cluster(0, 3); + cluster.pd_client.disable_default_operator(); + let r1 = cluster.run_conf_change(); + + cluster.must_put(b"k1", b"v1"); + + fail::cfg("force_reject_raft_append_message", "return").unwrap(); + fail::cfg("force_reject_raft_snapshot_message", "return").unwrap(); + + cluster.pd_client.add_peer(r1, new_peer(2, 2)); + + std::thread::sleep(std::time::Duration::from_millis(500)); + + must_get_none(&cluster.get_engine(2), b"k1"); + + fail::remove("force_reject_raft_append_message"); + fail::remove("force_reject_raft_snapshot_message"); + + cluster.pd_client.must_have_peer(r1, new_peer(2, 2)); + cluster.pd_client.must_add_peer(r1, new_peer(3, 3)); + + must_get_equal(&cluster.get_engine(2), b"k1", b"v1"); + must_get_equal(&cluster.get_engine(3), b"k1", b"v1"); + + 
fail::cfg("force_reject_raft_append_message", "return").unwrap(); + + let _ = cluster.async_put(b"k2", b"v2").unwrap(); + + std::thread::sleep(std::time::Duration::from_millis(500)); + + must_get_none(&cluster.get_engine(2), b"k2"); + must_get_none(&cluster.get_engine(3), b"k2"); + + fail::remove("force_reject_raft_append_message"); + + cluster.must_put(b"k3", b"v3"); + for id in 1..=3 { + must_get_equal(&cluster.get_engine(id), b"k3", b"v3"); + } + cluster.shutdown(); +}