Fix reconfiguring sub-replica causing data loss when myself change sh…

…ard_id (valkey-io#944) When reconfiguring sub-replica, there may a case that the sub-replica will use the old offset and win the election and cause the data loss if the old primary went down. In this case, sender is myself's primary, when executing updateShardId, not only the sender's shard_id is updated, but also the shard_id of myself is updated, casuing the subsequent areInSameShard check, that is, the full_sync_required check to fail. As part of the recent fix of valkey-io#885, the sub-replica needs to decide whether a full sync is required or not when switching shards. This shard membership check is supposed to be done against sub-replica's current shard_id, which however was lost in this code path. This then leads to sub-replica joining the other shard with a completely different and incorrect replication history. This is the only place where replicaof state can be updated on this path so the most natural fix would be to pull the chain replication reduction logic into this code block and before the updateShardId call. This one follow valkey-io#885 and closes valkey-io#942. Signed-off-by: Binbin <[email protected]> Co-authored-by: Ping Xie <[email protected]> Signed-off-by: Ping Xie <[email protected]>
PingXie · Sep 14, 2024 · e458f9c · e458f9c
1 parent d8a305c
commit e458f9c
Showing 1 changed file with 35 additions and 17 deletions.
diff --git a/tests/unit/cluster/replica-migration.tcl b/tests/unit/cluster/replica-migration.tcl
@@ -11,6 +11,14 @@ proc my_slot_allocation {masters replicas} {
     R [expr $masters-1] cluster addslots 0
 }
 
+proc get_my_primary_peer {srv_idx} {
+    set role_response [R $srv_idx role]
+    set primary_ip [lindex $role_response 1]
+    set primary_port [lindex $role_response 2]
+    set primary_peer "$primary_ip:$primary_port"
+    return $primary_peer
+}
+
 proc test_migrated_replica {type} {
     test "Migrated replica reports zero repl offset and rank, and fails to win election - $type" {
         # Write some data to primary 0, slot 1, make a small repl_offset.
@@ -190,7 +198,7 @@ proc test_nonempty_replica {type} {
         if {$type == "sigstop"} {
             resume_process $primary0_pid
 
-            # Waiting the old primary go online and become a replica.
+            # Wait for the old primary to go online and become a replica.
             wait_for_condition 1000 50 {
                 [s 0 role] eq {slave}
             } else {
@@ -208,14 +216,27 @@ start_cluster 4 4 {tags {external:skip cluster} overrides {cluster-node-timeout
     test_nonempty_replica "sigstop"
 } my_slot_allocation cluster_allocate_replicas ;# start_cluster
 
-start_cluster 4 4 {tags {external:skip cluster} overrides {cluster-node-timeout 1000 cluster-migration-barrier 999}} {
-    test_nonempty_replica "sigstop"
-} my_slot_allocation cluster_allocate_replicas ;# start_cluster
+proc test_sub_replica {type} {
+    test "Sub-replica reports zero repl offset and rank, and fails to win election - $type" {
+        # Write some data to primary 0, slot 1, make a small repl_offset.
+        for {set i 0} {$i < 1024} {incr i} {
+            R 0 incr key_991803
+        }
+        assert_equal {1024} [R 0 get key_991803]
 
-        # Record the current primary node, server 7 will be migrated later.
-        set old_role_response [R 7 role]
-        set old_primary_ip [lindex [R 7 role] 1]
-        set old_primary_port [lindex [R 7 role] 2]
+        # Write some data to primary 3, slot 0, make a big repl_offset.
+        for {set i 0} {$i < 10240} {incr i} {
+            R 3 incr key_977613
+        }
+        assert_equal {10240} [R 3 get key_977613]
+
+        R 3 config set cluster-replica-validity-factor 0
+        R 7 config set cluster-replica-validity-factor 0
+        R 3 config set cluster-allow-replica-migration yes
+        R 7 config set cluster-allow-replica-migration no
+
+        # 10s, make sure primary 0 will hang in the save.
+        R 0 config set rdb-key-save-delay 100000000
 
         # Move slot 0 from primary 3 to primary 0.
         set addr "[srv 0 host]:[srv 0 port]"
@@ -227,18 +248,16 @@ start_cluster 4 4 {tags {external:skip cluster} overrides {cluster-node-timeout
             fail "valkey-cli --cluster rebalance returns non-zero exit code, output below:\n$result"
         }
 
-        # Wait for server 7 role response to change.
+        # Make sure server 3 and server 7 becomes a replica of primary 0.
         wait_for_condition 1000 50 {
-            [R 7 role] ne $old_role_response
+            [get_my_primary_peer 3] eq $addr &&
+            [get_my_primary_peer 7] eq $addr
         } else {
             puts "R 3 role: [R 3 role]"
             puts "R 7 role: [R 7 role]"
-            fail "Server 7 role response has not changed"
+            fail "Server 3 and 7 role response has not changed"
         }
 
-        wait_for_cluster_propagation
-        wait_for_cluster_state "ok"
-
         # Make sure server 7 got a sub-replica log.
         verify_log_message -7 "*I'm a sub-replica!*" 0
 
@@ -316,9 +335,8 @@ start_cluster 4 4 {tags {external:skip cluster} overrides {cluster-node-timeout
 } my_slot_allocation cluster_allocate_replicas ;# start_cluster
 
 start_cluster 4 4 {tags {external:skip cluster} overrides {cluster-node-timeout 1000 cluster-migration-barrier 999}} {
-    test "valkey-cli make source node ignores NOREPLICAS error when doing the last CLUSTER SETSLOT" {
-        R 3 config set cluster-allow-replica-migration no
-        R 7 config set cluster-allow-replica-migration yes
+    test_nonempty_replica "sigstop"
+} my_slot_allocation cluster_allocate_replicas ;# start_cluster
 
         # Move slot 0 from primary 3 to primary 0.
         set addr "[srv 0 host]:[srv 0 port]"