From 9c25096ea2b37193cfc5033eb32cc49c79623d73 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pawe=C5=82=20Chrz=C4=85szcz?= Date: Mon, 8 Apr 2024 15:48:43 +0200 Subject: [PATCH 1/7] Make sure each node appears only once in discovery_nodes Otherwise, if you move the node to another cluster, old cluster would still connect to it, and the whole system would be in an inconsistent state. In particular, this change allows split-cluster rolling upgrade, where a new cluster is gradually formed from the restarted nodes. --- src/mongoose_cets_discovery_rdbms.erl | 29 ++++++++++++++------------- 1 file changed, 15 insertions(+), 14 deletions(-) diff --git a/src/mongoose_cets_discovery_rdbms.erl b/src/mongoose_cets_discovery_rdbms.erl index d5487bb287c..3e0042e81db 100644 --- a/src/mongoose_cets_discovery_rdbms.erl +++ b/src/mongoose_cets_discovery_rdbms.erl @@ -4,8 +4,8 @@ -export([init/1, get_nodes/1]). %% these functions are exported for testing purposes only. --export([select/1, insert_new/5, update_existing/4, delete_node_from_db/2]). --ignore_xref([select/1, insert_new/5, update_existing/4, delete_node_from_db/2]). +-export([select/1, insert_new/5, update_existing/3, delete_node_from_db/1]). +-ignore_xref([select/1, insert_new/5, update_existing/3, delete_node_from_db/1]). -include("mongoose_logger.hrl"). @@ -73,11 +73,12 @@ try_register(ClusterName, Node, State = #{node_ip_binary := Address}) NodeNum = case AlreadyRegistered of true -> - update_existing(ClusterName, Node, Address, Timestamp), + update_existing(Node, Address, Timestamp), {value, {_, Num, _Addr, _TS}} = lists:keysearch(Node, 1, Rows), Num; false -> Num = first_free_num(lists:usort(Nums)), + delete_node_from_db(Node), % Delete node if it was a member of another cluster %% Could fail with duplicate node_num reason. %% In this case just wait for the next get_nodes call. case insert_new(ClusterName, Node, Num, Address, Timestamp) of @@ -85,7 +86,7 @@ try_register(ClusterName, Node, State = #{node_ip_binary := Address}) {updated, 1} -> Num end end, - RunCleaningResult = run_cleaning(ClusterName, Timestamp, Rows, State), + RunCleaningResult = run_cleaning(Timestamp, Rows, State), %% This could be used for debugging Info = #{already_registered => AlreadyRegistered, timestamp => Timestamp, address => Address, @@ -95,12 +96,12 @@ try_register(ClusterName, Node, State = #{node_ip_binary := Address}) skip_expired_nodes(Nodes, {removed, ExpiredNodes}) -> (Nodes -- ExpiredNodes). -run_cleaning(ClusterName, Timestamp, Rows, State) -> +run_cleaning(Timestamp, Rows, State) -> #{expire_time := ExpireTime, node_name_to_insert := CurrentNode} = State, ExpiredNodes = [DbNode || {DbNode, _Num, _Addr, DbTS} <- Rows, is_expired(DbTS, Timestamp, ExpireTime), DbNode =/= CurrentNode], - [delete_node_from_db(ClusterName, DbNode) || DbNode <- ExpiredNodes], + [delete_node_from_db(DbNode) || DbNode <- ExpiredNodes], case ExpiredNodes of [] -> ok; [_ | _] -> @@ -122,9 +123,9 @@ prepare() -> mongoose_rdbms:prepare(cets_disco_insert_new, T, [cluster_name, node_name, node_num, address, updated_timestamp], insert_new()), mongoose_rdbms:prepare(cets_disco_update_existing, T, - [updated_timestamp, address, cluster_name, node_name], update_existing()), + [updated_timestamp, address, node_name], update_existing()), mongoose_rdbms:prepare(cets_delete_node_from_db, T, - [cluster_name, node_name], delete_node_from_db()). + [node_name], delete_node_from_db()). select() -> <<"SELECT node_name, node_num, address, updated_timestamp FROM discovery_nodes WHERE cluster_name = ?">>. 
@@ -141,16 +142,16 @@ insert_new(ClusterName, NodeName, NodeNum, Address, UpdatedTimestamp) -> [ClusterName, NodeName, NodeNum, Address, UpdatedTimestamp]). update_existing() -> - <<"UPDATE discovery_nodes SET updated_timestamp = ?, address = ? WHERE cluster_name = ? AND node_name = ?">>. + <<"UPDATE discovery_nodes SET updated_timestamp = ?, address = ? WHERE node_name = ?">>. -update_existing(ClusterName, NodeName, Address, UpdatedTimestamp) -> - mongoose_rdbms:execute(global, cets_disco_update_existing, [UpdatedTimestamp, Address, ClusterName, NodeName]). +update_existing(NodeName, Address, UpdatedTimestamp) -> + mongoose_rdbms:execute(global, cets_disco_update_existing, [UpdatedTimestamp, Address, NodeName]). delete_node_from_db() -> - <<"DELETE FROM discovery_nodes WHERE cluster_name = ? AND node_name = ?">>. + <<"DELETE FROM discovery_nodes WHERE node_name = ?">>. -delete_node_from_db(ClusterName, Node) -> - mongoose_rdbms:execute_successfully(global, cets_delete_node_from_db, [ClusterName, Node]). +delete_node_from_db(Node) -> + mongoose_rdbms:execute_successfully(global, cets_delete_node_from_db, [Node]). %% in seconds timestamp() -> From 2304e2c96856bf1c24df8bd83b0d1a843a7c731a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pawe=C5=82=20Chrz=C4=85szcz?= Date: Mon, 8 Apr 2024 15:52:12 +0200 Subject: [PATCH 2/7] Set 'node_name' as the primary key in discovery_nodes. This ensures consistency: each node appears only once. --- priv/mssql2012.sql | 2 +- priv/mysql.sql | 2 +- priv/pg.sql | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/priv/mssql2012.sql b/priv/mssql2012.sql index 8824014318c..e686afe1f4a 100644 --- a/priv/mssql2012.sql +++ b/priv/mssql2012.sql @@ -758,7 +758,7 @@ CREATE TABLE discovery_nodes ( node_num INT NOT NULL, address varchar(250) NOT NULL DEFAULT '', -- empty means we should ask DNS updated_timestamp BIGINT NOT NULL, -- in seconds - PRIMARY KEY (cluster_name, node_name) + PRIMARY KEY (node_name) ); CREATE UNIQUE INDEX i_discovery_nodes_node_num ON discovery_nodes(cluster_name, node_num); diff --git a/priv/mysql.sql b/priv/mysql.sql index a99f4a44a08..664b5bae0a1 100644 --- a/priv/mysql.sql +++ b/priv/mysql.sql @@ -547,7 +547,7 @@ CREATE TABLE discovery_nodes ( node_num INT UNSIGNED NOT NULL, address varchar(250) NOT NULL DEFAULT '', -- empty means we should ask DNS updated_timestamp BIGINT NOT NULL, -- in seconds - PRIMARY KEY (cluster_name, node_name) + PRIMARY KEY (node_name) ); CREATE UNIQUE INDEX i_discovery_nodes_node_num USING BTREE ON discovery_nodes(cluster_name, node_num); diff --git a/priv/pg.sql b/priv/pg.sql index e480087478b..66874f4eff5 100644 --- a/priv/pg.sql +++ b/priv/pg.sql @@ -489,7 +489,7 @@ CREATE TABLE discovery_nodes ( node_num INT NOT NULL, address varchar(250) NOT NULL DEFAULT '', -- empty means we should ask DNS updated_timestamp BIGINT NOT NULL, -- in seconds - PRIMARY KEY (cluster_name, node_name) + PRIMARY KEY (node_name) ); CREATE UNIQUE INDEX i_discovery_nodes_node_num ON discovery_nodes USING BTREE(cluster_name, node_num); From 8f737af14d44d2c0802dfee630d69dd7f0fbe194 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pawe=C5=82=20Chrz=C4=85szcz?= Date: Mon, 8 Apr 2024 15:53:15 +0200 Subject: [PATCH 3/7] Update pgsql migration - Add missing 'caps' table - Update primary key for 'discovery_nodes' --- priv/migrations/pgsql_6.2.0_x.x.x.sql | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/priv/migrations/pgsql_6.2.0_x.x.x.sql b/priv/migrations/pgsql_6.2.0_x.x.x.sql index 50d2f47baf0..01e65c186c2 100644 --- 
a/priv/migrations/pgsql_6.2.0_x.x.x.sql +++ b/priv/migrations/pgsql_6.2.0_x.x.x.sql @@ -10,3 +10,15 @@ ALTER TABLE rostergroups ADD PRIMARY KEY (server, username, jid, grp); -- Store information whether the message is of type "groupchat" in the user's archive ALTER TABLE mam_message ADD COLUMN is_groupchat boolean NOT NULL DEFAULT false; + +-- Create table for mod_caps +CREATE TABLE caps ( + node varchar(250) NOT NULL, + sub_node varchar(250) NOT NULL, + features text NOT NULL, + PRIMARY KEY (node, sub_node) +); + +-- In case of duplicates, you need to remove stale rows manually or wait for cleanup +ALTER TABLE discovery_nodes +DROP CONSTRAINT discovery_nodes_pkey, ADD PRIMARY KEY (node_name); From cf0e7f441e1266fa57052ba7c970230869123631 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pawe=C5=82=20Chrz=C4=85szcz?= Date: Mon, 8 Apr 2024 15:54:01 +0200 Subject: [PATCH 4/7] Update mysql migration - Add missing 'caps' table - Update primary key for 'discovery_nodes' --- priv/migrations/mysql_6.2.0_x.x.x.sql | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/priv/migrations/mysql_6.2.0_x.x.x.sql b/priv/migrations/mysql_6.2.0_x.x.x.sql index 48eea8e950f..a6a0e3aef33 100644 --- a/priv/migrations/mysql_6.2.0_x.x.x.sql +++ b/priv/migrations/mysql_6.2.0_x.x.x.sql @@ -10,3 +10,15 @@ ALTER TABLE rostergroups MODIFY COLUMN grp VARCHAR(255), ADD PRIMARY KEY(server, -- Store information whether the message is of type "groupchat" in the user's archive ALTER TABLE mam_message ADD COLUMN is_groupchat boolean NOT NULL DEFAULT false; + +-- Create table for mod_caps +CREATE TABLE caps ( + node varchar(250) NOT NULL, + sub_node varchar(250) NOT NULL, + features text NOT NULL, + PRIMARY KEY (node, sub_node) +); + +-- In case of duplicates, you need to remove stale rows manually or wait for cleanup +ALTER TABLE discovery_nodes +DROP PRIMARY KEY, ADD PRIMARY KEY (node_name); From a962eda4de55bdaf3cabb5ec2365070eadb11e7c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pawe=C5=82=20Chrz=C4=85szcz?= Date: Mon, 8 Apr 2024 15:54:17 +0200 Subject: [PATCH 5/7] Update mssql migration - Add missing 'caps' table - Update primary key for 'discovery_nodes' It is necessary to use a dynamic query because primary key is unnamed. - Fix mistakes, e.g. missing 'NOT NULL', syntax error for 'ALTER TABLE'. All migrations tested manually. 
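For reference, all three migrations replace the (cluster_name, node_name) primary key with
node_name alone, so any node_name that currently appears in more than one cluster has to be
cleaned up first. A standard query along these lines (not part of the migration files) can
be used to spot such duplicates before applying the change:

    SELECT node_name, COUNT(*) AS occurrences
    FROM discovery_nodes
    GROUP BY node_name
    HAVING COUNT(*) > 1;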
--- priv/migrations/mssql_6.2.0_x.x.x.sql | 24 +++++++++++++++++++++--- 1 file changed, 21 insertions(+), 3 deletions(-) diff --git a/priv/migrations/mssql_6.2.0_x.x.x.sql b/priv/migrations/mssql_6.2.0_x.x.x.sql index bb099988704..792cec67c20 100644 --- a/priv/migrations/mssql_6.2.0_x.x.x.sql +++ b/priv/migrations/mssql_6.2.0_x.x.x.sql @@ -1,15 +1,33 @@ -- Update roster schema -DROP INDEX i_rosteru_server_user_jid ON rosterusers; DROP INDEX i_rosteru_server_user ON rosterusers; DROP INDEX i_rosteru_jid ON rosterusers; ALTER TABLE rosterusers +DROP CONSTRAINT rosterusers$i_rosteru_server_user_jid; +ALTER TABLE rosterusers ADD CONSTRAINT PK_rosterusers PRIMARY KEY CLUSTERED (server ASC, username ASC, jid ASC); -DROP INDEX i_rosteru_jid ON rostergroups; +DROP INDEX i_rosterg_server_user_jid ON rostergroups; +ALTER TABLE rostergroups +ALTER COLUMN grp VARCHAR(250) NOT NULL; ALTER TABLE rostergroups -ALTER COLUMN grp VARCHAR(250), ADD CONSTRAINT PK_rostergroups PRIMARY KEY CLUSTERED (server ASC, username ASC, jid ASC, grp ASC); -- Store information whether the message is of type "groupchat" in the user's archive ALTER TABLE mam_message ADD is_groupchat smallint NOT NULL DEFAULT 0; + +-- Create table for mod_caps +CREATE TABLE caps ( + node varchar(250) NOT NULL, + sub_node varchar(250) NOT NULL, + features text NOT NULL, + PRIMARY KEY (node, sub_node) +); + +-- Delete PK constraint before replacing it with a new one +DECLARE @pk VARCHAR(max) = (SELECT CONSTRAINT_NAME FROM INFORMATION_SCHEMA.TABLE_CONSTRAINTS + WHERE TABLE_NAME='discovery_nodes' AND CONSTRAINT_TYPE='PRIMARY KEY'); +EXEC('ALTER TABLE discovery_nodes DROP CONSTRAINT ' + @pk); + +-- In case of duplicates, you need to remove stale rows manually or wait for cleanup +ALTER TABLE discovery_nodes ADD PRIMARY KEY (node_name); From c3b5982d7c68f16ad52aa828ead799cd5f5a2257 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pawe=C5=82=20Chrz=C4=85szcz?= Date: Mon, 8 Apr 2024 15:56:41 +0200 Subject: [PATCH 6/7] Update big tests for CETS discovery - Add test for moving nodes to a new cluster - Make sure it's not possible to have duplicate node name - Reduce verbosity by using more helpers --- big_tests/tests/cets_disco_SUITE.erl | 131 ++++++++++++++----------- big_tests/tests/graphql_cets_SUITE.erl | 3 +- 2 files changed, 74 insertions(+), 60 deletions(-) diff --git a/big_tests/tests/cets_disco_SUITE.erl b/big_tests/tests/cets_disco_SUITE.erl index 6f690691b4c..b03dc68cbed 100644 --- a/big_tests/tests/cets_disco_SUITE.erl +++ b/big_tests/tests/cets_disco_SUITE.erl @@ -22,6 +22,7 @@ file_cases() -> rdbms_cases() -> [rdbms_backend, + rdbms_backend_supports_cluster_change, rdbms_backend_supports_auto_cleaning, rdbms_backend_node_doesnt_remove_itself, rdbms_backend_db_queries, @@ -71,7 +72,7 @@ end_per_testcase(Name, Config) when Name == address_please_returns_ip; Name == address_please_returns_ip_127_0_0_1_from_db -> stop_cets_discovery(), Config; -end_per_testcase(_CaseName, Config) -> +end_per_testcase(_CaseName, _Config) -> unmock(mim()), unmock(mim2()). 
@@ -91,21 +92,33 @@ rdbms_backend(_Config) ->
     Opts1 = #{cluster_name => CN, node_name_to_insert => <<"test1">>},
     Opts2 = #{cluster_name => CN, node_name_to_insert => <<"test2">>},
 
-    State1 = disco_init(mim(), Opts1),
-    {{ok, Nodes1_2}, State1_2} = disco_get_nodes(mim(), State1),
-    ?assertMatch(#{last_query_info := #{already_registered := false}}, State1_2),
-    ?assertEqual([], Nodes1_2),
+    init_and_get_nodes(mim(), Opts1, []),
 
     %% "test2" node can see "test1" on initial registration
-    State2 = disco_init(mim2(), Opts2),
-    {{ok, Nodes2_2}, State2_2} = disco_get_nodes(mim2(), State2),
-    ?assertMatch(#{last_query_info := #{already_registered := false}}, State2_2),
-    ?assertEqual([test1], Nodes2_2),
+    State2 = init_and_get_nodes(mim2(), Opts2, [test1]),
 
     %% "test2" node can see "test1" on update
-    {{ok, Nodes2_3}, State2_3} = disco_get_nodes(mim2(), State2_2),
-    ?assertEqual(lists:sort([test1, test2]), lists:sort(Nodes2_3)),
-    ?assertMatch(#{last_query_info := #{already_registered := true}}, State2_3).
+    get_nodes(mim2(), State2, [test1, test2]).
+
+rdbms_backend_supports_cluster_change(_Config) ->
+    CN1 = random_cluster_name(?FUNCTION_NAME),
+    CN2 = <<CN1/binary, "_new">>,
+    Opts1 = #{cluster_name => CN1, node_name_to_insert => <<"test1">>},
+    Opts2 = #{cluster_name => CN1, node_name_to_insert => <<"test2">>},
+
+    %% Nodes test1 and test2 are in CN1, and they become connected
+    State1 = init_and_get_nodes(mim(), Opts1, []),
+    State2 = init_and_get_nodes(mim2(), Opts2, [test1]),
+    get_nodes(mim(), State1, [test1, test2]),
+
+    %% Node test1 moves to CN2, and the nodes are disconnected
+    NewState1 = init_and_get_nodes(mim(), Opts1#{cluster_name := CN2}, []),
+    get_nodes(mim2(), State2, [test2]),
+    NewState1A = get_nodes(mim(), NewState1, [test1]),
+
+    %% Node test2 moves to CN2, and the nodes are connected again
+    init_and_get_nodes(mim2(), Opts2#{cluster_name := CN2}, [test1]),
+    get_nodes(mim(), NewState1A, [test1, test2]).
rdbms_backend_supports_auto_cleaning(_Config) -> Timestamp = month_ago(), @@ -115,24 +128,17 @@ rdbms_backend_supports_auto_cleaning(_Config) -> Opts2 = #{cluster_name => CN, node_name_to_insert => <<"test2">>}, %% test1 row is written with an old (mocked) timestamp - State1 = disco_init(mim(), Opts1), - {{ok, Nodes1_2}, State1_2} = disco_get_nodes(mim(), State1), - {{ok, Nodes1_3}, State1_3} = disco_get_nodes(mim(), State1_2), - ?assertEqual([], Nodes1_2), - ?assertEqual([test1], Nodes1_3), - ?assertMatch(#{last_query_info := #{timestamp := Timestamp}}, State1_2), - ?assertMatch(#{last_query_info := #{timestamp := Timestamp}}, State1_3), + State1 = init_and_get_nodes(mim(), Opts1, []), + ?assertMatch(#{last_query_info := #{timestamp := Timestamp}}, State1), + State1A = get_nodes(mim(), State1, [test1]), + ?assertMatch(#{last_query_info := #{timestamp := Timestamp}}, State1A), %% test2 would clean test1 registration %% We don't mock on mim2 node, so timestamps would differ - State2 = disco_init(mim2(), Opts2), - {{ok, Nodes2_2}, State2_2} = disco_get_nodes(mim2(), State2), - ?assertEqual([], Nodes2_2), - ?assertMatch(#{last_query_info := #{run_cleaning_result := {removed, [<<"test1">>]}}}, - State2_2), - {{ok, Nodes2_3}, State2_3} = disco_get_nodes(mim2(), State2), - ?assertEqual([test2], Nodes2_3), - #{last_query_info := #{last_rows := SelectedRows}} = State2_3, + State2 = init_and_get_nodes(mim2(), Opts2, []), + ?assertMatch(#{last_query_info := #{run_cleaning_result := {removed, [<<"test1">>]}}}, State2), + State2A = get_nodes(mim2(), State2, [test2]), + #{last_query_info := #{last_rows := SelectedRows}} = State2A, ?assertMatch(1, length(SelectedRows)). rdbms_backend_node_doesnt_remove_itself(_Config) -> @@ -143,24 +149,17 @@ rdbms_backend_node_doesnt_remove_itself(_Config) -> Opts2 = #{cluster_name => CN, node_name_to_insert => <<"test2">>}, %% test1 row is written with an old (mocked) timestamp - State1 = disco_init(mim(), Opts1), - {{ok, Nodes1_2}, State1_2} = disco_get_nodes(mim(), State1), - ?assertEqual([], Nodes1_2), - ?assertMatch(#{last_query_info := #{timestamp := Timestamp}}, State1_2), + State1 = init_and_get_nodes(mim(), Opts1, []), + ?assertMatch(#{last_query_info := #{timestamp := Timestamp}}, State1), unmock_timestamp(mim()), %% test1 row is not removed and timestamp is updated - {{ok, Nodes1_3}, State1_3} = disco_get_nodes(mim(), State1_2), - ?assertNotMatch(#{last_query_info := #{timestamp := Timestamp}}, State1_3), - ?assertMatch(#{last_query_info := #{run_cleaning_result := {removed, []}}}, - State1_3), - ?assertEqual([test1], Nodes1_3), + State1A = get_nodes(mim(), State1, [test1]), + ?assertNotMatch(#{last_query_info := #{timestamp := Timestamp}}, State1A), + ?assertMatch(#{last_query_info := #{run_cleaning_result := {removed, []}}}, State1A), - State2 = disco_init(mim2(), Opts2), - {{ok, Nodes2_2}, State2_2} = disco_get_nodes(mim2(), State2), - ?assertEqual([test1], Nodes2_2), - ?assertMatch(#{last_query_info := #{run_cleaning_result := {removed, []}}}, - State2_2). + State2 = init_and_get_nodes(mim2(), Opts2, [test1]), + ?assertMatch(#{last_query_info := #{run_cleaning_result := {removed, []}}}, State2). 
rdbms_backend_db_queries(_Config) -> CN = random_cluster_name(?FUNCTION_NAME), @@ -168,24 +167,27 @@ rdbms_backend_db_queries(_Config) -> TS2 = TS + 100, %% insertion fails if node name or node num is already added for the cluster - ?assertEqual({updated, 1}, insert_new(CN, <<"test1">>, 1, <<>>, TS)), - ?assertMatch({error, _}, insert_new(CN, <<"test1">>, 1, <<>>, TS)), - ?assertMatch({error, _}, insert_new(CN, <<"test1">>, 2, <<>>, TS)), - ?assertMatch({error, _}, insert_new(CN, <<"test2">>, 1, <<>>, TS)), - ?assertEqual({updated, 1}, insert_new(CN, <<"test2">>, 2, <<>>, TS)), + ?assertEqual({updated, 1}, insert_new(CN, <<"testA">>, 1, <<>>, TS)), + ?assertMatch({error, _}, insert_new(CN, <<"testA">>, 1, <<>>, TS)), + ?assertMatch({error, _}, insert_new(CN, <<"testA">>, 2, <<>>, TS)), + ?assertMatch({error, _}, insert_new(CN, <<"testB">>, 1, <<>>, TS)), + ?assertEqual({updated, 1}, insert_new(CN, <<"testB">>, 2, <<>>, TS)), + + %% insertion fails if node is a member of another cluster + ?assertMatch({error, _}, insert_new(<<"my-cluster">>, <<"testA">>, 1, <<>>, TS)), %% update of the timestamp works correctly {selected, SelectedNodes1} = select(CN), - ?assertEqual(lists:sort([{<<"test1">>, 1, <<>>, TS}, {<<"test2">>, 2, <<>>, TS}]), + ?assertEqual(lists:sort([{<<"testA">>, 1, <<>>, TS}, {<<"testB">>, 2, <<>>, TS}]), lists:sort(SelectedNodes1)), - ?assertEqual({updated, 1}, update_existing(CN, <<"test1">>, <<>>, TS2)), + ?assertEqual({updated, 1}, update_existing(<<"testA">>, <<>>, TS2)), {selected, SelectedNodes2} = select(CN), - ?assertEqual(lists:sort([{<<"test1">>, 1, <<>>, TS2}, {<<"test2">>, 2, <<>>, TS}]), + ?assertEqual(lists:sort([{<<"testA">>, 1, <<>>, TS2}, {<<"testB">>, 2, <<>>, TS}]), lists:sort(SelectedNodes2)), - %% node removal work correctly - ?assertEqual({updated, 1}, delete_node_from_db(CN, <<"test1">>)), - ?assertEqual({selected, [{<<"test2">>, 2, <<>>, TS}]}, select(CN)). + %% node removal works correctly + ?assertEqual({updated, 1}, delete_node_from_db(<<"testA">>)), + ?assertEqual({selected, [{<<"testB">>, 2, <<>>, TS}]}, select(CN)). rdbms_backend_publishes_node_ip(_Config) -> %% get_pairs would return only real available nodes, so use the real node names @@ -249,6 +251,19 @@ address_please_returns_ip_127_0_0_1_from_db(Config) -> %% Helpers %%-------------------------------------------------------------------- +init_and_get_nodes(RPCNode, Opts, ExpectedNodes) -> + StateIn = disco_init(RPCNode, Opts), + get_nodes(RPCNode, StateIn, ExpectedNodes, false). + +get_nodes(RPCNode, StateIn, ExpectedNodes) -> + get_nodes(RPCNode, StateIn, ExpectedNodes, true). + +get_nodes(RPCNode, StateIn, ExpectedNodes, AlreadyRegistered) -> + {{ok, Nodes}, State} = disco_get_nodes(RPCNode, StateIn), + ?assertEqual(lists:sort(ExpectedNodes), lists:sort(Nodes)), + ?assertMatch(#{last_query_info := #{already_registered := AlreadyRegistered}}, State), + State. + disco_init(Node, Opts) -> State = rpc(Node, mongoose_cets_discovery_rdbms, init, [Opts]), log_disco_request(?FUNCTION_NAME, Node, Opts, State), @@ -311,14 +326,14 @@ select(CN) -> ct:log("select(~p) = ~p", [CN, Ret]), Ret. 
-update_existing(CN, BinNode, Address, TS) ->
-    Ret = rpc(mim(), mongoose_cets_discovery_rdbms, update_existing, [CN, BinNode, Address, TS]),
-    ct:log("select(~p, ~p, ~p, ~p) = ~p", [CN, BinNode, Address, TS, Ret]),
+update_existing(BinNode, Address, TS) ->
+    Ret = rpc(mim(), mongoose_cets_discovery_rdbms, update_existing, [BinNode, Address, TS]),
+    ct:log("update_existing(~p, ~p, ~p) = ~p", [BinNode, Address, TS, Ret]),
     Ret.
 
-delete_node_from_db(CN, BinNode) ->
-    Ret = rpc(mim(), mongoose_cets_discovery_rdbms, delete_node_from_db, [CN, BinNode]),
-    ct:log("delete_node_from_db(~p, ~p) = ~p", [CN, BinNode, Ret]),
+delete_node_from_db(BinNode) ->
+    Ret = rpc(mim(), mongoose_cets_discovery_rdbms, delete_node_from_db, [BinNode]),
+    ct:log("delete_node_from_db(~p) = ~p", [BinNode, Ret]),
     Ret.
 
 start_cets_discovery(Config) ->
diff --git a/big_tests/tests/graphql_cets_SUITE.erl b/big_tests/tests/graphql_cets_SUITE.erl
index 6d1475df00c..6cd82b27863 100644
--- a/big_tests/tests/graphql_cets_SUITE.erl
+++ b/big_tests/tests/graphql_cets_SUITE.erl
@@ -251,9 +251,8 @@ register_bad_node() ->
     {updated, 1} = rpc(mim(), mongoose_cets_discovery_rdbms, insert_new, InsertArgs).
 
 ensure_bad_node_unregistered() ->
-    ClusterName = <<"mim">>,
     Node = <<"badnode@localhost">>,
-    DeleteArgs = [ClusterName, Node],
+    DeleteArgs = [Node],
     %% Ensure the node is removed
     {updated, _} = rpc(mim(), mongoose_cets_discovery_rdbms, delete_node_from_db, DeleteArgs).
 

From cb92b6ac04be6ae92bd223a3c7e8e7888214f6bc Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Pawe=C5=82=20Chrz=C4=85szcz?=
Date: Mon, 8 Apr 2024 15:46:34 +0200
Subject: [PATCH 7/7] Don't run CLI CETS tests in parallel

They are flaky (at least locally) when many of them run at once.
---
 big_tests/tests/graphql_cets_SUITE.erl | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/big_tests/tests/graphql_cets_SUITE.erl b/big_tests/tests/graphql_cets_SUITE.erl
index 6cd82b27863..8017a98361d 100644
--- a/big_tests/tests/graphql_cets_SUITE.erl
+++ b/big_tests/tests/graphql_cets_SUITE.erl
@@ -16,7 +16,7 @@ all() ->
 
 groups() ->
     [{admin_cets_http, [parallel], admin_cets_tests()},
-     {admin_cets_cli, [parallel], admin_cets_tests()},
+     {admin_cets_cli, [], admin_cets_tests()},
      {domain_admin_cets, [], domain_admin_tests()},
      {cets_not_configured, [parallel], cets_not_configured_test()}].
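
For context, the second element of each Common Test group tuple is the list of group
properties: the parallel property makes the cases in the group run concurrently, while an
empty list makes them run one after another, which is what admin_cets_cli does after this
change. A minimal sketch of the two forms (illustrative only, not taken from the suite):

    %% Cases in a [parallel] group run concurrently; with [] they run sequentially.
    groups() ->
        [{sequential_group, [], [case_a, case_b]},
         {parallel_group, [parallel], [case_a, case_b]}].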