This is an automated email from the ASF dual-hosted git repository.

vatamane pushed a commit to branch fix-badmatch-in-find-next-node
in repository https://gitbox.apache.org/repos/asf/couchdb.git

commit 1f3b34d64ea06dbf8393ab6cd4d97638683fbc17
Author: Nick Vatamaniuc <[email protected]>
AuthorDate: Wed Aug 21 19:44:07 2024 -0400

    Fix badmatch in find_next_node/0
    
    Previously, `find_next_node/0` didn't handle odd cases such as current node 
not
    being in the `mem3:nodes()` or having an empty `mem3:nodes()` and just 
crashed
    with a badmatch.
    
    Make sure to handle those cases and return `node()` for them. We already
    handled `node()` target as a no-op in `push/2`. We just have to also add it 
to
    `maybe_resubmit/2`.
    
    Also, to avoid confusion between "live" nodes and "all" nodes, in the helper
    function opt to just use `Mem3Nodes` variable name to make it clear what 
we're
    dealing with.
    
    Fix #5191
---
 src/mem3/src/mem3_sync.erl | 53 +++++++++++++++++++++++++++++++++++++++-------
 1 file changed, 45 insertions(+), 8 deletions(-)

diff --git a/src/mem3/src/mem3_sync.erl b/src/mem3/src/mem3_sync.erl
index 9f722701f..f01533805 100644
--- a/src/mem3/src/mem3_sync.erl
+++ b/src/mem3/src/mem3_sync.erl
@@ -187,10 +187,12 @@ maybe_resubmit(State, #job{name = DbName, node = Node} = 
Job) ->
     case lists:member(DbName, local_dbs()) of
         true ->
             case find_next_node() of
-                Node ->
+                Node when Node =/= node() ->
                     add_to_queue(State, Job);
                 _ ->
-                    % don't resubmit b/c we have a new replication target
+                    % don't resubmit b/c we have a new replication target and
+                    % don't push to self, as it's a no-op (push/2 has a guard
+                    % for it but the topology might have changed since)
                     State
             end;
         false ->
@@ -302,12 +304,31 @@ sync_push(ShardName, N) ->
     gen_server:call(mem3_sync, {push, #job{name = ShardName, node = N}}, 
infinity).
 
 find_next_node() ->
-    LiveNodes = [node() | nodes()],
-    AllNodes0 = lists:sort(mem3:nodes()),
-    AllNodes1 = [X || X <- AllNodes0, lists:member(X, LiveNodes)],
-    AllNodes = AllNodes1 ++ [hd(AllNodes1)],
-    [_Self, Next | _] = lists:dropwhile(fun(N) -> N =/= node() end, AllNodes),
-    Next.
+    Self = node(),
+    LiveNodes = [Self | nodes()],
+    Mem3Nodes = mem3:nodes(),
+    find_next_node(Self, LiveNodes, Mem3Nodes).
+
+find_next_node(Self, LiveNodes, Mem3Nodes) ->
+    SortedMem3Nodes = lists:sort(Mem3Nodes),
+    LiveMem3Nodes = [N || N <- SortedMem3Nodes, lists:member(N, LiveNodes)],
+    case LiveMem3Nodes of
+        [] ->
+            % No live mem3 nodes. Strange. Return self as we can always
+            % replicate to ourselves, which is just a no-op.
+            Self;
+        [H | _] ->
+            % Append head to the end we have always have a "next"
+            LiveMem3Nodes1 = LiveMem3Nodes ++ [H],
+            case lists:dropwhile(fun(N) -> N =/= Self end, LiveMem3Nodes1) of
+                [] ->
+                    % We're not in the mem3 set of nodes. Also, strange. Return
+                    % self anyway as we can replicate to ourselves.
+                    Self;
+                [_Self, Next | _] ->
+                    Next
+            end
+    end.
 
 %% @doc Finds the next {DbName,Node} pair in the list of waiting replications
 %% which does not correspond to an already running replication
@@ -364,3 +385,19 @@ maybe_redirect(Node) ->
             couch_log:debug("Redirecting push from ~p to ~p", [Node, 
Redirect]),
             list_to_existing_atom(Redirect)
     end.
+
+-ifdef(TEST).
+
+-include_lib("couch/include/couch_eunit.hrl").
+
+find_next_node_test() ->
+    ?assertEqual(n, find_next_node(n, [n], [])),
+    ?assertEqual(n, find_next_node(n, [n], [n])),
+    ?assertEqual(x, find_next_node(n, [a, n, x], [a, n, x])),
+    ?assertEqual(a, find_next_node(n, [a, n], [a, n])),
+    ?assertEqual(n, find_next_node(n, [a, n], [a])),
+    ?assertEqual(x, find_next_node(n, [n, x], [x, n])),
+    ?assertEqual(x, find_next_node(n, [n, x], [n, x])),
+    ?assertEqual(a, find_next_node(n, [a, n, x], [a, n, y])).
+
+-endif.

Reply via email to