This is an automated email from the ASF dual-hosted git repository.

vatamane pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/couchdb.git


The following commit(s) were added to refs/heads/master by this push:
     new dd1b281  When shard splitting make sure to reset the targets before any retries
dd1b281 is described below

commit dd1b2817bbf7a0efce858414310a0c822ce89468
Author: Nick Vatamaniuc <[email protected]>
AuthorDate: Fri Jan 10 12:53:46 2020 -0500

    When shard splitting make sure to reset the targets before any retries
    
    Previously the target was reset only when the whole job started, but not
    when the initial copy phase restarted on its own. If that happened, we left
    the target around, so the retry always failed with the `eexist` error.
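    
    (Illustration only, not part of this patch: `eexist` is the POSIX-style
    error for creating something that already exists, which is exactly what a
    retry hits when the previous attempt's target was left behind. A stand-in
    shell session using file:make_dir/1 shows the same failure mode.)
    
        1> file:make_dir("leftover_target").
        ok
        2> file:make_dir("leftover_target").
        {error,eexist}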
    
    Target reset has a check to make sure the shards are not in the global
    shard map, in case someone added them manually, for example. If they are
    found there, the job panics and exits.
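    
    (A minimal, self-contained sketch of the reset-before-retry pattern this
    patch adopts. Every name here -- reset_targets/3, the sets-based shard
    map, DeleteFun -- is a simplified stand-in, not the actual mem3 API.)
    
        %% Sketch only, assumed names: drop leftover targets before every
        %% initial_copy attempt, but crash instead of deleting a target
        %% that is already registered in the global shard map.
        -module(reset_sketch).
        -export([reset_targets/3]).
        
        %% Targets   : list of target shard names (binaries)
        %% ShardMap  : sets:set() of names currently in the global shard map
        %% DeleteFun : fun(Name) -> ok, removes a partially created target
        reset_targets(Targets, ShardMap, DeleteFun) ->
            lists:foreach(fun(Name) ->
                case sets:is_element(Name, ShardMap) of
                    true ->
                        %% Present in the shard map: someone added it on
                        %% purpose, so deleting it could lose data. Panic.
                        erlang:error({target_present_in_shard_map, Name});
                    false ->
                        ok = DeleteFun(Name)
                end
            end, Targets).
    
    (Calling something like this at the top of each initial_copy attempt makes
    retries idempotent: a second attempt no longer trips over leftovers from
    the first.)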
---
 src/mem3/src/mem3_reshard_job.erl         | 14 ++++----------
 src/mem3/test/eunit/mem3_reshard_test.erl |  6 +-----
 2 files changed, 5 insertions(+), 15 deletions(-)

diff --git a/src/mem3/src/mem3_reshard_job.erl b/src/mem3/src/mem3_reshard_job.erl
index d3a33d3..aedca21 100644
--- a/src/mem3/src/mem3_reshard_job.erl
+++ b/src/mem3/src/mem3_reshard_job.erl
@@ -184,19 +184,12 @@ run(#job{split_state = CurrState} = Job) ->
 
 
 set_start_state(#job{split_state = State} = Job) ->
-    case {State, maps:get(State, ?STATE_RESTART, undefined)} of
-        {_, undefined} ->
+    case maps:get(State, ?STATE_RESTART, undefined) of
+        undefined ->
             Fmt1 = "~p recover : unknown state ~s",
             couch_log:error(Fmt1, [?MODULE, jobfmt(Job)]),
             erlang:error({invalid_split_job_recover_state, Job});
-        {initial_copy, initial_copy} ->
-            % Since we recover from initial_copy to initial_copy, we need
-            % to reset the target state as initial_copy expects to
-            % create a new target
-            Fmt2 = "~p recover : resetting target ~s",
-            couch_log:notice(Fmt2, [?MODULE, jobfmt(Job)]),
-            reset_target(Job);
-        {_, StartState} ->
+        StartState ->
             Job#job{split_state = StartState}
     end.
 
@@ -403,6 +396,7 @@ initial_copy_impl(#job{source = Source, target = Targets0} = Job) ->
     LogMsg1 = "~p initial_copy started ~s",
     LogArgs1 = [?MODULE, shardsstr(Source, Targets0)],
     couch_log:notice(LogMsg1, LogArgs1),
+    reset_target(Job),
     case couch_db_split:split(SourceName, TMap, fun pickfun/3) of
         {ok, Seq} ->
             LogMsg2 = "~p initial_copy of ~s finished @ seq:~p",
diff --git a/src/mem3/test/eunit/mem3_reshard_test.erl b/src/mem3/test/eunit/mem3_reshard_test.erl
index 1e89755..7cd6b1f 100644
--- a/src/mem3/test/eunit/mem3_reshard_test.erl
+++ b/src/mem3/test/eunit/mem3_reshard_test.erl
@@ -453,19 +453,15 @@ target_reset_in_initial_copy(#{db1 := Db}) ->
             job_state = running,
             split_state = initial_copy
         },
-        BogusParent = spawn(fun() -> receive {ack, _, _} -> ok end end),
-        put('$ancestors', [BogusParent]), % make prock_lib:ack not blow up
-        meck:expect(mem3_reshard, checkpoint, 2, ok),
         meck:expect(couch_db_split, cleanup_target, 2, ok),
         meck:expect(couch_server, exists, fun
             (<<"t1">>) -> true;
             (<<"t2">>) -> true;
             (DbName) -> meck:passthrough([DbName])
         end),
-        JobPid = spawn(fun() -> mem3_reshard_job:init(Job) end),
+        JobPid = spawn(fun() -> mem3_reshard_job:initial_copy_impl(Job) end),
         meck:wait(2, couch_db_split, cleanup_target, ['_', '_'], 5000),
         exit(JobPid, kill),
-        exit(BogusParent, kill),
         ?assertEqual(2, meck:num_calls(couch_db_split, cleanup_target, 2))
     end)}.
 
