davisp commented on a change in pull request #470: Scheduling Replicator URL: https://github.com/apache/couchdb/pull/470#discussion_r110433227
########## File path: src/couch_replicator/src/couch_multidb_changes.erl ########## @@ -0,0 +1,819 @@ +% Licensed under the Apache License, Version 2.0 (the "License"); you may not +% use this file except in compliance with the License. You may obtain a copy of +% the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +% License for the specific language governing permissions and limitations under +% the License. + +-module(couch_multidb_changes). +-behaviour(gen_server). + +-export([start_link/4]). + +-export([init/1, handle_call/3, handle_info/2, handle_cast/2]). +-export([code_change/3, terminate/2]). + +-export([changes_reader/3, changes_reader_cb/3]). + +-include_lib("couch/include/couch_db.hrl"). +-include_lib("mem3/include/mem3.hrl"). + +-define(CTX, {user_ctx, #user_ctx{roles=[<<"_admin">>, <<"_replicator">>]}}). + +-define(AVG_DELAY_MSEC, 100). +-define(MAX_DELAY_MSEC, 60000). + +-record(state, { + tid :: ets:tid(), + mod :: atom(), + ctx :: term(), + suffix :: binary(), + event_server :: reference(), + scanner :: nil | pid(), + pids :: [{binary(),pid()}], + skip_ddocs :: boolean() +}). + + + +% Behavior API + +% For each db shard with a matching suffix, report created, +% deleted, found (discovered) and change events. + +-callback db_created(DbName :: binary(), Context :: term()) -> + Context :: term(). + +-callback db_deleted(DbName :: binary(), Context :: term()) -> + Context :: term(). + +-callback db_found(DbName :: binary(), Context :: term()) -> + Context :: term(). + +-callback db_change(DbName :: binary(), Change :: term(), Context :: term()) -> + Context :: term(). 
+ + + +% External API + + +% Opts list can contain: +% - `skip_ddocs` : Skip design docs + +-spec start_link(binary(), module(), term(), list()) -> + {ok, pid()} | ignore | {error, term()}. +start_link(DbSuffix, Module, Context, Opts) when + is_binary(DbSuffix), is_atom(Module), is_list(Opts) -> + gen_server:start_link(?MODULE, [DbSuffix, Module, Context, Opts], []). + + +% gen_server callbacks + +init([DbSuffix, Module, Context, Opts]) -> + process_flag(trap_exit, true), + Server = self(), + {ok, #state{ + tid = ets:new(?MODULE, [set, protected]), + mod = Module, + ctx = Context, + suffix = DbSuffix, + event_server = register_with_event_server(Server), + scanner = spawn_link(fun() -> scan_all_dbs(Server, DbSuffix) end), + pids = [], + skip_ddocs = proplists:is_defined(skip_ddocs, Opts) + }}. + + +terminate(_Reason, _State) -> + ok. + + +handle_call({change, DbName, Change}, _From, + #state{skip_ddocs=SkipDDocs, mod=Mod, ctx=Ctx} = State) -> + case {SkipDDocs, is_design_doc(Change)} of + {true, true} -> + {reply, ok, State}; + {_, _} -> + {reply, ok, State#state{ctx=Mod:db_change(DbName, Change, Ctx)}} + end; + +handle_call({checkpoint, DbName, EndSeq}, _From, #state{tid=Ets} = State) -> + case ets:lookup(Ets, DbName) of + [] -> + true = ets:insert(Ets, {DbName, EndSeq, false}); + [{DbName, _OldSeq, Rescan}] -> + true = ets:insert(Ets, {DbName, EndSeq, Rescan}) + end, + {reply, ok, State}. + + +handle_cast({resume_scan, DbName}, State) -> + {noreply, resume_scan(DbName, State)}. 
+ + +handle_info({'$couch_event', DbName, Event}, #state{suffix = Suf} = State) -> + case Suf =:= couch_db:dbname_suffix(DbName) of + true -> + {noreply, db_callback(Event, DbName, State)}; + _ -> + {noreply, State} + end; + +handle_info({'DOWN', Ref, _, _, Info}, #state{event_server = Ref} = State) -> + {stop, {couch_event_server_died, Info}, State}; + +handle_info({'EXIT', From, normal}, #state{scanner = From} = State) -> + {noreply, State#state{scanner=nil}}; + +handle_info({'EXIT', From, Reason}, #state{scanner = From} = State) -> + {stop, {scanner_died, Reason}, State}; + +handle_info({'EXIT', From, Reason}, #state{pids = Pids} = State) -> + couch_log:info("~p change feed exited ~p",[State#state.suffix, From]), + case lists:keytake(From, 2, Pids) of + {value, {DbName, From}, NewPids} -> + if Reason == normal -> ok; true -> + Fmt = "~s : Known change feed ~w died :: ~w", + couch_log:error(Fmt, [?MODULE, From, Reason]) + end, + NewState = State#state{pids = NewPids}, + case ets:lookup(State#state.tid, DbName) of + [{DbName, _EndSeq, true}] -> + {noreply, resume_scan(DbName, NewState)}; + _ -> + {noreply, NewState} + end; + false when Reason == normal -> + {noreply, State}; + false -> + Fmt = "~s(~p) : Unknown pid ~w died :: ~w", + couch_log:error(Fmt, [?MODULE, State#state.suffix, From, Reason]), + {stop, {unexpected_exit, From, Reason}, State} + end; + +handle_info(_Msg, State) -> + {noreply, State}. + + +code_change(_OldVsn, State, _Extra) -> + {ok, State}. + + +% Private functions + +-spec register_with_event_server(pid()) -> reference(). +register_with_event_server(Server) -> + Ref = erlang:monitor(process, couch_event_server), + couch_event:register_all(Server), + Ref. + + +-spec db_callback(created | deleted | updated, binary(), #state{}) -> #state{}. 
+db_callback(created, DbName, #state{mod = Mod, ctx = Ctx} = State) -> + State#state{ctx = Mod:db_created(DbName, Ctx)}; + +db_callback(deleted, DbName, #state{mod = Mod, ctx = Ctx} = State) -> + State#state{ctx = Mod:db_deleted(DbName, Ctx)}; + +db_callback(updated, DbName, State) -> + resume_scan(DbName, State); + +db_callback(_Other, _DbName, State) -> + State. + + +-spec resume_scan(binary(), #state{}) -> #state{}. +resume_scan(DbName, #state{pids=Pids, tid=Ets} = State) -> + case {lists:keyfind(DbName, 1, Pids), ets:lookup(Ets, DbName)} of + {{DbName, _}, []} -> + % Found existing change feed, but not entry in ETS + % Flag a need to rescan from begining + true = ets:insert(Ets, {DbName, 0, true}), + State; + {{DbName, _}, [{DbName, EndSeq, _}]} -> + % Found existing change feed and entry in ETS + % Flag a need to rescan from last ETS checkpoint + true = ets:insert(Ets, {DbName, EndSeq, true}), + State; + {false, []} -> + % No existing change feed running. No entry in ETS. + % This is first time seeing this db shard. + % Notify user with a found callback. Insert checkpoint + % entry in ETS to start from 0. And start a change feed. + true = ets:insert(Ets, {DbName, 0, false}), + Mod = State#state.mod, + Ctx = Mod:db_found(DbName, State#state.ctx), + Pid = start_changes_reader(DbName, 0), + State#state{ctx=Ctx, pids=[{DbName, Pid} | Pids]}; + {false, [{DbName, EndSeq, _}]} -> + % No existing change feed running. Found existing checkpoint. + % Start a new change reader from last checkpoint. + true = ets:insert(Ets, {DbName, EndSeq, false}), + Pid = start_changes_reader(DbName, EndSeq), + State#state{pids=[{DbName, Pid} | Pids]} + end. + + + +start_changes_reader(DbName, Since) -> + spawn_link(?MODULE, changes_reader, [self(), DbName, Since]). 
+ + +changes_reader(Server, DbName, Since) -> + {ok, Db} = couch_db:open_int(DbName, [?CTX, sys_db]), + ChFun = couch_changes:handle_db_changes( + #changes_args{ + include_docs = true, + since = Since, + feed = "normal", + timeout = infinity + }, {json_req, null}, Db), Review comment: Minor Nit: I'd pull the `#changes_args{}` out and assign it to a variable and then call handle_db_changes/3 on a single line. Generally we have three patterns for formatting a function call: 1. all on one line 2. all args and closing paren and comma on new line indented twice more 3. each arg on a separate line, indented twice, closing paren/comma on separate line indented once The idea here being that we want to communicate the sections and make things easier on eyeballs. ---------------------------------------------------------------- This is an automated message from the Apache Git Service. To respond to the message, please log on GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: [email protected] With regards, Apache Git Services
