davisp commented on a change in pull request #470: Scheduling Replicator URL: https://github.com/apache/couchdb/pull/470#discussion_r110712109
########## File path: src/couch_replicator/src/couch_replicator_doc_processor.erl ########## @@ -0,0 +1,946 @@ +% Licensed under the Apache License, Version 2.0 (the "License"); you may not +% use this file except in compliance with the License. You may obtain a copy of +% the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +% License for the specific language governing permissions and limitations under +% the License. + +-module(couch_replicator_doc_processor). + +-behaviour(gen_server). +-behaviour(couch_multidb_changes). + +-export([ + start_link/0 +]). + +-export([ + init/1, + terminate/2, + handle_call/3, + handle_info/2, + handle_cast/2, + code_change/3 +]). + +-export([ + db_created/2, + db_deleted/2, + db_found/2, + db_change/3 +]). + +-export([ + docs/1, + doc/2, + update_docs/0, + get_worker_ref/1, + notify_cluster_event/2 +]). + +-include_lib("couch/include/couch_db.hrl"). +-include("couch_replicator.hrl"). + +-import(couch_replicator_utils, [ + get_json_value/2, + get_json_value/3 +]). + +-define(DEFAULT_UPDATE_DOCS, false). +-define(ERROR_MAX_BACKOFF_EXPONENT, 12). % ~ 1 day on average +-define(TS_DAY_SEC, 86400). + +-type filter_type() :: nil | view | user | docids | mango. +-type repstate() :: initializing | error | scheduled. + + +-record(rdoc, { + id :: db_doc_id() | '_' | {any(), '_'}, + state :: repstate() | '_', + rep :: #rep{} | nil | '_', + rid :: rep_id() | nil | '_', + filter :: filter_type() | '_', + info :: binary() | nil | '_', + errcnt :: non_neg_integer() | '_', + worker :: reference() | nil | '_', + last_updated :: erlang:timestamp() | '_' +}). 
+ + +% couch_multidb_changes API callbacks + +db_created(DbName, Server) -> + couch_stats:increment_counter([couch_replicator, docs, dbs_created]), + couch_replicator_docs:ensure_rep_ddoc_exists(DbName), + Server. + + +db_deleted(DbName, Server) -> + couch_stats:increment_counter([couch_replicator, docs, dbs_deleted]), + ok = gen_server:call(?MODULE, {clean_up_replications, DbName}, infinity), + Server. + + +db_found(DbName, Server) -> + couch_stats:increment_counter([couch_replicator, docs, dbs_found]), + couch_replicator_docs:ensure_rep_ddoc_exists(DbName), + Server. + + +db_change(DbName, {ChangeProps} = Change, Server) -> + couch_stats:increment_counter([couch_replicator, docs, db_changes]), + try + ok = process_change(DbName, Change) + catch + _Tag:Error -> + {RepProps} = get_json_value(doc, ChangeProps), + DocId = get_json_value(<<"_id">>, RepProps), + couch_replicator_docs:update_failed(DbName, DocId, Error) + end, + Server. + + +-spec get_worker_ref(db_doc_id()) -> reference() | nil. +get_worker_ref({DbName, DocId}) when is_binary(DbName), is_binary(DocId) -> + case ets:lookup(?MODULE, {DbName, DocId}) of + [#rdoc{worker = WRef}] when is_reference(WRef) -> + WRef; + [#rdoc{worker = nil}] -> + nil; + [] -> + nil + end. + + +% Cluster membership change notification callback +-spec notify_cluster_event(pid(), {cluster, any()}) -> ok. +notify_cluster_event(Server, {cluster, _} = Event) -> + gen_server:cast(Server, Event). 
+ + +% Private helpers for multidb changes API, these updates into the doc +% processor gen_server + +process_change(DbName, {Change}) -> + {RepProps} = JsonRepDoc = get_json_value(doc, Change), + DocId = get_json_value(<<"_id">>, RepProps), + Owner = couch_replicator_clustering:owner(DbName, DocId), + Id = {DbName, DocId}, + case {Owner, get_json_value(deleted, Change, false)} of + {_, true} -> + ok = gen_server:call(?MODULE, {removed, Id}, infinity); + {unstable, false} -> + couch_log:notice("Not starting '~s' as cluster is unstable", [DocId]); + {ThisNode, false} when ThisNode =:= node() -> + case get_json_value(<<"_replication_state">>, RepProps) of + undefined -> + ok = process_updated(Id, JsonRepDoc); + <<"triggered">> -> + maybe_remove_state_fields(DbName, DocId), + ok = process_updated(Id, JsonRepDoc); + <<"completed">> -> + ok = gen_server:call(?MODULE, {completed, Id}, infinity); + <<"error">> -> + % Handle replications started from older versions of replicator + % which wrote transient errors to replication docs + maybe_remove_state_fields(DbName, DocId), + ok = process_updated(Id, JsonRepDoc); + <<"failed">> -> + ok + end; + {Owner, false} -> + ok + end, + ok. + + +maybe_remove_state_fields(DbName, DocId) -> + case update_docs() of + true -> + ok; + false -> + couch_replicator_docs:remove_state_fields(DbName, DocId) + end. + + +process_updated({DbName, _DocId} = Id, JsonRepDoc) -> + % Parsing replication doc (but not calculating the id) could throw an + % exception which would indicate this document is malformed. This exception + % should propagate to db_change function and will be recorded as permanent + % failure in the document. User will have to delete and re-create the + % document to fix the problem. Review comment: Delete and recreate? Would an update not work just as well for some reason? ---------------------------------------------------------------- This is an automated message from the Apache Git Service. 
To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: users@infra.apache.org With regards, Apache Git Services
