This is an automated email from the ASF dual-hosted git repository.

davisp pushed a commit to branch feature/database-partition-limits
in repository https://gitbox.apache.org/repos/asf/couchdb.git

commit 06f4445a04d6a9b00e4ca3ed07c405f1f7a6d3b5
Author:     Paul J. Davis <paul.joseph.da...@gmail.com>
AuthorDate: Fri Dec 14 11:06:03 2018 -0600

    Enforce partition size limits

    This limit helps prevent users from inadvertently misusing partitions
    by refusing to add documents when the size of a partition exceeds
    10GiB.

    Co-authored-by: Robert Newson <rnew...@apache.org>
---
 rel/overlay/etc/default.ini        |  5 +++
 src/chttpd/src/chttpd.erl          |  3 ++
 src/couch/src/couch_db_updater.erl | 81 ++++++++++++++++++++++++++++++++++++--
 3 files changed, 85 insertions(+), 4 deletions(-)

diff --git a/rel/overlay/etc/default.ini b/rel/overlay/etc/default.ini
index a77add4..ae9d313 100644
--- a/rel/overlay/etc/default.ini
+++ b/rel/overlay/etc/default.ini
@@ -64,6 +64,11 @@ default_engine = couch
 ; move deleted databases/shards there instead. You can then manually delete
 ; these files later, as desired.
 ;enable_database_recovery = false
+;
+; Set the maximum size allowed for a partition. This helps users avoid
+; inadvertently abusing partitions resulting in hot shards. The default
+; is 10GiB. A value of 0 or less will disable partition size checks.
+;max_partition_size = 10737418240
 
 [couchdb_engines]
 ; The keys in this section are the filename extension that
diff --git a/src/chttpd/src/chttpd.erl b/src/chttpd/src/chttpd.erl
index 2f241cd..6558b1e 100644
--- a/src/chttpd/src/chttpd.erl
+++ b/src/chttpd/src/chttpd.erl
@@ -873,6 +873,9 @@ error_info(conflict) ->
     {409, <<"conflict">>, <<"Document update conflict.">>};
 error_info({conflict, _}) ->
     {409, <<"conflict">>, <<"Document update conflict.">>};
+error_info({partition_overflow, DocId}) ->
+    Descr = <<"'", DocId/binary, "' exceeds partition limit">>,
+    {403, <<"partition_overflow">>, Descr};
 error_info({{not_found, missing}, {_, _}}) ->
     {409, <<"not_found">>, <<"missing_rev">>};
 error_info({forbidden, Error, Msg}) ->
diff --git a/src/couch/src/couch_db_updater.erl b/src/couch/src/couch_db_updater.erl
index 95508e2..00fee90 100644
--- a/src/couch/src/couch_db_updater.erl
+++ b/src/couch/src/couch_db_updater.erl
@@ -21,6 +21,7 @@
 -include("couch_db_int.hrl").
 
 -define(IDLE_LIMIT_DEFAULT, 61000).
+-define(DEFAULT_MAX_PARTITION_SIZE, 16#280000000). % 10 GiB
 
 
 -record(merge_acc, {
@@ -28,7 +29,8 @@
     merge_conflicts,
     add_infos = [],
     rem_seqs = [],
-    cur_seq
+    cur_seq,
+    full_partitions = []
 }).
 
 
@@ -466,13 +468,22 @@ merge_rev_trees([], [], Acc) ->
 merge_rev_trees([NewDocs | RestDocsList], [OldDocInfo | RestOldInfo], Acc) ->
     #merge_acc{
         revs_limit = Limit,
-        merge_conflicts = MergeConflicts
+        merge_conflicts = MergeConflicts,
+        full_partitions = FullPartitions
     } = Acc,
     % Track doc ids so we can debug large revision trees
     erlang:put(last_id_merged, OldDocInfo#full_doc_info.id),
     NewDocInfo0 = lists:foldl(fun({Client, NewDoc}, OldInfoAcc) ->
-        merge_rev_tree(OldInfoAcc, NewDoc, Client, MergeConflicts)
+        NewInfo = merge_rev_tree(OldInfoAcc, NewDoc, Client, MergeConflicts),
+        case is_overflowed(NewInfo, OldInfoAcc, FullPartitions) of
+            true when not MergeConflicts ->
+                DocId = NewInfo#doc.id,
+                send_result(Client, NewDoc, {partition_overflow, DocId}),
+                OldInfoAcc;
+            false ->
+                NewInfo
+        end
     end, OldDocInfo, NewDocs),
     NewDocInfo1 = maybe_stem_full_doc_info(NewDocInfo0, Limit),
     % When MergeConflicts is false, we updated #full_doc_info.deleted on every
@@ -595,6 +606,16 @@ merge_rev_tree(OldInfo, NewDoc, _Client, true) ->
     {NewTree, _} = couch_key_tree:merge(OldTree, NewTree0),
     OldInfo#full_doc_info{rev_tree = NewTree}.
 
+is_overflowed(_New, _Old, []) ->
+    false;
+is_overflowed(Old, Old, _FullPartitions) ->
+    false;
+is_overflowed(New, Old, FullPartitions) ->
+    Partition = couch_partition:from_docid(New#full_doc_info.id),
+    NewSize = estimate_size(New),
+    OldSize = estimate_size(Old),
+    lists:member(Partition, FullPartitions) andalso NewSize > OldSize.
+
 maybe_stem_full_doc_info(#full_doc_info{rev_tree = Tree} = Info, Limit) ->
     case config:get_boolean("couchdb", "stem_interactive_updates", true) of
         true ->
@@ -617,13 +638,31 @@ update_docs_int(Db, DocsList, LocalDocs, MergeConflicts, FullCommit) ->
         (Id, not_found) ->
             #full_doc_info{id=Id}
     end, Ids, OldDocLookups),
+
+    %% Get the list of full partitions
+    FullPartitions = case couch_db:is_partitioned(Db) of
+        true ->
+            case max_partition_size() of
+                N when N =< 0 ->
+                    [];
+                Max ->
+                    Partitions = lists:usort(lists:map(fun(Id) ->
+                        couch_partition:from_docid(Id)
+                    end, Ids)),
+                    [P || P <- Partitions, partition_size(Db, P) >= Max]
+            end;
+        false ->
+            []
+    end,
+
     % Merge the new docs into the revision trees.
     AccIn = #merge_acc{
         revs_limit = RevsLimit,
         merge_conflicts = MergeConflicts,
         add_infos = [],
         rem_seqs = [],
-        cur_seq = UpdateSeq
+        cur_seq = UpdateSeq,
+        full_partitions = FullPartitions
     },
     {ok, AccOut} = merge_rev_trees(DocsList, OldDocInfos, AccIn),
     #merge_acc{
@@ -685,6 +724,40 @@ increment_local_doc_revs(#doc{revs = {0, [RevStr | _]}} = Doc) ->
 increment_local_doc_revs(#doc{}) ->
     {error, <<"Invalid rev format">>}.
 
+max_partition_size() ->
+    config:get_integer("couchdb", "max_partition_size",
+        ?DEFAULT_MAX_PARTITION_SIZE).
+
+partition_size(Db, Partition) ->
+    {ok, Info} = couch_db:get_partition_info(Db, Partition),
+    Sizes = couch_util:get_value(sizes, Info),
+    couch_util:get_value(external, Sizes).
+
+estimate_size(#full_doc_info{} = FDI) ->
+    #full_doc_info{rev_tree = RevTree} = FDI,
+    Fun = fun
+        (_Rev, Value, leaf, SizesAcc) ->
+            case Value of
+                #doc{} = Doc ->
+                    ExternalSize = get_meta_body_size(Value#doc.meta),
+                    {size_info, AttSizeInfo} =
+                        lists:keyfind(size_info, 1, Doc#doc.meta),
+                    Leaf = #leaf{
+                        sizes = #size_info{
+                            external = ExternalSize
+                        },
+                        atts = AttSizeInfo
+                    },
+                    add_sizes(leaf, Leaf, SizesAcc);
+                #leaf{} ->
+                    add_sizes(leaf, Value, SizesAcc)
+            end;
+        (_Rev, _Value, branch, SizesAcc) ->
+            SizesAcc
+    end,
+    {_, FinalES, FinalAtts} = couch_key_tree:fold(Fun, {0, 0, []}, RevTree),
+    TotalAttSize = lists:foldl(fun({_, S}, A) -> S + A end, 0, FinalAtts),
+    FinalES + TotalAttSize.
 
 purge_docs(Db, []) ->
     {ok, Db, []};
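
A quick way to see what this patch does at runtime (a sketch only, not part of the commit): from a remsh, the same calls the patch relies on can report whether a partition has already crossed the configured limit. Here Db is assumed to be an already opened couch_db handle on a partitioned database, and <<"sensor-reading">> is a made-up partition key.

    %% Hypothetical remsh sketch; mirrors max_partition_size/0 and
    %% partition_size/2 from the patch above.
    Limit = config:get_integer("couchdb", "max_partition_size", 16#280000000),
    {ok, Info} = couch_db:get_partition_info(Db, <<"sensor-reading">>),
    Sizes = couch_util:get_value(sizes, Info),
    External = couch_util:get_value(external, Sizes),
    IsFull = (Limit > 0) andalso (External >= Limit).

When IsFull is true, an interactive update that grows that partition is answered through the new chttpd error_info clause, i.e. an HTTP 403 with error "partition_overflow" and a reason of the form "'<docid>' exceeds partition limit".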
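A few notes on scope, all visible in the diff: the check only applies to partitioned databases, and only to partitions whose current "external" size already meets or exceeds max_partition_size when the update batch arrives. is_overflowed/3 additionally requires the merged revision tree to grow the estimated size (NewSize > OldSize), so updates that keep a full partition the same size or shrink it are still accepted. Setting max_partition_size to 0 or less disables the check entirely, and the Erlang default 16#280000000 is the same 10737418240 bytes (10 GiB) documented in default.ini.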