This is an automated email from the ASF dual-hosted git repository. iilyak pushed a commit to branch couch-stats-resource-tracker-http-api in repository https://gitbox.apache.org/repos/asf/couchdb.git
commit b2b4132e70b1fd4d2864739a5f9cb6a5da8abd19 Author: ILYA Khlopotov <[email protected]> AuthorDate: Tue Jun 10 12:13:07 2025 -0700 Add QUERY_CARDINALITY_LIMIT to group_by --- .../src/couch_stats_resource_tracker.hrl | 2 + src/couch_stats/src/csrt_query.erl | 46 ++++++++++++++-------- 2 files changed, 31 insertions(+), 17 deletions(-) diff --git a/src/couch_stats/src/couch_stats_resource_tracker.hrl b/src/couch_stats/src/couch_stats_resource_tracker.hrl index 97e38781c..b3b50d7bd 100644 --- a/src/couch_stats/src/couch_stats_resource_tracker.hrl +++ b/src/couch_stats/src/couch_stats_resource_tracker.hrl @@ -35,6 +35,8 @@ -define(ROWS_READ, rows_read). -define(FRPC_CHANGES_RETURNED, changes_returned). +-define(QUERY_CARDINALITY_LIMIT, 10_000). + %% Mapping of couch_stat metric names to #rctx{} field names. %% These are used for fields that we inc a counter on. -define(STATS_TO_KEYS, #{ diff --git a/src/couch_stats/src/csrt_query.erl b/src/couch_stats/src/csrt_query.erl index 62b03d695..c46345cd3 100644 --- a/src/couch_stats/src/csrt_query.erl +++ b/src/couch_stats/src/csrt_query.erl @@ -118,32 +118,44 @@ count_by(KeyFun) -> group_by(KeyFun, ValFun) -> group_by(KeyFun, ValFun, fun erlang:'+'/2). +group_by(KeyFun, ValFun, AggFun) -> + group_by(KeyFun, ValFun, AggFun, ?QUERY_CARDINALITY_LIMIT). + %% eg: group_by(mfa, docs_read). %% eg: group_by(fun(#rctx{mfa=MFA,docs_read=DR}) -> {MFA, DR} end, ioq_calls). %% eg: ^^ or: group_by([mfa, docs_read], ioq_calls). %% eg: group_by([username, dbname, mfa], docs_read). %% eg: group_by([username, dbname, mfa], ioq_calls). %% eg: group_by([username, dbname, mfa], js_filters). -group_by(KeyL, ValFun, AggFun) when is_list(KeyL) -> +group_by(KeyL, ValFun, AggFun, Limit) when is_list(KeyL) -> KeyFun = fun(Ele) -> list_to_tuple([field(Ele, Key) || Key <- KeyL]) end, - group_by(KeyFun, ValFun, AggFun); -group_by(Key, ValFun, AggFun) when is_atom(Key) -> - group_by(curry_field(Key), ValFun, AggFun); -group_by(KeyFun, Val, AggFun) when is_atom(Val) -> - group_by(KeyFun, curry_field(Val), AggFun); -group_by(KeyFun, ValFun, AggFun) -> + group_by(KeyFun, ValFun, AggFun, Limit); +group_by(Key, ValFun, AggFun, Limit) when is_atom(Key) -> + group_by(curry_field(Key), ValFun, AggFun, Limit); +group_by(KeyFun, Val, AggFun, Limit) when is_atom(Val) -> + group_by(KeyFun, curry_field(Val), AggFun, Limit); +group_by(KeyFun, ValFun, AggFun, Limit) -> FoldFun = fun(Ele, Acc) -> - Key = KeyFun(Ele), - Val = ValFun(Ele), - CurrVal = maps:get(Key, Acc, 0), - case AggFun(CurrVal, Val) of - 0 -> - Acc; - NewVal -> - maps:put(Key, NewVal, Acc) - end + case maps:size(Acc) =< Limit of + true -> + Key = KeyFun(Ele), + Val = ValFun(Ele), + CurrVal = maps:get(Key, Acc, 0), + case AggFun(CurrVal, Val) of + 0 -> + Acc; + NewVal -> + maps:put(Key, NewVal, Acc) + end; + false -> + throw({limit, Acc}) + end end, - ets:foldl(FoldFun, #{}, ?CSRT_ETS). + try + {ok, ets:foldl(FoldFun, #{}, ?CSRT_ETS)} + catch throw:{limit, Acc} -> + {limit, Acc} + end. %% Sorts largest first sorted(Map) when is_map(Map) ->
