This is an automated email from the ASF dual-hosted git repository.

vatamane pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/couchdb.git

commit 322fcf0c0269934bf78bab5f3b0e5961030ed462
Author: Nick Vatamaniuc <[email protected]>
AuthorDate: Tue Feb 17 18:08:03 2026 -0500

    Switch to hibernate_after
    
    When debugging processes getting stuck in hibernation bug [1] a few 
benchmarks
    showed that hibernation can be pretty expensive. I saw 20% or so reduction 
in
    latency in couch_work_queue if we hibernate after every single item 
insertion.
    
    The Erlang documentation warns about this [2]:
    
    > Use this feature with care, as hibernation implies at least two garbage
    collections (when hibernating and shortly after waking up) and is not 
something
    you want to do between each call to a busy server.
    
    In a few places like the `couch_work_queue` and `couch_db_updater` we did
    exactly that. However, since we added that, Erlang/OTP implemented a new
    `gen_server` option - `{hibernate_after, Timeout}`. It will trigger 
hibernation
    after an idle time. That seems ideal for us - it keeps expensive hibernation
    out of the main data path, as docs warn us about, but once the server goes 
idle
    we still get to run it to dereference any ref binaries.
    
    Since we encountered the recent hibernation bug [1] also add an option to
    disable it altogether, just to have a way to mitigate the issue when 
running on
    OTP 27 and 28 before the fix is out.
    
    [1] https://github.com/erlang/otp/issues/10651
    [2] https://www.erlang.org/doc/apps/stdlib/gen_server.html
    [3] 
https://github.com/apache/couchdb/commit/d9eb87f60ac8328afcca25cf6c7aae53b395b089
---
 rel/overlay/etc/default.ini               | 17 +++++++
 src/couch/src/couch_db_updater.erl        |  7 +--
 src/couch/src/couch_stream.erl            | 23 +++++----
 src/couch/src/couch_util.erl              | 13 +++++
 src/couch/src/couch_work_queue.erl        | 18 +++----
 src/couch/test/eunit/couch_util_tests.erl | 80 +++++++++++++++++++++++++++++++
 src/rexi/src/rexi_buffer.erl              | 10 ++--
 7 files changed, 138 insertions(+), 30 deletions(-)

diff --git a/rel/overlay/etc/default.ini b/rel/overlay/etc/default.ini
index d599656b1..6fbc99f58 100644
--- a/rel/overlay/etc/default.ini
+++ b/rel/overlay/etc/default.ini
@@ -1270,3 +1270,20 @@ url = {{nouveau_url}}
 ; periodically reload configuration from file.
 ; Set to infinity to disable.
 ;auto_reload_secs = infinity
+
+[hibernate_after]
+; Some processes which handle a large number of referenced binaries can benefit
+; from hibernating periodically, so they can run a complete garbage collection
+; and dereference those binaries. This section configures idle hibernation
+; timeouts for some of those processes. The value is milliseconds or
+; "infinity". Setting the value to "infinity" disables hibernation. The option
+; may be used as a mitigation strategy when running with earlier OTP 27/28
+; versions which had a bug [1] which prevented processes from waking up from
+; hibernation.
+;
+; [1] https://github.com/erlang/otp/issues/10651
+
+;rexi_buffer = 5000
+;couch_stream = 5000
+;couch_db_updater = 5000
+;couch_work_queue = 5000
diff --git a/src/couch/src/couch_db_updater.erl 
b/src/couch/src/couch_db_updater.erl
index 8394cd5a3..0d89c4750 100644
--- a/src/couch/src/couch_db_updater.erl
+++ b/src/couch/src/couch_db_updater.erl
@@ -49,7 +49,8 @@ init({Engine, DbName, FilePath, Options0}) ->
         % couch_db:validate_doc_update, which loads them lazily.
         NewDb = Db#db{main_pid = self()},
         proc_lib:init_ack({ok, NewDb}),
-        gen_server:enter_loop(?MODULE, [], NewDb)
+        GenOpts = couch_util:hibernate_after(?MODULE),
+        gen_server:enter_loop(?MODULE, GenOpts, NewDb)
     catch
         throw:InitError ->
             proc_lib:init_ack(InitError)
@@ -220,11 +221,11 @@ handle_info(
                     false ->
                         Db2
                 end,
-            {noreply, Db3, hibernate}
+            {noreply, Db3}
     catch
         throw:retry ->
             [catch (ClientPid ! {retry, self()}) || ClientPid <- Clients],
-            {noreply, Db, hibernate}
+            {noreply, Db}
     end;
 handle_info({'EXIT', _Pid, normal}, Db) ->
     {noreply, Db};
diff --git a/src/couch/src/couch_stream.erl b/src/couch/src/couch_stream.erl
index 0e4ccdae6..f3f0bf190 100644
--- a/src/couch/src/couch_stream.erl
+++ b/src/couch/src/couch_stream.erl
@@ -58,7 +58,8 @@ open({_StreamEngine, _StreamEngineState} = Engine) ->
     open(Engine, []).
 
 open({_StreamEngine, _StreamEngineState} = Engine, Options) ->
-    gen_server:start_link(?MODULE, {Engine, self(), erlang:get(io_priority), 
Options}, []).
+    GenOpts = couch_util:hibernate_after(?MODULE),
+    gen_server:start_link(?MODULE, {Engine, self(), erlang:get(io_priority), 
Options}, GenOpts).
 
 close(Pid) ->
     gen_server:call(Pid, close, infinity).
@@ -223,17 +224,15 @@ handle_call({write, Bin}, _From, Stream) ->
                     Md5_2 = couch_hash:md5_hash_update(Md5, WriteBin2)
             end,
 
-            {reply, ok,
-                Stream#stream{
-                    engine = NewEngine,
-                    written_len = WrittenLen2,
-                    buffer_list = [],
-                    buffer_len = 0,
-                    md5 = Md5_2,
-                    identity_md5 = IdenMd5_2,
-                    identity_len = IdenLen + BinSize
-                },
-                hibernate};
+            {reply, ok, Stream#stream{
+                engine = NewEngine,
+                written_len = WrittenLen2,
+                buffer_list = [],
+                buffer_len = 0,
+                md5 = Md5_2,
+                identity_md5 = IdenMd5_2,
+                identity_len = IdenLen + BinSize
+            }};
         true ->
             {reply, ok, Stream#stream{
                 buffer_list = [Bin | Buffer],
diff --git a/src/couch/src/couch_util.erl b/src/couch/src/couch_util.erl
index d93aaebd6..8ee165666 100644
--- a/src/couch/src/couch_util.erl
+++ b/src/couch/src/couch_util.erl
@@ -46,6 +46,7 @@
 -export([remove_sensitive_data/1]).
 -export([ejson_to_map/1]).
 -export([new_set/0, set_from_list/1]).
+-export([hibernate_after/1]).
 
 -include_lib("couch/include/couch_db.hrl").
 
@@ -64,6 +65,8 @@
     <<"feature_flags">>
 ]).
 
+-define(DEFAULT_HIBERNATE_AFTER, 5000).
+
 priv_dir() ->
     case code:priv_dir(couch) of
         {error, bad_name} ->
@@ -806,3 +809,13 @@ new_set() ->
 
 set_from_list(KVs) ->
     sets:from_list(KVs, [{version, 2}]).
+
+hibernate_after(Module) when is_atom(Module) ->
+    Key = atom_to_list(Module),
+    Default = ?DEFAULT_HIBERNATE_AFTER,
+    case config:get_integer_or_infinity("hibernate_after", Key, Default) of
+        infinity ->
+            [];
+        Timeout when is_integer(Timeout) ->
+            [{hibernate_after, Timeout}]
+    end.
diff --git a/src/couch/src/couch_work_queue.erl 
b/src/couch/src/couch_work_queue.erl
index 3c6ffeaf8..6ec9d796c 100644
--- a/src/couch/src/couch_work_queue.erl
+++ b/src/couch/src/couch_work_queue.erl
@@ -34,7 +34,8 @@
 }).
 
 new(Options) ->
-    gen_server:start_link(couch_work_queue, Options, []).
+    GenOpts = couch_util:hibernate_after(?MODULE),
+    gen_server:start_link(couch_work_queue, Options, GenOpts).
 
 queue(Wq, Item) when is_binary(Item) ->
     gen_server:call(Wq, {queue, Item, byte_size(Item)}, infinity);
@@ -73,7 +74,7 @@ init(Options) ->
         max_size = couch_util:get_value(max_size, Options, nil),
         max_items = couch_util:get_value(max_items, Options, nil)
     },
-    {ok, Q, hibernate}.
+    {ok, Q}.
 
 terminate(_Reason, #q{worker = undefined}) ->
     ok;
@@ -87,18 +88,13 @@ handle_call({queue, Item, Size}, From, #q{worker = 
undefined} = Q0) ->
         items = Q0#q.items + 1,
         queue = queue:in({Item, Size}, Q0#q.queue)
     },
-    case
-        (Q#q.size >= Q#q.max_size) orelse
-            (Q#q.items >= Q#q.max_items)
-    of
-        true ->
-            {noreply, Q#q{blocked = [From | Q#q.blocked]}, hibernate};
-        false ->
-            {reply, ok, Q, hibernate}
+    case (Q#q.size >= Q#q.max_size) orelse (Q#q.items >= Q#q.max_items) of
+        true -> {noreply, Q#q{blocked = [From | Q#q.blocked]}};
+        false -> {reply, ok, Q}
     end;
 handle_call({queue, Item, _}, _From, #q{worker = {W, _Max}} = Q) ->
     gen_server:reply(W, {ok, [Item]}),
-    {reply, ok, Q#q{worker = undefined}, hibernate};
+    {reply, ok, Q#q{worker = undefined}};
 handle_call({dequeue, _Max}, _From, #q{worker = {_, _}}) ->
     % Something went wrong - the same or a different worker is
     % trying to dequeue an item. We only allow one worker to wait
diff --git a/src/couch/test/eunit/couch_util_tests.erl 
b/src/couch/test/eunit/couch_util_tests.erl
index 5f8f1ce43..ff40a6fdd 100644
--- a/src/couch/test/eunit/couch_util_tests.erl
+++ b/src/couch/test/eunit/couch_util_tests.erl
@@ -206,3 +206,83 @@ ejson_to_map_test() ->
     ?assertEqual(#{a => 1, b => 2}, couch_util:ejson_to_map({[{b, 2}, {a, 
1}]})),
     ?assertEqual(#{<<"a">> => [1, #{}]}, couch_util:ejson_to_map({[{<<"a">>, 
[1, {[]}]}]})),
     ?assertEqual([#{true => 1}], couch_util:ejson_to_map([{[{true, 1}]}])).
+
+hibernate_after_test_() ->
+    {
+        foreach,
+        fun setup/0,
+        fun teardown/1,
+        [
+            ?TDEF_FE(t_hibernate),
+            ?TDEF_FE(t_do_not_hibernate)
+        ]
+    }.
+
+setup() ->
+    test_util:start_applications([config]).
+
+teardown(Ctx) ->
+    config:delete("hibernate_after", "couch_work_queue", false),
+    test_util:stop_applications(Ctx).
+
+t_hibernate(_) ->
+    ?assertEqual([{hibernate_after, 5000}], hibernate_cfg()),
+
+    % Set non-default value
+    config:set("hibernate_after", "couch_work_queue", "100", false),
+    ?assertEqual([{hibernate_after, 100}], hibernate_cfg()),
+
+    {ok, Q} = couch_work_queue:new([]),
+    % Enqueue item, creating gen_server activity
+    ok = couch_work_queue:queue(Q, potato),
+
+    % Assert that we eventually hibernate
+    wait_hibernate(Q),
+    {current_function, {_, F, _}} = process_info(Q, current_function),
+    % Note: different versions of OTP hibernate at a different function
+    ?assert(hibernate == F orelse loop_hibernate == F),
+
+    % We can wake up from hibernation and get back to work
+    ?assertEqual({ok, [potato]}, couch_work_queue:dequeue(Q)),
+
+    % Then we can get back into hibernation
+    wait_hibernate(Q),
+    {current_function, {_, F, _}} = process_info(Q, current_function),
+    ?assert(hibernate == F orelse loop_hibernate == F),
+
+    couch_work_queue:close(Q).
+
+t_do_not_hibernate(_) ->
+    config:set("hibernate_after", "couch_work_queue", "infinity", false),
+    ?assertEqual([], hibernate_cfg()),
+
+    {ok, Q} = couch_work_queue:new([]),
+
+    % Enqueue item, creating some gen_server activity
+    ok = couch_work_queue:queue(Q, potato),
+
+    % Assert that we do not hibernate
+    timer:sleep(200),
+    {current_function, {_, F, _}} = process_info(Q, current_function),
+    ?assertNot(hibernate == F orelse loop_hibernate == F),
+
+    % The queue works as expected without hibernation
+    ?assertEqual({ok, [potato]}, couch_work_queue:dequeue(Q)),
+
+    couch_work_queue:close(Q).
+
+% Helper functions
+
+hibernate_cfg() ->
+    couch_util:hibernate_after(couch_work_queue).
+
+wait_hibernate(Pid) ->
+    WaitFun = fun() ->
+        {current_function, {M, F, _}} = process_info(Pid, current_function),
+        case {M, F} of
+            {erlang, hibernate} -> ok;
+            {gen_server, loop_hibernate} -> ok;
+            {_, _} -> wait
+        end
+    end,
+    test_util:wait(WaitFun).
diff --git a/src/rexi/src/rexi_buffer.erl b/src/rexi/src/rexi_buffer.erl
index 73bf3dfae..90f8bf2e6 100644
--- a/src/rexi/src/rexi_buffer.erl
+++ b/src/rexi/src/rexi_buffer.erl
@@ -42,7 +42,8 @@
 }).
 
 start_link(ServerId) ->
-    gen_server:start_link({local, ServerId}, ?MODULE, [ServerId], []).
+    GenOpts = couch_util:hibernate_after(?MODULE),
+    gen_server:start_link({local, ServerId}, ?MODULE, [ServerId], GenOpts).
 
 send(Dest, Msg) ->
     Server = list_to_atom(lists:concat([rexi_buffer, "_", get_node(Dest)])),
@@ -90,9 +91,10 @@ handle_info(timeout, #state{sender = nil, count = C} = 
State) when C > 0 ->
     counters:add(Counter, 1, -1),
     case erlang:send(Dest, Msg, [noconnect, nosuspend]) of
         ok when C =:= 1 ->
-            % We just sent the last queued messsage, we'll use this opportunity
-            % to hibernate the process and run a garbage collection
-            {noreply, NewState, hibernate};
+            % We just sent the last queued message. If we stay idle for a
+            % while, as configured via couch_util:hibernate_after/1, we'll go
+            % into hibernation.
+            {noreply, NewState};
         ok when C > 1 ->
             % Use a zero timeout to recurse into this handler ASAP
             {noreply, NewState, 0};

Reply via email to