This is an automated email from the ASF dual-hosted git repository.

vatamane pushed a commit to branch change-hibernation-strategy
in repository https://gitbox.apache.org/repos/asf/couchdb.git

commit 74aed00c70993afb5216a3cab8cca58984ef067f
Author: Nick Vatamaniuc <[email protected]>
AuthorDate: Tue Feb 17 18:08:03 2026 -0500

    Switch to hibernate_after
    
    While debugging the bug where processes get stuck in hibernation [1], a few benchmarks
    showed that hibernation can be pretty expensive. I saw a 20% or so reduction
    in latency in couch_work_queue when we do not hibernate after every single
    item insertion.
    
    The Erlang documentation warns about this [2]:
    
    > Use this feature with care, as hibernation implies at least two garbage
    > collections (when hibernating and shortly after waking up) and is not
    > something you want to do between each call to a busy server.
    
    In a few places like the `couch_work_queue` and `couch_db_updater` we did
    exactly that. However, since we added that, more recent Erlang/OTP versions
    implemented a new `gen_server` option - `{hibernate_after, Timeout}`. It
    will trigger hibernation after an idle time. That seems ideal for us - it
    keeps expensive hibernation out of the main data path, as the docs warn us
    about, but once the server goes idle we still get to run it to dereference
    any ref binaries.
    
    Since we encountered the recent hibernation bug [1], also add an option to
    disable it altogether, just to have a way to mitigate the issue when
    running on OTP 27 and 28 before the fix is out.
    
    [1] https://github.com/erlang/otp/issues/10651
    [2] https://www.erlang.org/doc/apps/stdlib/gen_server.html
    [3] https://github.com/apache/couchdb/commit/d9eb87f60ac8328afcca25cf6c7aae53b395b089
---
 rel/overlay/etc/default.ini               | 17 +++++++
 src/couch/src/couch_db_updater.erl        |  8 +--
 src/couch/src/couch_stream.erl            | 24 ++++-----
 src/couch/src/couch_util.erl              | 10 ++++
 src/couch/src/couch_work_queue.erl        | 20 ++++----
 src/couch/test/eunit/couch_util_tests.erl | 81 +++++++++++++++++++++++++++++++
 src/rexi/src/rexi_buffer.erl              | 11 +++--
 7 files changed, 141 insertions(+), 30 deletions(-)

diff --git a/rel/overlay/etc/default.ini b/rel/overlay/etc/default.ini
index d599656b1..34d902118 100644
--- a/rel/overlay/etc/default.ini
+++ b/rel/overlay/etc/default.ini
@@ -1270,3 +1270,20 @@ url = {{nouveau_url}}
 ; periodically reload configuration from file.
 ; Set to infinity to disable.
 ;auto_reload_secs = infinity
+
+[hibernate_after]
+; Some processes which handle a large number of referenced binaries can benefit
+; from hibernating periodically, so they can run a complete garbage collection
+; and dereference those binaries. This section configures idle hibernation
+; timeouts for some of those processes. The value is milliseconds or
+; "infinity". Setting the value to "infinity" disables hibernation. The option
+; may be used as a mitigation strategy when running with earlier OTP 27/28
+; versions which had a bug [1] that prevented processes from waking up from
+; hibernation.
+;
+; [1] https://github.com/erlang/otp/issues/10651
+
+;rexi_buffer = 1000
+;couch_stream = 1000
+;couch_db_updater = 1000
+;couch_work_queue = 1000
diff --git a/src/couch/src/couch_db_updater.erl 
b/src/couch/src/couch_db_updater.erl
index 8394cd5a3..b4b42001c 100644
--- a/src/couch/src/couch_db_updater.erl
+++ b/src/couch/src/couch_db_updater.erl
@@ -21,6 +21,7 @@
 
 % 10 GiB
 -define(DEFAULT_MAX_PARTITION_SIZE, 16#280000000).
+-define(DEFAULT_HIBERNATE_AFTER, 1000).
 
 -record(merge_acc, {
     revs_limit,
@@ -49,7 +50,8 @@ init({Engine, DbName, FilePath, Options0}) ->
         % couch_db:validate_doc_update, which loads them lazily.
         NewDb = Db#db{main_pid = self()},
         proc_lib:init_ack({ok, NewDb}),
-        gen_server:enter_loop(?MODULE, [], NewDb)
+        GenOpts = couch_util:hibernate_after(?MODULE, 
?DEFAULT_HIBERNATE_AFTER),
+        gen_server:enter_loop(?MODULE, GenOpts, NewDb)
     catch
         throw:InitError ->
             proc_lib:init_ack(InitError)
@@ -220,11 +222,11 @@ handle_info(
                     false ->
                         Db2
                 end,
-            {noreply, Db3, hibernate}
+            {noreply, Db3}
     catch
         throw:retry ->
             [catch (ClientPid ! {retry, self()}) || ClientPid <- Clients],
-            {noreply, Db, hibernate}
+            {noreply, Db}
     end;
 handle_info({'EXIT', _Pid, normal}, Db) ->
     {noreply, Db};
diff --git a/src/couch/src/couch_stream.erl b/src/couch/src/couch_stream.erl
index 0e4ccdae6..1b6787c1b 100644
--- a/src/couch/src/couch_stream.erl
+++ b/src/couch/src/couch_stream.erl
@@ -36,6 +36,7 @@
 ]).
 
 -define(DEFAULT_BUFFER_SIZE, 4096).
+-define(DEFAULT_HIBERNATE_AFTER, 1000).
 
 -record(stream, {
     engine,
@@ -58,7 +59,8 @@ open({_StreamEngine, _StreamEngineState} = Engine) ->
     open(Engine, []).
 
 open({_StreamEngine, _StreamEngineState} = Engine, Options) ->
-    gen_server:start_link(?MODULE, {Engine, self(), erlang:get(io_priority), 
Options}, []).
+    GenOpts = couch_util:hibernate_after(?MODULE, ?DEFAULT_HIBERNATE_AFTER),
+    gen_server:start_link(?MODULE, {Engine, self(), erlang:get(io_priority), 
Options}, GenOpts).
 
 close(Pid) ->
     gen_server:call(Pid, close, infinity).
@@ -223,17 +225,15 @@ handle_call({write, Bin}, _From, Stream) ->
                     Md5_2 = couch_hash:md5_hash_update(Md5, WriteBin2)
             end,
 
-            {reply, ok,
-                Stream#stream{
-                    engine = NewEngine,
-                    written_len = WrittenLen2,
-                    buffer_list = [],
-                    buffer_len = 0,
-                    md5 = Md5_2,
-                    identity_md5 = IdenMd5_2,
-                    identity_len = IdenLen + BinSize
-                },
-                hibernate};
+            {reply, ok, Stream#stream{
+                engine = NewEngine,
+                written_len = WrittenLen2,
+                buffer_list = [],
+                buffer_len = 0,
+                md5 = Md5_2,
+                identity_md5 = IdenMd5_2,
+                identity_len = IdenLen + BinSize
+            }};
         true ->
             {reply, ok, Stream#stream{
                 buffer_list = [Bin | Buffer],
diff --git a/src/couch/src/couch_util.erl b/src/couch/src/couch_util.erl
index d93aaebd6..09ca71e1e 100644
--- a/src/couch/src/couch_util.erl
+++ b/src/couch/src/couch_util.erl
@@ -46,6 +46,7 @@
 -export([remove_sensitive_data/1]).
 -export([ejson_to_map/1]).
 -export([new_set/0, set_from_list/1]).
+-export([hibernate_after/2]).
 
 -include_lib("couch/include/couch_db.hrl").
 
@@ -806,3 +807,12 @@ new_set() ->
 
 set_from_list(KVs) ->
     sets:from_list(KVs, [{version, 2}]).
+
+hibernate_after(Module, Default) when is_atom(Module) ->
+    Key = atom_to_list(Module),
+    case config:get_integer_or_infinity("hibernate_after", Key, Default) of
+        infinity ->
+            [];
+        Timeout when is_integer(Timeout) ->
+            [{hibernate_after, Timeout}]
+    end.
diff --git a/src/couch/src/couch_work_queue.erl 
b/src/couch/src/couch_work_queue.erl
index 3c6ffeaf8..0285ad671 100644
--- a/src/couch/src/couch_work_queue.erl
+++ b/src/couch/src/couch_work_queue.erl
@@ -22,6 +22,8 @@
 -export([init/1, terminate/2]).
 -export([handle_call/3, handle_cast/2, handle_info/2]).
 
+-define(DEFAULT_HIBERNATE_AFTER, 1000).
+
 -record(q, {
     queue = queue:new(),
     blocked = [],
@@ -34,7 +36,8 @@
 }).
 
 new(Options) ->
-    gen_server:start_link(couch_work_queue, Options, []).
+    GenOpts = couch_util:hibernate_after(?MODULE, ?DEFAULT_HIBERNATE_AFTER),
+    gen_server:start_link(couch_work_queue, Options, GenOpts).
 
 queue(Wq, Item) when is_binary(Item) ->
     gen_server:call(Wq, {queue, Item, byte_size(Item)}, infinity);
@@ -73,7 +76,7 @@ init(Options) ->
         max_size = couch_util:get_value(max_size, Options, nil),
         max_items = couch_util:get_value(max_items, Options, nil)
     },
-    {ok, Q, hibernate}.
+    {ok, Q}.
 
 terminate(_Reason, #q{worker = undefined}) ->
     ok;
@@ -87,18 +90,13 @@ handle_call({queue, Item, Size}, From, #q{worker = 
undefined} = Q0) ->
         items = Q0#q.items + 1,
         queue = queue:in({Item, Size}, Q0#q.queue)
     },
-    case
-        (Q#q.size >= Q#q.max_size) orelse
-            (Q#q.items >= Q#q.max_items)
-    of
-        true ->
-            {noreply, Q#q{blocked = [From | Q#q.blocked]}, hibernate};
-        false ->
-            {reply, ok, Q, hibernate}
+    case (Q#q.size >= Q#q.max_size) orelse (Q#q.items >= Q#q.max_items) of
+        true -> {noreply, Q#q{blocked = [From | Q#q.blocked]}};
+        false -> {reply, ok, Q}
     end;
 handle_call({queue, Item, _}, _From, #q{worker = {W, _Max}} = Q) ->
     gen_server:reply(W, {ok, [Item]}),
-    {reply, ok, Q#q{worker = undefined}, hibernate};
+    {reply, ok, Q#q{worker = undefined}};
 handle_call({dequeue, _Max}, _From, #q{worker = {_, _}}) ->
     % Something went wrong - the same or a different worker is
     % trying to dequeue an item. We only allow one worker to wait
diff --git a/src/couch/test/eunit/couch_util_tests.erl 
b/src/couch/test/eunit/couch_util_tests.erl
index 5f8f1ce43..a7d870d5e 100644
--- a/src/couch/test/eunit/couch_util_tests.erl
+++ b/src/couch/test/eunit/couch_util_tests.erl
@@ -206,3 +206,84 @@ ejson_to_map_test() ->
     ?assertEqual(#{a => 1, b => 2}, couch_util:ejson_to_map({[{b, 2}, {a, 
1}]})),
     ?assertEqual(#{<<"a">> => [1, #{}]}, couch_util:ejson_to_map({[{<<"a">>, 
[1, {[]}]}]})),
     ?assertEqual([#{true => 1}], couch_util:ejson_to_map([{[{true, 1}]}])).
+
+hibernate_after_test_() ->
+    {
+        foreach,
+        fun setup/0,
+        fun teardown/1,
+        [
+            ?TDEF_FE(t_hibernate),
+            ?TDEF_FE(t_do_not_hibernate)
+        ]
+    }.
+
+setup() ->
+    test_util:start_applications([config]).
+
+teardown(Ctx) ->
+    config:delete("hibernate_after", "couch_work_queue", false),
+    test_util:stop_applications(Ctx).
+
+t_hibernate(_) ->
+    ?assertEqual([{hibernate_after, 999}], hibernate_cfg(999)),
+    config:set("hibernate_after", "couch_work_queue", "10", false),
+    ?assertEqual([{hibernate_after, 10}], hibernate_cfg(10)),
+
+    {ok, Q} = couch_work_queue:new([]),
+
+    % Enqueue item, creating gen_server activity
+    ok = couch_work_queue:queue(Q, potato),
+
+    % Assert that we eventually hibernate
+    wait_hibernate(Q),
+    {current_function, {M, F, _}} = process_info(Q, current_function),
+    ?assertEqual({erlang, hibernate}, {M, F}),
+
+    % We can wake up from hibernation and get back to work
+    ?assertEqual({ok, [potato]}, couch_work_queue:dequeue(Q)),
+
+    % Then we can get back into hibernation
+    wait_hibernate(Q),
+    {current_function, {M, F, _}} = process_info(Q, current_function),
+    ?assertEqual({erlang, hibernate}, {M, F}),
+
+    couch_work_queue:close(Q).
+
+t_do_not_hibernate(_) ->
+    ?assertEqual([], hibernate_cfg(infinity)),
+    config:set("hibernate_after", "couch_work_queue", "10", false),
+    ?assertEqual([{hibernate_after, 10}], hibernate_cfg(infinity)),
+    config:set("hibernate_after", "couch_work_queue", "infinity", false),
+    ?assertEqual([], hibernate_cfg(10)),
+
+    {ok, Q} = couch_work_queue:new([]),
+
+    % Enqueue item, creating gen_server activity
+    ok = couch_work_queue:queue(Q, potato),
+
+    % Assert that we do not hibernate
+    timer:sleep(100),
+    {current_function, {M, F, _}} = process_info(Q, current_function),
+    ?assertNotEqual({erlang, hibernate}, {M, F}),
+
+    % The queue works as expected without hibernation
+    ?assertEqual({ok, [potato]}, couch_work_queue:dequeue(Q)),
+
+    couch_work_queue:close(Q).
+
+
+% Helper functions
+
+hibernate_cfg(Default) ->
+    couch_util:hibernate_after(couch_work_queue, Default).
+
+wait_hibernate(Pid) ->
+    WaitFun = fun() ->
+        {current_function, {M, F, _}} = process_info(Pid, current_function),
+        case {M, F} of
+            {erlang, hibernate} -> ok;
+            {_, _} -> wait
+        end
+    end,
+    test_util:wait(WaitFun).
diff --git a/src/rexi/src/rexi_buffer.erl b/src/rexi/src/rexi_buffer.erl
index 73bf3dfae..c4b0c2360 100644
--- a/src/rexi/src/rexi_buffer.erl
+++ b/src/rexi/src/rexi_buffer.erl
@@ -31,6 +31,7 @@
 
 -define(BUFFER_COUNT_DEFAULT, 2000).
 -define(COUNTER, counter).
+-define(DEFAULT_HIBERNATE_AFTER, 1000).
 
 -record(state, {
     server_id,
@@ -42,7 +43,8 @@
 }).
 
 start_link(ServerId) ->
-    gen_server:start_link({local, ServerId}, ?MODULE, [ServerId], []).
+    GenOpts = couch_util:hibernate_after(?MODULE, ?DEFAULT_HIBERNATE_AFTER),
+    gen_server:start_link({local, ServerId}, ?MODULE, [ServerId], GenOpts).
 
 send(Dest, Msg) ->
     Server = list_to_atom(lists:concat([rexi_buffer, "_", get_node(Dest)])),
@@ -90,9 +92,10 @@ handle_info(timeout, #state{sender = nil, count = C} = 
State) when C > 0 ->
     counters:add(Counter, 1, -1),
     case erlang:send(Dest, Msg, [noconnect, nosuspend]) of
         ok when C =:= 1 ->
-            % We just sent the last queued messsage, we'll use this opportunity
-            % to hibernate the process and run a garbage collection
-            {noreply, NewState, hibernate};
+            % We just sent the last queued message. If we stay idle for a
+            % while, as configured via couch_util:hibernate_after/2, we'll go
+            % into hibernation.
+            {noreply, NewState};
         ok when C > 1 ->
             % Use a zero timeout to recurse into this handler ASAP
             {noreply, NewState, 0};

Reply via email to