This is an automated email from the ASF dual-hosted git repository.

vatamane pushed a commit to branch cache-props-and-other-header-terms
in repository https://gitbox.apache.org/repos/asf/couchdb.git

commit 0b6c01ea901ec6459b37a4fb9ad6b30d4255e2b3
Author: Nick Vatamaniuc <vatam...@gmail.com>
AuthorDate: Wed Aug 6 00:58:57 2025 -0400

    Store frequently accessed header metadata terms in a cache
    
    Previously, on every write, _all_docs request and other calls, the props
    term was always loaded from disk, decompressed and run through
    binary_to_term. The term is small, and likely will be in the page cache,
    but in the worst case we'd still have to go back to the disk to fetch it.
    
    To fix it, load the terms once at startup and cache them in memory. This
    way we get the best of both worlds: a small header record, since we're
    using a pointer to a term, and fast interactive operations, since we don't
    have to do extra reads and deserializations.
---
 src/couch/src/couch_bt_engine.erl | 66 ++++++++++++++++++++++++++++++---------
 src/couch/src/couch_bt_engine.hrl |  3 +-
 2 files changed, 53 insertions(+), 16 deletions(-)

diff --git a/src/couch/src/couch_bt_engine.erl b/src/couch/src/couch_bt_engine.erl
index 180c99e36..56932c2b4 100644
--- a/src/couch/src/couch_bt_engine.erl
+++ b/src/couch/src/couch_bt_engine.erl
@@ -122,6 +122,12 @@
 -define(PURGE_INFOS_LIMIT, purge_infos_limit).
 -define(COMPACTED_SEQ, compacted_seq).
 
+% Some frequently accessed keys we store in a cache
+-define(CACHED_KEYS, #{
+    ?SECURITY_PTR => true,
+    ?PROPS_PTR => true
+}).
+
 exists(FilePath) ->
     case is_file(FilePath) of
         true ->
@@ -544,10 +550,8 @@ commit_data(St) ->
         true ->
             ok = couch_file:write_header(Fd, NewHeader, [sync]),
             couch_stats:increment_counter([couchdb, commits]),
-            {ok, St#st{
-                header = NewHeader,
-                needs_commit = false
-            }};
+            St1 = St#st{header = NewHeader, needs_commit = false},
+            {ok, update_cache(St1)};
         false ->
             {ok, St}
     end.
@@ -884,7 +888,7 @@ init_state(FilePath, Fd, Header0, Options) ->
             {ok, NewSt} = commit_data(St#st{needs_commit = true}),
             NewSt;
         false ->
-            St
+            update_cache(St)
     end.
 
 update_header(St, Header) ->
@@ -1190,23 +1194,57 @@ is_file(Path) ->
         _ -> false
     end.
 
-get_header_term(#st{header = Header} = St, Key, Default) when is_atom(Key) ->
-    case couch_bt_engine_header:get(Header, Key) of
-        undefined ->
+get_header_term(#st{} = St, Key, Default) when is_atom(Key) ->
+    #st{header = Header, cache = Cache} = St,
+    case {couch_bt_engine_header:get(Header, Key), Cache} of
+        {undefined, #{}} ->
             Default;
-        Pointer when is_integer(Pointer) ->
+        {Pointer, #{Key := {Pointer, Term}}} when is_integer(Pointer) ->
+            % Explicitly match on both the key and the pointer to make sure
+            % we never return stale data. If we don't match somehow we'd
+            % still fall through to the next clause and just read from disk
+            Term;
+        {Pointer, #{}} when is_integer(Pointer) ->
             {ok, Term} = couch_file:pread_term(St#st.fd, Pointer),
             Term
     end.
 
 set_header_term(#st{} = St, Key, Term) when is_atom(Key) ->
-    #st{fd = Fd, header = Header, compression = Compression} = St,
-    St#st{
-        header = set_header_term(Fd, Header, Key, Term, Compression),
-        needs_commit = true
-    }.
+    #st{fd = Fd, header = Header, compression = Compression, cache = Cache} = St,
+    Header1 = set_header_term(Fd, Header, Key, Term, Compression),
+    Pointer = couch_bt_engine_header:get(Header1, Key),
+    Cache1 = Cache#{Key => {Pointer, Term}},
+    St#st{header = Header1, cache = Cache1, needs_commit = true}.
 
 set_header_term(Fd, Header, Key, Term, Compression) when is_atom(Key) ->
     TermOpts = [{compression, Compression}],
     {ok, Ptr, _} = couch_file:append_term(Fd, Term, TermOpts),
     couch_bt_engine_header:set(Header, Key, Ptr).
+
+% This is a cache for header terms referenced by file pointers. If they are not
+% cached, and are used for interactive requests, then on each request we'd have
+% to do these extra calls: file read, decompression and binary_to_term.
+%
+% This cache lives in the engine state, which lives in the #db{} record, so it
+% will be shared and available to all the client requests.
+
+update_cache(#st{} = St) ->
+    update_cache(#st{} = St, maps:keys(?CACHED_KEYS)).
+
+update_cache(#st{} = St, []) ->
+    St;
+update_cache(#st{} = St, [Key | Keys]) ->
+    #st{header = Header, cache = Cache} = St,
+    case couch_bt_engine_header:get(Header, Key) of
+        Pointer when is_integer(Pointer) ->
+            case Cache of
+                #{Key := {Pointer, _}} ->
+                    update_cache(St, Keys);
+                #{} ->
+                    {ok, Term} = couch_file:pread_term(St#st.fd, Pointer),
+                    Cache1 = Cache#{Key => {Pointer, Term}},
+                    update_cache(St#st{cache = Cache1}, Keys)
+            end;
+        _ ->
+            update_cache(St#st{cache = maps:remove(Key, Cache)}, Keys)
+    end.
diff --git a/src/couch/src/couch_bt_engine.hrl b/src/couch/src/couch_bt_engine.hrl
index e3c1d4983..c16cd8719 100644
--- a/src/couch/src/couch_bt_engine.hrl
+++ b/src/couch/src/couch_bt_engine.hrl
@@ -14,8 +14,7 @@
     filepath,
     fd,
     fd_monitor,
-    % deprecated but keeping it here to avoid altering the record size
-    fsync_options_deprecated,
+    cache = #{},
     header,
     needs_commit,
     id_tree,

Reply via email to