This is an automated email from the ASF dual-hosted git repository.

vatamane pushed a commit to branch try-xxhash-for-couch-file
in repository https://gitbox.apache.org/repos/asf/couchdb.git

commit 463f17fb3c37812d3e99d25ca5b8ddbd3781a0ef
Author: Nick Vatamaniuc <[email protected]>
AuthorDate: Sat May 6 02:20:43 2023 -0400

    Use xxHash for couch_file checksums
    
    Check xxhash first, since it's faster [1], and if that fails, check
    the slower md5 version.
    
    Bump a stats counter to indicate if there are still any md5 checksums
    found during normal cluster operation.
    
    Initially default to not writing xxHash checksums, only reading
    them. There is a config setting and tests to assert that it's possible
    to upgrade and downgrade.
    
    [1]
    
    Comparison of hashing a 4KB block (units are microseconds).
    ```
    ([email protected])20> f(T), {T, ok} = timer:tc(fun() -> lists:foreach(fun (_) -> do_nothing_overhead end, lists:seq(1, 1000000)) end), (T/1000000.0).
    0.167425
    ([email protected])21> f(T), {T, ok} = timer:tc(fun() -> lists:foreach(fun (_) -> exxhash:xxhash128(B) end, lists:seq(1, 1000000)) end), (T/1000000).
    0.770687
    ([email protected])22> f(T), {T, ok} = timer:tc(fun() -> lists:foreach(fun (_) -> crypto:hash(md5, B) end, lists:seq(1, 1000000)) end), (T/1000000).
    6.205445
    ```
---
 rel/overlay/etc/default.ini               |   5 ++
 src/couch/priv/stats_descriptions.cfg     |   4 +
 src/couch/src/couch_file.erl              | 113 ++++++++++++++++++-------
 src/couch/test/eunit/couch_file_tests.erl | 133 ++++++++++++++++++++++++++++++
 4 files changed, 226 insertions(+), 29 deletions(-)

diff --git a/rel/overlay/etc/default.ini b/rel/overlay/etc/default.ini
index 14b2a5362..2cc195d55 100644
--- a/rel/overlay/etc/default.ini
+++ b/rel/overlay/etc/default.ini
@@ -95,6 +95,11 @@ view_index_dir = {{view_index_dir}}
 ; Sets the log level for informational compaction related entries.
 ;compaction_log_level = info
 
+; Enable writing xxHash checksums in .couch files. The current
+; default is false. When the value is false both xxHash and legacy
+; checksums can be read and verified.
+;write_xxhash_checksums = false
+
 [purge]
 ; Allowed maximum number of documents in one purge request
 ;max_document_id_number = 100
diff --git a/src/couch/priv/stats_descriptions.cfg b/src/couch/priv/stats_descriptions.cfg
index 7c8fd94cb..dc426c387 100644
--- a/src/couch/priv/stats_descriptions.cfg
+++ b/src/couch/priv/stats_descriptions.cfg
@@ -290,6 +290,10 @@
     {type, histogram},
     {desc, <<"duration of validate_doc_update function calls">>}
 ]}.
+{[couchdb, legacy_checksums], [
+    {type, counter},
+    {desc, <<"number of legacy checksums found in couch_file instances">>}
+]}.
 {[pread, exceed_eof], [
     {type, counter},
     {desc, <<"number of the attempts to read beyond end of db file">>}
diff --git a/src/couch/src/couch_file.erl b/src/couch/src/couch_file.erl
index 514d4e3d9..98a615773 100644
--- a/src/couch/src/couch_file.erl
+++ b/src/couch/src/couch_file.erl
@@ -23,6 +23,8 @@
 -define(IS_OLD_STATE(S), is_pid(S#file.db_monitor)).
 -define(PREFIX_SIZE, 5).
 -define(DEFAULT_READ_COUNT, 1024).
+-define(WRITE_XXHASH_CHECKSUMS, generate_xxhash_checksums).
+-define(WRITE_XXHASH_CHECKSUMS_DEFAULT, false).
 
 -type block_id() :: non_neg_integer().
 -type location() :: non_neg_integer().
@@ -55,6 +57,10 @@
 %% helper functions
 -export([process_info/1]).
 
+% test helper functions
+-export([reset_checksum_persistent_term_config/0]).
+-export([reset_legacy_checksum_stats/0]).
+
 %%----------------------------------------------------------------------
 %% Args:   Valid Options are [create] and [create,overwrite].
 %%  Files are opened in read/write mode.
@@ -142,8 +148,8 @@ assemble_file_chunk(Bin) ->
     [<<0:1/integer, (iolist_size(Bin)):31/integer>>, Bin].
 
 assemble_file_chunk_and_checksum(Bin) ->
-    Md5 = couch_hash:md5_hash(Bin),
-    [<<1:1/integer, (iolist_size(Bin)):31/integer>>, Md5, Bin].
+    Checksum = generate_checksum(Bin),
+    [<<1:1/integer, (iolist_size(Bin)):31/integer>>, Checksum, Bin].
 
 %%----------------------------------------------------------------------
 %% Purpose: Reads a term from a file that was written with append_term
@@ -169,8 +175,8 @@ pread_binary(Fd, Pos) ->
 
 pread_iolist(Fd, Pos) ->
     case ioq:call(Fd, {pread_iolist, Pos}, erlang:get(io_priority)) of
-        {ok, IoList, Md5} ->
-            {ok, verify_md5(Fd, Pos, IoList, Md5)};
+        {ok, IoList, Checksum} ->
+            {ok, verify_checksum(Fd, Pos, IoList, Checksum)};
         Error ->
             Error
     end.
@@ -191,13 +197,13 @@ pread_binaries(Fd, PosList) ->
 
 pread_iolists(Fd, PosList) ->
     case ioq:call(Fd, {pread_iolists, PosList}, erlang:get(io_priority)) of
-        {ok, DataMd5s} ->
+        {ok, DataAndChecksums} ->
             Data = lists:zipwith(
-                fun(Pos, {IoList, Md5}) ->
-                    verify_md5(Fd, Pos, IoList, Md5)
+                fun(Pos, {IoList, Checksum}) ->
+                    verify_checksum(Fd, Pos, IoList, Checksum)
                 end,
                 PosList,
-                DataMd5s
+                DataAndChecksums
             ),
             {ok, Data};
         Error ->
@@ -400,9 +406,9 @@ read_header(Fd) ->
 
 write_header(Fd, Data) ->
     Bin = term_to_binary(Data),
-    Md5 = couch_hash:md5_hash(Bin),
+    Checksum = generate_checksum(Bin),
     % now we assemble the final header binary and write to disk
-    FinalBin = <<Md5/binary, Bin/binary>>,
+    FinalBin = <<Checksum/binary, Bin/binary>>,
     ioq:call(Fd, {write_header, FinalBin}, erlang:get(io_priority)).
 
 init_status_error(ReturnPid, Ref, Error) ->
@@ -504,11 +510,11 @@ handle_call({pread_iolist, Pos}, _From, File) ->
     update_read_timestamp(),
     {LenIolist, NextPos} = read_raw_iolist_int(File, Pos, 4),
     case iolist_to_binary(LenIolist) of
-        % an MD5-prefixed term
+        % a checksum-prefixed term
         <<1:1/integer, Len:31/integer>> ->
-            {Md5AndIoList, _} = read_raw_iolist_int(File, NextPos, Len + 16),
-            {Md5, IoList} = extract_md5(Md5AndIoList),
-            {reply, {ok, IoList, Md5}, File};
+            {ChecksumAndIoList, _} = read_raw_iolist_int(File, NextPos, Len + 16),
+            {Checksum, IoList} = extract_checksum(ChecksumAndIoList),
+            {reply, {ok, IoList, Checksum}, File};
         <<0:1/integer, Len:31/integer>> ->
             {Iolist, _} = read_raw_iolist_int(File, NextPos, Len),
             {reply, {ok, Iolist, <<>>}, File}
@@ -520,7 +526,7 @@ handle_call({pread_iolists, PosL}, _From, File) ->
     LocNums2 = lists:map(
         fun({LenIoList, NextPos}) ->
             case iolist_to_binary(LenIoList) of
-                % an MD5-prefixed term
+                % a checksum-prefixed term
                 <<1:1/integer, Len:31/integer>> ->
                     {NextPos, Len + 16};
                 <<0:1/integer, Len:31/integer>> ->
@@ -534,8 +540,8 @@ handle_call({pread_iolists, PosL}, _From, File) ->
         fun({LenIoList, _}, {IoList, _}) ->
             case iolist_to_binary(LenIoList) of
                 <<1:1/integer, _:31/integer>> ->
-                    {Md5, IoList} = extract_md5(IoList),
-                    {IoList, Md5};
+                    {Checksum, IoList} = extract_checksum(IoList),
+                    {IoList, Checksum};
                 <<0:1/integer, _:31/integer>> ->
                     {IoList, <<>>}
             end
@@ -674,9 +680,15 @@ load_header(Fd, Pos, HeaderLen, RestBlock) ->
                 {ok, Missing} = file:pread(Fd, ReadStart, ReadLen),
                 <<RestBlock/binary, Missing/binary>>
         end,
-    <<Md5Sig:16/binary, HeaderBin/binary>> =
+    <<Checksum:16/binary, HeaderBin/binary>> =
         iolist_to_binary(remove_block_prefixes(?PREFIX_SIZE, RawBin)),
-    Md5Sig = couch_hash:md5_hash(HeaderBin),
+    case exxhash:xxhash128(HeaderBin) of
+        Checksum ->
+            ok;
+        <<_/binary>> ->
+            Checksum = couch_hash:md5_hash(HeaderBin),
+            legacy_checksums_stats_update()
+    end,
     {ok, HeaderBin}.
 
 %% Read multiple block locations using a single file:pread/2.
@@ -779,10 +791,10 @@ get_pread_locnum(File, Pos, Len) ->
             {Pos, TotalBytes}
     end.
 
--spec extract_md5(iolist()) -> {binary(), iolist()}.
-extract_md5(FullIoList) ->
-    {Md5List, IoList} = split_iolist(FullIoList, 16, []),
-    {iolist_to_binary(Md5List), IoList}.
+-spec extract_checksum(iolist()) -> {binary(), iolist()}.
+extract_checksum(FullIoList) ->
+    {ChecksumList, IoList} = split_iolist(FullIoList, 16, []),
+    {iolist_to_binary(ChecksumList), IoList}.
 
 calculate_total_read_len(0, FinalLen) ->
     calculate_total_read_len(1, FinalLen) + 1;
@@ -852,15 +864,23 @@ monitored_by_pids() ->
     {monitored_by, PidsAndRefs} = process_info(self(), monitored_by),
     lists:filter(fun is_pid/1, PidsAndRefs).
 
-verify_md5(_Fd, _Pos, IoList, <<>>) ->
+verify_checksum(_Fd, _Pos, IoList, <<>>) ->
     IoList;
-verify_md5(Fd, Pos, IoList, Md5) ->
-    case couch_hash:md5_hash(IoList) of
-        Md5 -> IoList;
-        _ -> report_md5_error(Fd, Pos)
+verify_checksum(Fd, Pos, IoList, Checksum) ->
+    case exxhash:xxhash128(iolist_to_binary(IoList)) of
+        Checksum ->
+            IoList;
+        <<_/binary>> ->
+            case couch_hash:md5_hash(IoList) of
+                Checksum ->
+                    legacy_checksums_stats_update(),
+                    IoList;
+                _ ->
+                    report_checksum_error(Fd, Pos)
+            end
     end.
 
-report_md5_error(Fd, Pos) ->
+report_checksum_error(Fd, Pos) ->
     couch_log:emergency("File corruption in ~p at position ~B", [Fd, Pos]),
     exit({file_corruption, <<"file corruption">>}).
 
@@ -906,6 +926,41 @@ reset_eof(#file{} = File) ->
     {ok, Eof} = file:position(File#file.fd, eof),
     File#file{eof = Eof}.
 
+-spec generate_checksum(binary()) -> <<_:128>>.
+generate_checksum(Bin) when is_binary(Bin) ->
+    case generate_xxhash_checksums() of
+        true -> <<_:128>> = exxhash:xxhash128(Bin);
+        false -> <<_:128>> = couch_hash:md5_hash(Bin)
+    end.
+
+legacy_checksums_stats_update() ->
+    % Bump stats only if we're writing new checksums.
+    case generate_xxhash_checksums() of
+        true -> couch_stats:increment_counter([couchdb, legacy_checksums]);
+        false -> ok
+    end.
+
+reset_legacy_checksum_stats() ->
+    Counter = couch_stats:sample([couchdb, legacy_checksums]),
+    couch_stats:decrement_counter([couchdb, legacy_checksums], Counter).
+
+reset_checksum_persistent_term_config() ->
+    persistent_term:erase({?MODULE, ?WRITE_XXHASH_CHECKSUMS}).
+
+generate_xxhash_checksums() ->
+    % Caching the config value here as we'd need to call this per file chunk
+    % and also from various processes (not just couch_file pids). Node must be
+    % restarted for the new value to take effect.
+    case persistent_term:get({?MODULE, ?WRITE_XXHASH_CHECKSUMS}, not_cached) of
+        not_cached ->
+            Default = ?WRITE_XXHASH_CHECKSUMS_DEFAULT,
+            Val = config:get_boolean("couchdb", "write_xxhash_checksums", Default),
+            persistent_term:put({?MODULE, ?WRITE_XXHASH_CHECKSUMS}, Val),
+            Val;
+        Val when is_boolean(Val) ->
+            Val
+    end.
+
 -ifdef(TEST).
 -include_lib("couch/include/couch_eunit.hrl").
 
diff --git a/src/couch/test/eunit/couch_file_tests.erl b/src/couch/test/eunit/couch_file_tests.erl
index 1b54cd70e..d9b859587 100644
--- a/src/couch/test/eunit/couch_file_tests.erl
+++ b/src/couch/test/eunit/couch_file_tests.erl
@@ -551,3 +551,136 @@ fake_fsync_fd() ->
         {'$gen_call', From, sync} ->
             gen:reply(From, {error, eio})
     end.
+
+checksum_test_() ->
+    {
+        foreach,
+        fun setup_checksum/0,
+        fun teardown_checksum/1,
+        [
+            ?TDEF_FE(t_write_read_xxhash_checksums),
+            ?TDEF_FE(t_downgrade_xxhash_checksums),
+            ?TDEF_FE(t_read_legacy_checksums_after_upgrade)
+        ]
+    }.
+
+setup_checksum() ->
+    Path = ?tempfile(),
+    Ctx = test_util:start_couch(),
+    config:set("couchdb", "write_xxhash_checksums", "false", _Persist = false),
+    {Ctx, Path}.
+
+teardown_checksum({Ctx, Path}) ->
+    file:delete(Path),
+    meck:unload(),
+    test_util:stop_couch(Ctx),
+    couch_file:reset_checksum_persistent_term_config().
+
+t_write_read_xxhash_checksums({_Ctx, Path}) ->
+    enable_xxhash(),
+
+    {ok, Fd} = couch_file:open(Path, [create]),
+    Header = header,
+    ok = couch_file:write_header(Fd, Header),
+    Bin = <<"bin">>,
+    Chunk = couch_file:assemble_file_chunk_and_checksum(Bin),
+    {ok, Pos, _} = couch_file:append_raw_chunk(Fd, Chunk),
+    couch_file:close(Fd),
+
+    {ok, Fd1} = couch_file:open(Path, []),
+    {ok, Header1} = couch_file:read_header(Fd1),
+    ?assertEqual(Header, Header1),
+    {ok, Bin1} = couch_file:pread_binary(Fd1, Pos),
+    ?assertEqual(Bin, Bin1),
+    ?assertEqual(0, legacy_stats()),
+    couch_file:close(Fd1).
+
+t_downgrade_xxhash_checksums({_Ctx, Path}) ->
+    % We're in the future and writing xxhash checksums by default
+    enable_xxhash(),
+    {ok, Fd} = couch_file:open(Path, [create]),
+    Header = header,
+    ok = couch_file:write_header(Fd, Header),
+    Bin = <<"bin">>,
+    Chunk = couch_file:assemble_file_chunk_and_checksum(Bin),
+    {ok, Pos, _} = couch_file:append_raw_chunk(Fd, Chunk),
+    couch_file:close(Fd),
+
+    % The future was broken, we travel back, but still know how to
+    % interpret future checksums without crashing
+    disable_xxhash(),
+    {ok, Fd1} = couch_file:open(Path, []),
+    {ok, Header1} = couch_file:read_header(Fd1),
+    ?assertEqual(Header, Header1),
+    {ok, Bin1} = couch_file:pread_binary(Fd1, Pos),
+    ?assertEqual(Bin, Bin1),
+
+    % We'll write some legacy checksums to the file and then ensure
+    % we can read both legacy and the new ones
+    OtherBin = <<"otherbin">>,
+    OtherChunk = couch_file:assemble_file_chunk_and_checksum(OtherBin),
+    {ok, OtherPos, _} = couch_file:append_raw_chunk(Fd1, OtherChunk),
+    couch_file:close(Fd1),
+
+    {ok, Fd2} = couch_file:open(Path, []),
+    {ok, Header2} = couch_file:read_header(Fd2),
+    ?assertEqual(Header, Header2),
+    {ok, Bin2} = couch_file:pread_binary(Fd2, Pos),
+    {ok, OtherBin1} = couch_file:pread_binary(Fd2, OtherPos),
+    ?assertEqual(Bin, Bin2),
+    ?assertEqual(OtherBin, OtherBin1),
+    couch_file:close(Fd2).
+
+t_read_legacy_checksums_after_upgrade({_Ctx, Path}) ->
+    % We're in the past and writing legacy checksums by default
+    disable_xxhash(),
+    {ok, Fd} = couch_file:open(Path, [create]),
+    Header = header,
+    ok = couch_file:write_header(Fd, Header),
+    Bin = <<"bin">>,
+    Chunk = couch_file:assemble_file_chunk_and_checksum(Bin),
+    {ok, Pos, _} = couch_file:append_raw_chunk(Fd, Chunk),
+    couch_file:close(Fd),
+
+    % We upgrade and xxhash checksums are now the default, but we can
+    % still read legacy checksums.
+    enable_xxhash(),
+    {ok, Fd1} = couch_file:open(Path, []),
+    {ok, Header1} = couch_file:read_header(Fd1),
+    ?assertEqual(Header, Header1),
+    {ok, Bin1} = couch_file:pread_binary(Fd1, Pos),
+    ?assertEqual(Bin, Bin1),
+    % one header, one chunk
+    ?assertEqual(2, legacy_stats()),
+
+    % We'll write some new checksums to the file and then ensure
+    % we can read both legacy and the new ones
+    OtherBin = <<"otherbin">>,
+    OtherChunk = couch_file:assemble_file_chunk_and_checksum(OtherBin),
+    {ok, OtherPos, _} = couch_file:append_raw_chunk(Fd1, OtherChunk),
+    couch_file:close(Fd1),
+
+    couch_stats:decrement_counter([couchdb, legacy_checksums], legacy_stats()),
+    {ok, Fd2} = couch_file:open(Path, []),
+    {ok, Header2} = couch_file:read_header(Fd2),
+    ?assertEqual(Header, Header2),
+    {ok, Bin2} = couch_file:pread_binary(Fd2, Pos),
+    {ok, OtherBin1} = couch_file:pread_binary(Fd2, OtherPos),
+    ?assertEqual(Bin, Bin2),
+    ?assertEqual(OtherBin, OtherBin1),
+    % one header, legacy chunk, not counting new chunk
+    ?assertEqual(2, legacy_stats()),
+    couch_file:close(Fd2).
+
+enable_xxhash() ->
+    couch_file:reset_checksum_persistent_term_config(),
+    couch_file:reset_legacy_checksum_stats(),
+    config:set("couchdb", "write_xxhash_checksums", "true", _Persist = false).
+
+disable_xxhash() ->
+    couch_file:reset_checksum_persistent_term_config(),
+    couch_file:reset_legacy_checksum_stats(),
+    config:set("couchdb", "write_xxhash_checksums", "false", _Persist = false).
+
+legacy_stats() ->
+    couch_stats:sample([couchdb, legacy_checksums]).

Reply via email to