This is an automated email from the ASF dual-hosted git repository.
vatamane pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/couchdb.git
The following commit(s) were added to refs/heads/main by this push:
new bbbbfc82d Improve b64url benchmarking
bbbbfc82d is described below
commit bbbbfc82de3a481c71419bca3e81a9fd0840d0af
Author: Nick Vatamaniuc <[email protected]>
AuthorDate: Tue Dec 16 03:35:06 2025 -0500
Improve b64url benchmarking
Use erlperf as recommended in
https://www.erlang.org/doc/system/benchmarking.html
- Generate data in a more deterministic way and outside the main
benchmarking loop
- Benchmark encoding and decoding separately
- Benchmark a wide range of sizes
Issue: https://github.com/apache/couchdb/issues/5801
---
src/b64url/README.md | 51 ++++++++----
src/b64url/benchmark.sh | 22 +++++
src/b64url/test/benchmark.escript | 165 --------------------------------------
3 files changed, 58 insertions(+), 180 deletions(-)
diff --git a/src/b64url/README.md b/src/b64url/README.md
index c3a3497f2..f93b31a8d 100644
--- a/src/b64url/README.md
+++ b/src/b64url/README.md
@@ -14,24 +14,45 @@ decoding Base64 URL values:
## Performance
-This implementation is significantly faster than the Erlang version it replaced
-in CouchDB. The `benchmark.escript` file contains the original implementation
-(using regular expressions to replace unsafe characters in the output of the
-`base64` module) and can be used to compare the two for strings of various
-lengths. For example:
+This implementation is faster than the Erlang version in OTP 26-28,
+especially for larger binaries (1000+ bytes). To benchmark, clone the
+erlperf repo and run the `./benchmark.sh` script. In the future, it's
+plausible that Erlang/OTP's base64 module may become faster than the NIF
+due to improvements in JIT capabilities, but it's not there yet.
```
-ERL_LIBS=_build/default/lib/b64url/ ./test/benchmark.escript 4 10 100 30
-erl : 75491270 bytes / 30 seconds = 2516375.67 bps
-nif : 672299342 bytes / 30 seconds = 22409978.07 bps
-```
+./benchmark.sh
+
+[...]
+
+--- bytes: 100 -----
+Code || QPS Time Rel
+encode_otp_100 1 1613 Ki 620 ns 100%
+encode_nif_100 1 1391 Ki 719 ns 86%
+Code || QPS Time Rel
+decode_nif_100 1 1453 Ki 688 ns 100%
+decode_otp_100 1 1395 Ki 716 ns 96%
+
+[...]
-This test invocation spawns four workers that generate random strings between 10
-and 100 bytes in length and then perform an encode/decode on them in a tight
-loop for 30 seconds, and then reports the aggregate encoded data volume. Note
-that the generator overhead (`crypto:strong_rand_bytes/1`) is included in these
-results, so the relative difference in encoder throughput is rather larger than
-what's reported here.
+--- bytes: 1000 -----
+Code || QPS Time Rel
+encode_nif_1000 1 369 Ki 2711 ns 100%
+encode_otp_1000 1 204 Ki 4904 ns 55%
+Code || QPS Time Rel
+decode_nif_1000 1 455 Ki 2196 ns 100%
+decode_otp_1000 1 178 Ki 5612 ns 39%
+
+[...]
+
+--- bytes: 10000000 -----
+Code || QPS Time Rel
+encode_nif_10000000 1 45 22388 us 100%
+encode_otp_10000000 1 19 51724 us 43%
+Code || QPS Time Rel
+decode_nif_10000000 1 55 18078 us 100%
+decode_otp_10000000 1 17 60020 us 30%
+```
## Timeslice Consumption
diff --git a/src/b64url/benchmark.sh b/src/b64url/benchmark.sh
new file mode 100755
index 000000000..d7760ed2b
--- /dev/null
+++ b/src/b64url/benchmark.sh
@@ -0,0 +1,22 @@
+#!/bin/bash
+
+# Expects erlperf to be installed
+#
+# $ git clone https://github.com/max-au/erlperf.git
+# $ cd erlperf
+# $ rebar3 as prod escriptize
+# $ cd ..
+
+for i in 50 100 150 200 500 1000 5000 10000 50000 1000000 10000000; do
+ echo ""
+ echo "--- bytes: ${i} -----"
+ ERL_LIBS="." erlperf/erlperf -w 2 \
+ 'runner(Bin) -> b64url:encode(Bin).' --label "encode_nif_${i}" \
+ 'runner(Bin) -> base64:encode(Bin, #{mode => urlsafe, padding => false}).' --label "encode_otp_${i}" \
+ --init_runner_all "rand:seed(default,{1,2,3}), rand:bytes(${i})."
+
+ ERL_LIBS="." erlperf/erlperf -w 2 \
+ 'runner(Enc) -> b64url:decode(Enc).' --label "decode_nif_${i}" \
+ 'runner(Enc) -> base64:decode(Enc, #{mode => urlsafe, padding => false}).' --label "decode_otp_${i}" \
+ --init_runner_all "rand:seed(default,{1,2,3}), b64url:encode(rand:bytes(round(${i} * (3/4))))."
+done
diff --git a/src/b64url/test/benchmark.escript b/src/b64url/test/benchmark.escript
deleted file mode 100755
index 00a6f0dda..000000000
--- a/src/b64url/test/benchmark.escript
+++ /dev/null
@@ -1,165 +0,0 @@
-#!/usr/bin/env escript
-
--mode(compile).
-
-
--export([
- encode/1,
- decode/1,
- run_worker/1
-]).
-
-
--record(st, {
- parent,
- module,
- workers,
- minsize,
- maxsize,
- duration,
- total_bytes
-}).
-
-
-main([Workers0, MinSize0, MaxSize0, Duration0]) ->
- code:add_path("./ebin"),
- code:add_path("../ebin"),
- Workers = to_int(Workers0),
- MinSize = to_int(MinSize0),
- MaxSize = to_int(MaxSize0),
- Duration = to_int(Duration0),
- if Workers > 0 -> ok; true ->
- die("Worker count must be positive~n")
- end,
- if MinSize > 0 -> ok; true ->
- die("Minimum size must be positive.~n")
- end,
- if MaxSize > 0 -> ok; true ->
- die("Maximum size must be positive.~n")
- end,
- if MinSize < MaxSize -> ok; true ->
- die("Minimum size must be less than maximum size.~n")
- end,
- if Duration > 0 -> ok; true ->
- die("Duration must be positive.~n")
- end,
- St = #st{
- parent = self(),
- workers = Workers,
- minsize = MinSize,
- maxsize = MaxSize,
- duration = Duration
- },
- lists:foreach(fun(M) ->
- run_test(St#st{module=M})
- end, randomize([b64url, ?MODULE]));
-
-main(_) ->
- Args = [escript:script_name()],
- die("usage: ~s num_workers min_size max_size time_per_test~n", Args).
-
-
-run_test(St) ->
- Workers = spawn_workers(St#st.workers, St),
- start_workers(Workers),
- Results = wait_for_workers(Workers),
- report(St#st.module, St#st.duration, Results).
-
-
-start_workers(Pids) ->
- lists:foreach(fun(P) ->
- P ! start
- end, Pids).
-
-
-wait_for_workers(Pids) ->
- lists:map(fun(P) ->
- receive
- {P, TotalBytes} -> TotalBytes
- end
- end, Pids).
-
-
-report(Module, Duration, TotalByteList) ->
- ModDesc = case Module of
- ?MODULE -> "erl";
- b64url -> "nif"
- end,
- TotalBytes = lists:sum(TotalByteList),
- io:format("~s : ~14b bytes / ~3b seconds = ~14.2f bps~n", [
- ModDesc, TotalBytes, Duration, TotalBytes / Duration]).
-
-
-spawn_workers(NumWorkers, St) ->
- lists:map(fun(_) ->
- spawn_link(?MODULE, run_worker, [St])
- end, lists:seq(1, NumWorkers)).
-
-
-run_worker(St) ->
- receive
- start -> ok
- end,
- run_worker(St#st{total_bytes=0}, os:timestamp()).
-
-
-run_worker(St, Started) ->
- HasRun = timer:now_diff(os:timestamp(), Started),
- case HasRun div 1000000 > St#st.duration of
- true ->
- St#st.parent ! {self(), St#st.total_bytes};
- false ->
- NewSt = do_round_trip(St),
- run_worker(NewSt, Started)
- end.
-
-
-do_round_trip(St) ->
- Size = St#st.minsize + rand:uniform(St#st.maxsize - St#st.minsize),
- Data = crypto:strong_rand_bytes(Size),
- Encoded = (St#st.module):encode(Data),
- Data = (St#st.module):decode(Encoded),
- St#st{total_bytes=St#st.total_bytes+Size}.
-
-
-encode(Url) ->
- Url1 = iolist_to_binary(re:replace(base64:encode(Url), "=+$", "")),
- Url2 = iolist_to_binary(re:replace(Url1, "/", "_", [global])),
- iolist_to_binary(re:replace(Url2, "\\+", "-", [global])).
-
-
-decode(Url64) ->
- Url1 = re:replace(iolist_to_binary(Url64), "-", "+", [global]),
- Url2 = iolist_to_binary(
- re:replace(iolist_to_binary(Url1), "_", "/", [global])
- ),
- Padding = list_to_binary(lists:duplicate((4 - size(Url2) rem 4) rem 4, $=)),
- base64:decode(<<Url2/binary, Padding/binary>>).
-
-randomize(List) ->
- List0 = [{rand:uniform(), L} || L <- List],
- List1 = lists:sort(List0),
- [L || {_, L} <- List1].
-
-
-to_int(Val) when is_integer(Val) ->
- Val;
-to_int(Val) when is_binary(Val) ->
- to_int(binary_to_list(Val));
-to_int(Val) when is_list(Val) ->
- try
- list_to_integer(Val)
- catch _:_ ->
- die("Invalid integer: ~w~n", [Val])
- end;
-to_int(Val) ->
- die("Invalid integer: ~w~n", [Val]).
-
-
-die(Message) ->
- die(Message, []).
-
-die(Format, Args) ->
- io:format(Format, Args),
- init:stop().
-