This is an automated email from the ASF dual-hosted git repository. vatamane pushed a commit to branch fix-local-rev-parsing in repository https://gitbox.apache.org/repos/asf/couchdb.git
commit 2080aee65e2a347b43cc32e5ee3c18f5a99989b5 Author: Nick Vatamaniuc <[email protected]> AuthorDate: Fri Nov 21 15:02:50 2025 -0500 Fix local doc rev parsing Previously, we applied the same "magic" hex decoding to revision ids for local docs as we did for regular docs. The "magic" is to detect the case of binaries with length 32 and assume they are hex-encoded, and then hex-decode them. When encoding them we did the opposite -- checked if they are 16 bytes long, and if, so we hex-encoded them. That's a nice optimization to half the storage needed for revs internally. However, the trick above doesn't really apply to local docs. For local docs revision ids have the format <<"0-$N">>. Where N is a decimal number. It starts at 1 with the first update, then gets bumped on every update. The `0` prefix though never changes, it's always `0`. Since regular (non-local) docs cannot have a revision depth of 0, it's only the case for local docs we can detect the case of encoding local doc revision and skip the magic hex encoding and decoding to remove the strange corner case. --- src/couch/src/couch_doc.erl | 22 ++++++++++---------- src/couch/test/eunit/couch_db_doc_tests.erl | 32 +++++++++++++++++++++++++++++ 2 files changed, 43 insertions(+), 11 deletions(-) diff --git a/src/couch/src/couch_doc.erl b/src/couch/src/couch_doc.erl index 7ad0f76b6..0993b85dc 100644 --- a/src/couch/src/couch_doc.erl +++ b/src/couch/src/couch_doc.erl @@ -68,20 +68,20 @@ to_json_revisions(Options, Start, RevIds0) -> {<<"_revisions">>, {[ {<<"start">>, Start}, - {<<"ids">>, [revid_to_str(R) || R <- RevIds]} + {<<"ids">>, [revid_to_str(Start, R) || R <- RevIds]} ]}} ] end. -revid_to_str(RevId) when size(RevId) =:= 16 -> +revid_to_str(Start, RevId) when Start =/= 0, byte_size(RevId) =:= 16 -> couch_util:to_hex_bin(RevId); -revid_to_str(RevId) when is_binary(RevId) -> +revid_to_str(Start, RevId) when is_integer(Start), Start >= 0, is_binary(RevId) -> RevId; -revid_to_str(RevId) when is_list(RevId) -> +revid_to_str(Start, RevId) when is_integer(Start), Start >= 0, is_list(RevId) -> list_to_binary(RevId). rev_to_str({Pos, RevId}) -> - <<(integer_to_binary(Pos))/binary, $-, (revid_to_str(RevId))/binary>>. + <<(integer_to_binary(Pos))/binary, $-, (revid_to_str(Pos, RevId))/binary>>. revs_to_strs([]) -> []; @@ -188,13 +188,13 @@ from_json_obj({Props}, DbName) -> from_json_obj(_Other, _) -> throw({bad_request, "Document must be a JSON object"}). -parse_revid(RevId) when is_binary(RevId), size(RevId) =:= 32 -> +parse_revid(Start, RevId) when Start =/= 0, is_binary(RevId), byte_size(RevId) =:= 32 -> binary:decode_hex(RevId); -parse_revid(RevId) when is_list(RevId), length(RevId) =:= 32 -> +parse_revid(Start, RevId) when Start =/= 0, is_list(RevId), length(RevId) =:= 32 -> binary:decode_hex(list_to_binary(RevId)); -parse_revid(RevId) when is_binary(RevId) -> +parse_revid(Start, RevId) when is_integer(Start), Start >= 0, is_binary(RevId) -> RevId; -parse_revid(RevId) when is_list(RevId) -> +parse_revid(Start, RevId) when is_integer(Start), Start >= 0, is_list(RevId) -> ?l2b(RevId). parse_rev(Rev) when is_binary(Rev) -> @@ -211,7 +211,7 @@ parse_rev(Rev) when is_list(Rev) -> {Pos, [$- | RevId]} -> try IntPos = list_to_integer(Pos), - {IntPos, parse_revid(RevId)} + {IntPos, parse_revid(IntPos, RevId)} catch error:badarg -> throw({bad_request, <<"Invalid rev format">>}) end; @@ -313,7 +313,7 @@ transfer_fields([{<<"_revisions">>, {Props}} | Rest], Doc, DbName) -> RevIds2 = lists:map( fun(RevId) -> try - parse_revid(RevId) + parse_revid(Start, RevId) catch error:function_clause -> throw({doc_validation, "RevId isn't a string"}); diff --git a/src/couch/test/eunit/couch_db_doc_tests.erl b/src/couch/test/eunit/couch_db_doc_tests.erl index dc1ac79e6..46a42fe2d 100644 --- a/src/couch/test/eunit/couch_db_doc_tests.erl +++ b/src/couch/test/eunit/couch_db_doc_tests.erl @@ -115,3 +115,35 @@ add_revisions(Db, DocId, Rev, N) -> Rev, lists:seq(1, N) ). + +rev_to_str_test() -> + ?assertEqual(<<"0-0">>, couch_doc:rev_to_str({0, "0"})), + ?assertEqual(<<"0-0">>, couch_doc:rev_to_str({0, <<"0">>})), + ?assertEqual(<<"1-a">>, couch_doc:rev_to_str({1, <<"a">>})), + Bytes = <<<<48>> || _ <- lists:seq(1, 16)>>, + ?assertEqual(<<"0-0000000000000000">>, couch_doc:rev_to_str({0, Bytes})), + ?assertEqual(<<"1-30303030303030303030303030303030">>, couch_doc:rev_to_str({1, Bytes})), + ?assertError(badarg, couch_doc:rev_to_str({a, b})), + ?assertError(function_clause, couch_doc:rev_to_str({-1, <<"x">>})), + ?assertError(function_clause, couch_doc:rev_to_str(foo)). + +parse_rev_test() -> + ?assertEqual({1, <<"x">>}, couch_doc:parse_rev("1-x")), + ?assertEqual({1, <<"x">>}, couch_doc:parse_rev(<<"1-x">>)), + ?assertEqual({0, <<"y">>}, couch_doc:parse_rev("0-y")), + ?assertEqual({1234567890, <<"abc">>}, couch_doc:parse_rev("1234567890-abc")), + ?assertEqual( + {0, <<"00000000000000000000000000000000">>}, + couch_doc:parse_rev("0-00000000000000000000000000000000") + ), + ?assertEqual( + {1, <<0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0>>}, + couch_doc:parse_rev("1-00000000000000000000000000000000") + ), + ?assertThrow({bad_request, _}, couch_doc:parse_rev(foo)), + ?assertThrow({bad_request, _}, couch_doc:parse_rev("x-y")), + ?assertThrow({bad_request, _}, couch_doc:parse_rev("x")), + ?assertThrow({bad_request, _}, couch_doc:parse_rev("0")), + ?assertThrow({bad_request, _}, couch_doc:parse_rev("1")), + ?assertThrow({bad_request, _}, couch_doc:parse_rev("-")), + ?assertThrow({bad_request, _}, couch_doc:parse_rev("-0-1")).
