[GitHub] [couchdb] nickva commented on a diff in pull request #4512: feat(mango): keys-only covering indexes

via GitHub Fri, 14 Apr 2023 09:12:50 -0700


nickva commented on code in PR #4512:
URL: https://github.com/apache/couchdb/pull/4512#discussion_r1166983131



##########
src/mango/src/mango_idx_view.erl:
##########
@@ -26,14 +26,20 @@
 
     indexable_fields/1,
     field_ranges/1,
-    field_ranges/2
+    field_ranges/2,
+
+    covers/2
 ]).
 
 -include_lib("couch/include/couch_db.hrl").
 -include("mango.hrl").
 -include("mango_idx.hrl").
 -include("mango_idx_view.hrl").
 
+-ifdef(TEST).
+-import(test_util, [as_selector/1]).
+-endif.
+

Review Comment:
   We usually try to avoid imports just as a style preference. Can add a small 
helper function like `indexable_as_selector` in the `TEST` define section 
perhaps.



##########
src/mango/src/mango_cursor_view.erl:
##########
@@ -30,25 +30,60 @@
 -include_lib("couch_mrview/include/couch_mrview.hrl").
 -include_lib("fabric/include/fabric.hrl").
 
+-include("mango.hrl").
 -include("mango_cursor.hrl").
 -include("mango_idx.hrl").
 -include("mango_idx_view.hrl").
 
 -define(HEARTBEAT_INTERVAL_IN_USEC, 4000000).
 
+-type cursor_options() :: [{term(), term()}].
+-type message() ::
+    {meta, _}
+    | {row, row_properties()}
+    | {execution_stats, shard_stats()}
+    | {stop, #cursor{}}
+    | {complete, #cursor{}}
+    | {error, any()}.
+
 % viewcbargs wraps up the arguments that view_cb uses into a single
 % entry in the mrargs.extra list. We use a Map to allow us to later
 % add fields without having old messages causing errors/crashes.
-viewcbargs_new(Selector, Fields) ->
+
+-type viewcbargs() ::
+    #{
+        selector => selector(),
+        fields => fields(),
+        covering_index => maybe(#idx{})
+    }.
+
+-spec viewcbargs_new(Selector, Fields, CoveringIndex) -> ViewCBArgs when
+    Selector :: selector(),
+    Fields :: fields(),
+    CoveringIndex :: maybe(#idx{}),
+    ViewCBArgs :: viewcbargs().
+viewcbargs_new(Selector, Fields, CoveringIndex) ->
     #{
         selector => Selector,
-        fields => Fields
+        fields => Fields,
+        covering_index => CoveringIndex
     }.
+
+-spec viewcbargs_get(Key, Args) -> maybe(term()) when

Review Comment:
   `maybe(term())` looks a bit odd since `undefined` is also a `term()` so it 
would be just `term()`?



##########
src/mango/src/mango_cursor_view.erl:
##########
@@ -73,13 +108,12 @@ create(Db, Indexes, Selector, Opts) ->
         bookmark = Bookmark
     }}.
 
-explain(Cursor) ->
-    #cursor{
-        opts = Opts
-    } = Cursor,
-
+-spec explain(#cursor{}) -> nonempty_list(term()).
+explain(#cursor{opts = Opts} = Cursor) ->
     BaseArgs = base_args(Cursor),
-    Args = apply_opts(Opts, BaseArgs),
+    Args0 = apply_opts(Opts, BaseArgs),
+    #cursor{index = Index, fields = Fields} = Cursor,

Review Comment:
   Could could match Index and Fields in the function head since we already 
match Opts there.
   
   An even cleaner way is to match on `#cursor{} = Cursor` in the function head.
   
   Then on the first function line:
   
   ```
   #cursor{index = Index, fields = Fields, opts = Opts} = Cursor
   ```



##########
src/mango/src/mango.hrl:
##########
@@ -12,6 +12,8 @@
 
 -define(MANGO_ERROR(R), throw({mango_error, ?MODULE, R})).
 
+-type maybe(A) :: A | undefined.

Review Comment:
   This one is a bit as A can be undefined also. I'd would expect Dialyzer 
should emit some kind of warning here but perhaps our Dialyzer options are not 
configured properly.



##########
src/mango/src/mango_cursor_view.erl:
##########
@@ -157,7 +197,8 @@ execute(#cursor{db = Db, index = Idx, execution_stats = 
Stats} = Cursor0, UserFu
             BaseArgs = base_args(Cursor),
             #cursor{opts = Opts, bookmark = Bookmark} = Cursor,
             Args0 = apply_opts(Opts, BaseArgs),
-            Args = mango_json_bookmark:update_args(Bookmark, Args0),
+            Args1 = consider_index_coverage(Idx, Cursor#cursor.fields, Args0),

Review Comment:
   Could just add `fields = Fields` to the `#cursor{...} = Cursor` match line 
above?



##########
src/mango/src/mango_cursor_view.erl:
##########
@@ -482,6 +564,32 @@ apply_opts([{_, _} | Rest], Args) ->
     % Ignore unknown options
     apply_opts(Rest, Args).
 
+-spec consider_index_coverage(#idx{}, fields(), #mrargs{}) -> #mrargs{}.
+consider_index_coverage(Index, Fields, #mrargs{include_docs = IncludeDocs} = 
Args) ->
+    Covering = mango_idx_view:covers(Index, Fields),
+    Args0 = Args#mrargs{include_docs = IncludeDocs andalso (not Covering)},

Review Comment:
   It is a bit odd to have `Args` then `Args0`.
   
   We usually have an `Args0` when we need to pre-process the value and want to 
keep the rest of the code using `Args`.  Or if we keep updating the value Args, 
we'd have `Args, Args1, Args2, ...`
   
   So in this case we might have Arg0 in the function clause, then after 
updating it have `Args = Args0#mrargs{include_docs = ...}`



##########
src/mango/src/mango_cursor_view.erl:
##########
@@ -482,6 +564,32 @@ apply_opts([{_, _} | Rest], Args) ->
     % Ignore unknown options
     apply_opts(Rest, Args).
 
+-spec consider_index_coverage(#idx{}, fields(), #mrargs{}) -> #mrargs{}.
+consider_index_coverage(Index, Fields, #mrargs{include_docs = IncludeDocs} = 
Args) ->

Review Comment:
   I had noticed that we always calls `apply_opts` then right after always 
`call consider_index_coverage`. It might make sense to have a function like 
`apply_cursor_opts` which then may still call `apply_opts` and 
`consider_index_coverage` (since both are tested separately they can stay that 
way to minimize the changes diff).
   
   



##########
src/mango/src/mango_cursor_view.erl:
##########
@@ -541,6 +651,746 @@ update_bookmark_keys(Cursor, _Props) ->
 -ifdef(TEST).
 -include_lib("eunit/include/eunit.hrl").
 
+viewcbargs_test() ->
+    ViewCBArgs = viewcbargs_new(selector, fields, index),
+    ?assertEqual(selector, viewcbargs_get(selector, ViewCBArgs)),
+    ?assertEqual(fields, viewcbargs_get(fields, ViewCBArgs)),
+    ?assertEqual(index, viewcbargs_get(covering_index, ViewCBArgs)),
+    ?assertError(function_clause, viewcbargs_get(something_else, ViewCBArgs)).
+
+maybe_replace_max_json_test() ->
+    ?assertEqual([], maybe_replace_max_json([])),
+    ?assertEqual(<<"<MAX>">>, maybe_replace_max_json(?MAX_STR)),
+    ?assertEqual(
+        [val1, val2, <<"<MAX>">>, val3], maybe_replace_max_json([val1, val2, 
?MAX_JSON_OBJ, val3])
+    ),
+    ?assertEqual(something, maybe_replace_max_json(something)).
+
+base_opts_test() ->
+    Index =
+        #idx{
+            type = <<"json">>,
+            def = {[{<<"fields">>, {[{field1, undefined}, {field2, 
undefined}]}}]}
+        },
+    Fields = [field1, field2],
+    Cursor =
+        #cursor{
+            index = Index,
+            selector = selector,
+            fields = Fields,
+            ranges = [{'$gte', start_key, '$lte', end_key}]
+        },
+    Extra =
+        [
+            {callback, {mango_cursor_view, view_cb}},
+            {selector, selector},
+            {callback_args, #{
+                selector => selector,
+                fields => Fields,
+                covering_index => undefined
+            }},
+            {ignore_partition_query_limit, true}
+        ],
+    MRArgs =
+        #mrargs{
+            view_type = map,
+            reduce = false,
+            start_key = [start_key],
+            end_key = [end_key, ?MAX_JSON_OBJ],
+            include_docs = true,
+            extra = Extra
+        },
+    ?assertEqual(MRArgs, base_args(Cursor)).
+
+apply_opts_empty_test() ->
+    ?assertEqual(args, apply_opts([], args)).
+
+apply_opts_r_test() ->
+    Args = #mrargs{},
+    ArgsWithDocs = Args#mrargs{include_docs = true},
+    ?assertEqual(ArgsWithDocs, apply_opts([{r, "1"}], Args)),
+    ArgsWithoutDocs = Args#mrargs{include_docs = false},
+    ?assertEqual(ArgsWithoutDocs, apply_opts([{r, "3"}], Args)).
+
+apply_opts_conflicts_test() ->
+    Args = #mrargs{},
+    ArgsWithConflicts = Args#mrargs{conflicts = true},
+    ?assertEqual(ArgsWithConflicts, apply_opts([{conflicts, true}], Args)),
+    ArgsWithoutConflicts = Args#mrargs{conflicts = undefined},
+    ?assertEqual(ArgsWithoutConflicts, apply_opts([{conflicts, false}], Args)).
+
+apply_opts_sort_test() ->
+    Args =
+        #mrargs{
+            start_key = start_key,
+            start_key_docid = start_key_docid,
+            end_key = end_key,
+            end_key_docid = end_key_docid
+        },
+    ?assertEqual(Args, apply_opts([{sort, {[]}}], Args)),
+    ?assertEqual(Args, apply_opts([{sort, {[{field1, <<"asc">>}]}}], Args)),
+    ?assertEqual(Args, apply_opts([{sort, {[{field1, <<"asc">>}, {field2, 
<<"desc">>}]}}], Args)),
+    ArgsWithSort =
+        Args#mrargs{
+            direction = rev,
+            start_key = end_key,
+            start_key_docid = end_key_docid,
+            end_key = start_key,
+            end_key_docid = start_key_docid
+        },
+    ?assertEqual(ArgsWithSort, apply_opts([{sort, {[{field1, <<"desc">>}]}}], 
Args)).
+
+apply_opts_stale_test() ->
+    Args = #mrargs{},
+    ArgsWithStale = Args#mrargs{stable = true, update = false},
+    ?assertEqual(ArgsWithStale, apply_opts([{stale, ok}], Args)).
+
+apply_opts_stable_test() ->
+    Args = #mrargs{},
+    ArgsWithStable = Args#mrargs{stable = true},
+    ?assertEqual(ArgsWithStable, apply_opts([{stable, true}], Args)),
+    ArgsWithoutStable = Args#mrargs{stable = false},
+    ?assertEqual(ArgsWithoutStable, apply_opts([{stable, false}], Args)).
+
+apply_opts_update_test() ->
+    Args = #mrargs{},
+    ArgsWithUpdate = Args#mrargs{update = true},
+    ?assertEqual(ArgsWithUpdate, apply_opts([{update, true}], Args)),
+    ArgsWithoutUpdate = Args#mrargs{update = false},
+    ?assertEqual(ArgsWithoutUpdate, apply_opts([{update, false}], Args)).
+
+apply_opts_partition_test() ->
+    Args = #mrargs{},
+    ArgsWithPartition = Args#mrargs{extra = [{partition, <<"partition">>}]},
+    ?assertEqual(ArgsWithPartition, apply_opts([{partition, <<"partition">>}], 
Args)),
+    ArgsWithoutPartition = Args#mrargs{extra = []},
+    ?assertEqual(ArgsWithoutPartition, apply_opts([{partition, <<>>}], Args)).
+
+consider_index_coverage_positive_test() ->
+    Index =
+        #idx{
+            type = <<"json">>,
+            def = {[{<<"fields">>, {[]}}]}
+        },
+    Fields = [<<"_id">>],
+    MRArgs =
+        #mrargs{
+            include_docs = true,
+            extra = [{callback_args, viewcbargs_new(selector, fields, 
undefined)}]
+        },
+    MRArgsRef =
+        MRArgs#mrargs{
+            include_docs = false,
+            extra = [{callback_args, viewcbargs_new(selector, fields, Index)}]
+        },
+    ?assertEqual(MRArgsRef, consider_index_coverage(Index, Fields, MRArgs)),
+    MRArgs1 = MRArgs#mrargs{include_docs = false},
+    ?assertEqual(MRArgsRef, consider_index_coverage(Index, Fields, MRArgs1)).
+
+consider_index_coverage_negative_test() ->
+    Index = undefined,
+    Fields = all_fields,
+    MRArgs = #mrargs{include_docs = true},
+    ?assertEqual(MRArgs, consider_index_coverage(Index, Fields, MRArgs)),
+    MRArgs1 = #mrargs{include_docs = false},
+    ?assertEqual(MRArgs1, consider_index_coverage(Index, Fields, MRArgs1)),
+    % no extra attributes hence no effect
+    Index1 =
+        #idx{
+            type = <<"json">>,
+            def = {[{<<"fields">>, {[]}}]}
+        },
+    MRArgs2 = #mrargs{include_docs = false},
+    ?assertEqual(MRArgs1, consider_index_coverage(Index1, [<<"_id">>], 
MRArgs2)).
+
+derive_doc_from_index_test() ->
+    Index =
+        #idx{
+            type = <<"json">>,
+            def = {[{<<"fields">>, {[{<<"field1">>, undefined}, {<<"field2">>, 
undefined}]}}]}
+        },
+    DocId = doc_id,
+    Keys = [key1, key2],
+    ViewRow = #view_row{id = DocId, key = Keys},
+    Doc = {[{<<"_id">>, DocId}, {<<"field2">>, key2}, {<<"field1">>, key1}]},
+    ?assertEqual(Doc, derive_doc_from_index(Index, ViewRow)).
+
+composite_indexes_test() ->
+    ?assertEqual([], composite_indexes([], [])),
+    Index1 =
+        #idx{
+            type = <<"json">>,
+            def = {[{<<"fields">>, {[{field1, undefined}, {field2, 
undefined}]}}]}
+        },
+    Index2 =
+        #idx{
+            type = <<"json">>,
+            def = {[{<<"fields">>, {[{field1, undefined}, {field3, undefined}, 
{field4, range4}]}}]}
+        },
+    Index3 =
+        #idx{
+            type = <<"json">>,
+            def = {[{<<"fields">>, {[{field3, undefined}, {field4, 
undefined}]}}]}
+        },
+    Indexes = [Index1, Index2, Index3],
+    Ranges = [{field1, range1}, {field3, range3}, {field4, range4}],
+    Result = [
+        {Index3, [range3, range4], 1}, {Index2, [range1, range3, range4], 0}, 
{Index1, [range1], 2}
+    ],
+    ?assertEqual(Result, composite_indexes(Indexes, Ranges)).
+
+create_test() ->
+    Index = #idx{type = <<"json">>, def = {[{<<"fields">>, {[]}}]}},
+    Indexes = [Index],
+    Ranges = [],
+    Selector = {[]},
+    Options = [{limit, limit}, {skip, skip}, {fields, fields}, {bookmark, 
bookmark}],
+    Cursor =
+        #cursor{
+            db = db,
+            index = Index,
+            ranges = Ranges,
+            selector = Selector,
+            opts = Options,
+            limit = limit,
+            skip = skip,
+            fields = fields,
+            bookmark = bookmark
+        },
+    ?assertEqual({ok, Cursor}, create(db, Indexes, Selector, Options)).
+
+explain_test() ->
+    Cursor =
+        #cursor{
+            ranges = [empty],
+            fields = all_fields,
+            opts = []
+        },
+    Response =
+        [
+            {mrargs,
+                {[
+                    {include_docs, true},
+                    {view_type, map},
+                    {reduce, false},
+                    {partition, null},
+                    {start_key, null},
+                    {end_key, null},
+                    {direction, fwd},
+                    {stable, false},
+                    {update, true},
+                    {conflicts, undefined}
+                ]}},
+            {covered, false}
+        ],
+    ?assertEqual(Response, explain(Cursor)).
+
+execute_empty_test() ->
+    Cursor = #cursor{ranges = [empty]},
+    meck:new(fabric),
+    meck:expect(fabric, all_docs, ['_', '_', '_', '_', '_'], meck:val(error)),
+    meck:expect(fabric, query_view, ['_', '_', '_', '_', '_', '_'], 
meck:val(error)),
+    ?assertEqual({ok, accumulator}, execute(Cursor, undefined, accumulator)),
+    ?assertNot(meck:called(fabric, all_docs, '_')),
+    ?assertNot(meck:called(fabric, query_view, '_')),
+    meck:unload(fabric).

Review Comment:
   If test crashes half-way would meck automatically unload fabric and other 
mocks? If there is a chance it won't we should probably wrap the tests into a 
suite with a `{foreach, setup/0, teardown/1, [?TDEF_FE(...),...]}` construct 
and use `meck:unload()` in `teardown/1`, that would unload all the mocks.



##########
src/mango/src/mango_idx_view.erl:
##########
@@ -298,6 +304,8 @@ indexable({[{<<"$gt">>, _}]}) ->
     true;
 indexable({[{<<"$gte">>, _}]}) ->
     true;
+indexable({[{<<"$exists">>, _}]}) ->

Review Comment:
   What's the intuition about making `$exists` indexable as well?



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

[GitHub] [couchdb] nickva commented on a diff in pull request #4512: feat(mango): keys-only covering indexes

Reply via email to