This is an automated email from the ASF dual-hosted git repository. pgj pushed a commit to branch main in repository https://gitbox.apache.org/repos/asf/couchdb.git
commit ce2607a5c4cbddee39f0f66eabd6948489a8014e Author: Gabor Pali <[email protected]> AuthorDate: Wed Oct 4 13:18:48 2023 +0200 mango: de-duplicate results on returning them in `nouveau` This is a port of a fix applied to the `text` cursor where occasionally duplicated documents were filtered out. This happens when moving between pages and an interleaved update is applied. --- src/mango/src/mango_cursor_nouveau.erl | 55 +++++++++++++++++++++------------- 1 file changed, 35 insertions(+), 20 deletions(-) diff --git a/src/mango/src/mango_cursor_nouveau.erl b/src/mango/src/mango_cursor_nouveau.erl index 26358409a..2588e94d9 100644 --- a/src/mango/src/mango_cursor_nouveau.erl +++ b/src/mango/src/mango_cursor_nouveau.erl @@ -35,7 +35,8 @@ user_fun, user_acc, fields, - execution_stats + execution_stats, + documents_seen }). create(Db, {Indexes, Trace}, Selector, Opts) -> @@ -103,7 +104,8 @@ execute(Cursor, UserFun, UserAcc) -> user_fun = UserFun, user_acc = UserAcc, fields = Cursor#cursor.fields, - execution_stats = mango_execution_stats:log_start(Stats) + execution_stats = mango_execution_stats:log_start(Stats), + documents_seen = sets:new([{version, 2}]) }, try case Query of @@ -171,28 +173,41 @@ handle_hit(CAcc0, Hit, Doc) -> #cacc{ limit = Limit, skip = Skip, - execution_stats = Stats + execution_stats = Stats, + documents_seen = Seen } = CAcc0, - CAcc1 = update_bookmark(CAcc0, Hit), Stats1 = mango_execution_stats:incr_docs_examined(Stats), couch_stats:increment_counter([mango, docs_examined]), - CAcc2 = CAcc1#cacc{execution_stats = Stats1}, - case mango_selector:match(CAcc2#cacc.selector, Doc) of - true when Skip > 0 -> - CAcc2#cacc{skip = Skip - 1}; - true when Limit == 0 -> - % We hit this case if the user spcified with a - % zero limit. Notice that in this case we need - % to return the bookmark from before this match - throw({stop, CAcc0}); - true when Limit == 1 -> - NewCAcc = apply_user_fun(CAcc2, Doc), - throw({stop, NewCAcc}); - true when Limit > 1 -> - NewCAcc = apply_user_fun(CAcc2, Doc), - NewCAcc#cacc{limit = Limit - 1}; + CAcc1 = CAcc0#cacc{execution_stats = Stats1}, + case mango_selector:match(CAcc1#cacc.selector, Doc) of + true -> + DocId = mango_doc:get_field(Doc, <<"_id">>), + case sets:is_element(DocId, Seen) of + true -> + CAcc1; + false -> + CAcc2 = update_bookmark(CAcc1, Hit), + CAcc3 = CAcc2#cacc{ + documents_seen = sets:add_element(DocId, Seen) + }, + if + Skip > 0 -> + CAcc3#cacc{skip = Skip - 1}; + Limit == 0 -> + % We hit this case if the user specified with a + % zero limit. Notice that in this case we need + % to return the bookmark from before this match. + throw({stop, CAcc0}); + Limit == 1 -> + CAcc4 = apply_user_fun(CAcc3, Doc), + throw({stop, CAcc4}); + Limit > 1 -> + CAcc4 = apply_user_fun(CAcc3, Doc), + CAcc4#cacc{limit = Limit - 1} + end + end; false -> - CAcc2 + CAcc1 end. apply_user_fun(CAcc, Doc) ->
