This is an automated email from the ASF dual-hosted git repository.

davisp pushed a commit to branch prototype/fdb-layer-couch-views-size-tests
in repository https://gitbox.apache.org/repos/asf/couchdb.git
commit 20e5e443062b961d30233daffc42767c575cf64c
Author: Paul J. Davis <[email protected]>
AuthorDate: Wed Mar 4 09:54:53 2020 -0600

    Fix handling of duplicate keys

    If a map function emits duplicate keys for a document this stores
    multiple rows in the map index differentiated by a `DupeId` counter.
    Previously we were attempting to save some work avoiding clearing ranges
    for keys that would be overwritten. However, if a document update causes
    fewer duplicates to be emitted for the same key we left orphaned rows in
    the index.
---
 src/couch_views/src/couch_views_fdb.erl           |  5 +-
 src/couch_views/test/couch_views_indexer_test.erl | 76 +++++++++++++++++++++++
 2 files changed, 77 insertions(+), 4 deletions(-)

diff --git a/src/couch_views/src/couch_views_fdb.erl b/src/couch_views/src/couch_views_fdb.erl
index f2ac01b..98257f3 100644
--- a/src/couch_views/src/couch_views_fdb.erl
+++ b/src/couch_views/src/couch_views_fdb.erl
@@ -224,13 +224,10 @@ update_map_idx(TxDb, Sig, ViewId, DocId, ExistingKeys, NewRows) ->
         db_prefix := DbPrefix
     } = TxDb,
 
-    Unique = lists:usort([K || {K, _V} <- NewRows]),
-
-    KeysToRem = ExistingKeys -- Unique,
     lists:foreach(fun(RemKey) ->
         {Start, End} = map_idx_range(DbPrefix, Sig, ViewId, RemKey, DocId),
         ok = erlfdb:clear_range(Tx, Start, End)
-    end, KeysToRem),
+    end, ExistingKeys),
 
     KVsToAdd = process_rows(NewRows),
     MapIdxPrefix = map_idx_prefix(DbPrefix, Sig, ViewId),
diff --git a/src/couch_views/test/couch_views_indexer_test.erl b/src/couch_views/test/couch_views_indexer_test.erl
index 9482fdd..9b12af6 100644
--- a/src/couch_views/test/couch_views_indexer_test.erl
+++ b/src/couch_views/test/couch_views_indexer_test.erl
@@ -39,6 +39,7 @@ indexer_test_() ->
                     ?TDEF_FE(multipe_docs_with_same_key),
                     ?TDEF_FE(multipe_keys_from_same_doc),
                     ?TDEF_FE(multipe_identical_keys_from_same_doc),
+                    ?TDEF_FE(fewer_multipe_identical_keys_from_same_doc),
                     ?TDEF_FE(handle_size_key_limits),
                     ?TDEF_FE(handle_size_value_limits)
                 ]
@@ -388,6 +389,77 @@ multipe_identical_keys_from_same_doc(Db) ->
     ], Out).
 
 
+fewer_multipe_identical_keys_from_same_doc(Db) ->
+    DDoc = create_ddoc(multi_emit_same),
+
+    Doc0 = couch_doc:from_json_obj({[
+        {<<"_id">>, list_to_binary(integer_to_list(0))},
+        {<<"val">>, 1},
+        {<<"extra">>, 3}
+    ]}),
+
+    {ok, _} = fabric2_db:update_doc(Db, DDoc, []),
+    {ok, {Pos, Rev}} = fabric2_db:update_doc(Db, Doc0, []),
+
+    {ok, Out1} = couch_views:query(
+            Db,
+            DDoc,
+            <<"map_fun1">>,
+            fun fold_fun/2,
+            [],
+            #mrargs{}
+        ),
+
+    ?assertEqual([
+        {row, [
+            {id, <<"0">>},
+            {key, 1},
+            {value, 1}
+        ]},
+        {row, [
+            {id, <<"0">>},
+            {key, 1},
+            {value, 2}
+        ]},
+        {row, [
+            {id, <<"0">>},
+            {key, 1},
+            {value, 3}
+        ]}
+    ], Out1),
+
+    Doc1 = couch_doc:from_json_obj({[
+        {<<"_id">>, list_to_binary(integer_to_list(0))},
+        {<<"val">>, 1}
+    ]}),
+    Doc2 = Doc1#doc{
+        revs = {Pos, [Rev]}
+    },
+    {ok, _} = fabric2_db:update_doc(Db, Doc2, []),
+
+    {ok, Out2} = couch_views:query(
+            Db,
+            DDoc,
+            <<"map_fun1">>,
+            fun fold_fun/2,
+            [],
+            #mrargs{}
+        ),
+
+    ?assertEqual([
+        {row, [
+            {id, <<"0">>},
+            {key, 1},
+            {value, 1}
+        ]},
+        {row, [
+            {id, <<"0">>},
+            {key, 1},
+            {value, 2}
+        ]}
+    ], Out2).
+
+
 handle_size_key_limits(Db) ->
     ok = meck:new(config, [passthrough]),
     ok = meck:expect(config, get_integer, fun(Section, Key, Default) ->
@@ -495,6 +567,7 @@ row(Id, Key, Value) ->
         {value, Value}
     ]}.
 
+
 fold_fun({meta, _Meta}, Acc) ->
     {ok, Acc};
 fold_fun({row, _} = Row, Acc) ->
@@ -544,6 +617,9 @@ create_ddoc(multi_emit_same) ->
                 {<<"map">>, <<"function(doc) { "
                     "emit(doc.val, doc.val * 2); "
                     "emit(doc.val, doc.val); "
+                    "if(doc.extra) {"
+                    " emit(doc.val, doc.extra);"
+                    "}"
                 "}">>}
             ]}},
             {<<"map_fun2">>, {[
