This is an automated email from the ASF dual-hosted git repository.

davisp pushed a commit to branch prototype/fdb-layer-couch-views-size-tests
in repository https://gitbox.apache.org/repos/asf/couchdb.git

commit 20e5e443062b961d30233daffc42767c575cf64c
Author: Paul J. Davis <[email protected]>
AuthorDate: Wed Mar 4 09:54:53 2020 -0600

    Fix handling of duplicate keys
    
    If a map function emits duplicate keys for a document this stores
    multiple rows in the map index differentiated by a `DupeId` counter.
    Previously we were attempting to save some work by avoiding clearing
    ranges for keys that would be overwritten. However, if a document update
    caused fewer duplicates to be emitted for the same key, we left orphaned
    rows in the index.
---
 src/couch_views/src/couch_views_fdb.erl           |  5 +-
 src/couch_views/test/couch_views_indexer_test.erl | 76 +++++++++++++++++++++++
 2 files changed, 77 insertions(+), 4 deletions(-)

diff --git a/src/couch_views/src/couch_views_fdb.erl 
b/src/couch_views/src/couch_views_fdb.erl
index f2ac01b..98257f3 100644
--- a/src/couch_views/src/couch_views_fdb.erl
+++ b/src/couch_views/src/couch_views_fdb.erl
@@ -224,13 +224,10 @@ update_map_idx(TxDb, Sig, ViewId, DocId, ExistingKeys, 
NewRows) ->
         db_prefix := DbPrefix
     } = TxDb,
 
-    Unique = lists:usort([K || {K, _V} <- NewRows]),
-
-    KeysToRem = ExistingKeys -- Unique,
     lists:foreach(fun(RemKey) ->
         {Start, End} = map_idx_range(DbPrefix, Sig, ViewId, RemKey, DocId),
         ok = erlfdb:clear_range(Tx, Start, End)
-    end, KeysToRem),
+    end, ExistingKeys),
 
     KVsToAdd = process_rows(NewRows),
     MapIdxPrefix = map_idx_prefix(DbPrefix, Sig, ViewId),
diff --git a/src/couch_views/test/couch_views_indexer_test.erl 
b/src/couch_views/test/couch_views_indexer_test.erl
index 9482fdd..9b12af6 100644
--- a/src/couch_views/test/couch_views_indexer_test.erl
+++ b/src/couch_views/test/couch_views_indexer_test.erl
@@ -39,6 +39,7 @@ indexer_test_() ->
                     ?TDEF_FE(multipe_docs_with_same_key),
                     ?TDEF_FE(multipe_keys_from_same_doc),
                     ?TDEF_FE(multipe_identical_keys_from_same_doc),
+                    ?TDEF_FE(fewer_multipe_identical_keys_from_same_doc),
                     ?TDEF_FE(handle_size_key_limits),
                     ?TDEF_FE(handle_size_value_limits)
                 ]
@@ -388,6 +389,77 @@ multipe_identical_keys_from_same_doc(Db) ->
         ], Out).
 
 
+fewer_multipe_identical_keys_from_same_doc(Db) ->
+    DDoc = create_ddoc(multi_emit_same),
+
+    Doc0 = couch_doc:from_json_obj({[
+        {<<"_id">>, list_to_binary(integer_to_list(0))},
+        {<<"val">>, 1},
+        {<<"extra">>, 3}
+    ]}),
+
+    {ok, _} = fabric2_db:update_doc(Db, DDoc, []),
+    {ok, {Pos, Rev}} = fabric2_db:update_doc(Db, Doc0, []),
+
+    {ok, Out1} = couch_views:query(
+            Db,
+            DDoc,
+            <<"map_fun1">>,
+            fun fold_fun/2,
+            [],
+            #mrargs{}
+        ),
+
+    ?assertEqual([
+            {row, [
+                {id, <<"0">>},
+                {key, 1},
+                {value, 1}
+            ]},
+            {row, [
+                {id, <<"0">>},
+                {key, 1},
+                {value, 2}
+            ]},
+            {row, [
+                {id, <<"0">>},
+                {key, 1},
+                {value, 3}
+            ]}
+        ], Out1),
+
+    Doc1 = couch_doc:from_json_obj({[
+        {<<"_id">>, list_to_binary(integer_to_list(0))},
+        {<<"val">>, 1}
+    ]}),
+    Doc2 = Doc1#doc{
+        revs = {Pos, [Rev]}
+    },
+    {ok, _} = fabric2_db:update_doc(Db, Doc2, []),
+
+    {ok, Out2} = couch_views:query(
+            Db,
+            DDoc,
+            <<"map_fun1">>,
+            fun fold_fun/2,
+            [],
+            #mrargs{}
+        ),
+
+    ?assertEqual([
+            {row, [
+                {id, <<"0">>},
+                {key, 1},
+                {value, 1}
+            ]},
+            {row, [
+                {id, <<"0">>},
+                {key, 1},
+                {value, 2}
+            ]}
+        ], Out2).
+
+
 handle_size_key_limits(Db) ->
     ok = meck:new(config, [passthrough]),
     ok = meck:expect(config, get_integer, fun(Section, Key, Default) ->
@@ -495,6 +567,7 @@ row(Id, Key, Value) ->
         {value, Value}
     ]}.
 
+
 fold_fun({meta, _Meta}, Acc) ->
     {ok, Acc};
 fold_fun({row, _} = Row, Acc) ->
@@ -544,6 +617,9 @@ create_ddoc(multi_emit_same) ->
                 {<<"map">>, <<"function(doc) { "
                     "emit(doc.val, doc.val * 2); "
                     "emit(doc.val, doc.val); "
+                    "if(doc.extra) {"
+                    "  emit(doc.val, doc.extra);"
+                    "}"
                 "}">>}
             ]}},
             {<<"map_fun2">>, {[

Reply via email to