I think we got this straightened out on IRC, but the key here is that
the #full_doc_info{} gets the udpate_seq+1 value, but the #doc_info{}
record that's generated in couch_db_updater:new_index_entries ends up
only consulting the leaves of the revision tree to figure out which
update_seq to remove. This was the crux of the bug. The solution was
to just fold over the entire tree looking for the largest update_seq
and use that for for the #full_doc_info{}.On Fri, Sep 2, 2011 at 9:12 AM, Adam Kocoloski <[email protected]> wrote: > The part that I didn't quite get was the following > >> But what happens is at the point we calculate the seq to remove from >> docinfo_by_seq_tree, we calculate the wrong value. Thus when the update >> continues we remove an update_seq that doesn't exist in the tree and insert >> our new seq. > > Do we end up selecting the sequence from the non-leaf revision? Best, > > Adam > > > On Friday, September 2, 2011 at 6:29 AM, Dave Cottlehuber wrote: > >> Paul, >> >> Love your work mate! Outstanding commit msg, I'm working through it (slowly). >> >> A+ >> D >> >> On 2 September 2011 16:31, <[email protected] (mailto:[email protected])> >> wrote: >> > Author: davisp >> > Date: Fri Sep 2 04:31:10 2011 >> > New Revision: 1164350 >> > >> > URL: http://svn.apache.org/viewvc?rev=1164350&view=rev >> > Log: >> > Fix introduction of duplicates into _changes feed >> > >> > When a document is updated the new update_seq is assigned as part of the >> > rev_tree merging in couch_db_updater:merge_rev_trees/7 based on the >> > condition of whether the new rev_tree is equal to the old tree. This >> > equality is done as a simple Erlang term comparison. If the trees are >> > not equal a new update_seq is assigned to the #full_doc_info{} record >> > that is stored in fulldocinfo_by_id_btree. >> > >> > During replication it is possible that a document update merges into the >> > rev_tree internally without creating a leaf. If the terminal node of the >> > replicated path happens to land on a node with a value of ?REV_MISSING >> > the new document information will be preferred and replace the >> > ?REV_MISSING value. >> > >> > This preference ends up causing the rev_tree comparison to evaluate to >> > false which ends up giving this document a new update_seq. Up until this >> > point everything is fine. We write a bit of extra data (that will be >> > cleared during compaction) because of a previous bug where we decided to >> > be cautious and avoid losing data due to a broken rev_tree merging >> > aglorithm. It is also important to note that this is the place were we >> > calculate the update_seq to remove from the docinfo_by_seq_tree. >> > >> > After this point we get back to couch_db_udpater:update_docs_int/5 where >> > we eventually call couch_db_updater:new_index_entries/3 which creates >> > the new entries for the fulldocinfo_by_id_tree and docinfo_by_seq_btree. >> > At this point we end up creating a #doc_info{} record based on the >> > leaves in the rev_tree. As you recall, the update that caused the new >> > update_seq was not a leaf, at this point we create a #doc_info{} record >> > with an incorrect high_seq member pointing to the update_seq we are >> > about to remove from the docinfo_by_seq_tree (remember we calculated the >> > seq to remove before we consulted the leaves). >> > >> > The end result is that we remove the same update_seq we insert. This >> > sets us up for the real bug. The next time we go to update this document >> > the same procedure is applied. But what happens is at the point we >> > calculate the seq to remove from docinfo_by_seq_tree, we calculate the >> > wrong value. Thus when the update continues we remove an update_seq that >> > doesn't exist in the tree and insert our new seq. But, the seq from the >> > previous update is still in the tree. Thus, our docinfo_by_seq_tree now >> > contains two entries pointing at the same document. >> > >> > At this point, we run into the observed behavior of this bug that ends >> > up causing duplicate entries in views which then ends up throwing errors >> > when the view is compaction. These errors are also particularly nasty >> > because they bubble up the the couch_view gen_server which crashes and >> > spiders out crashing every couch_view_group process. That part probably >> > isn't important though. >> > >> > There's a simple test include with the patch to illustrate the behavior >> > and maintain an assertion that it stays fixed. >> > >> > Fixes COUCHDB-1265 >> > >> > >> > Modified: >> > couchdb/trunk/share/www/script/test/recreate_doc.js >> > couchdb/trunk/src/couchdb/couch_doc.erl >> > couchdb/trunk/src/couchdb/couch_key_tree.erl >> > >> > Modified: couchdb/trunk/share/www/script/test/recreate_doc.js >> > URL: >> > http://svn.apache.org/viewvc/couchdb/trunk/share/www/script/test/recreate_doc.js?rev=1164350&r1=1164349&r2=1164350&view=diff >> > ============================================================================== >> > --- couchdb/trunk/share/www/script/test/recreate_doc.js (original) >> > +++ couchdb/trunk/share/www/script/test/recreate_doc.js Fri Sep 2 04:31:10 >> > 2011 >> > @@ -77,4 +77,45 @@ couchTests.recreate_doc = function(debug >> > } catch (e) { >> > T(e.error == "conflict"); >> > } >> > + >> > + db.deleteDb(); >> > + db.createDb(); >> > + >> > + // COUCHDB-1265 >> > + // Resuscitate an unavailable old revision and make sure that it >> > + // doesn't introduce duplicates into the _changes feed. >> > + >> > + var doc = {_id: "bar", count: 0}; >> > + T(db.save(doc).ok); >> > + var ghost = {_id: "bar", _rev: doc._rev, count: doc.count}; >> > + for(var i = 0; i < 2; i++) { >> > + doc.count += 1; >> > + T(db.save(doc).ok); >> > + } >> > + >> > + // Compact so that the old revision to be resuscitated will be >> > + // in the rev_tree as ?REV_MISSING >> > + db.compact(); >> > + while(db.info (http://db.info)().compact_running) {} >> > + >> > + // Saving the ghost here puts it back in the rev_tree in such >> > + // a way as to create a new update_seq but without changing a >> > + // leaf revision. This would cause the #full_doc_info{} and >> > + // #doc_info{} records to diverge in their idea of what the >> > + // doc's update_seq is and end up introducing a duplicate in >> > + // the _changes feed the next time this doc is updated. >> > + T(db.save(ghost, {new_edits: false}).ok); >> > + >> > + // The duplicate would have been introduce here becuase the #doc_info{} >> > + // would not have been removed correctly. >> > + T(db.save(doc).ok); >> > + >> > + // And finally assert that there are no duplicates in _changes. >> > + var req = CouchDB.request("GET", "/test_suite_db/_changes"); >> > + var resp = JSON.parse(req.responseText); >> > + var docids = {}; >> > + for(var i = 0; i < resp.results.length; i++) { >> > + T(docids[resp.results[i].id] === undefined, "Duplicates in _changes >> > feed."); >> > + docids[resp.results[i].id] = true; >> > + } >> > }; >> > >> > Modified: couchdb/trunk/src/couchdb/couch_doc.erl >> > URL: >> > http://svn.apache.org/viewvc/couchdb/trunk/src/couchdb/couch_doc.erl?rev=1164350&r1=1164349&r2=1164350&view=diff >> > ============================================================================== >> > --- couchdb/trunk/src/couchdb/couch_doc.erl (original) >> > +++ couchdb/trunk/src/couchdb/couch_doc.erl Fri Sep 2 04:31:10 2011 >> > @@ -319,10 +319,19 @@ to_doc_info(FullDocInfo) -> >> > {DocInfo, _Path} = to_doc_info_path(FullDocInfo), >> > DocInfo. >> > >> > -max_seq([], Max) -> >> > - Max; >> > -max_seq([#rev_info{seq=Seq}|Rest], Max) -> >> > - max_seq(Rest, if Max > Seq -> Max; true -> Seq end). >> > +max_seq(Tree) -> >> > + FoldFun = fun({_Pos, _Key}, Value, _Type, MaxOldSeq) -> >> > + case Value of >> > + {_Deleted, _DiskPos, OldTreeSeq} -> >> > + % Older versions didn't track data sizes. >> > + erlang:max(MaxOldSeq, OldTreeSeq); >> > + {_Deleted, _DiskPos, OldTreeSeq, _Size} -> >> > + erlang:max(MaxOldSeq, OldTreeSeq); >> > + _ -> >> > + MaxOldSeq >> > + end >> > + end, >> > + couch_key_tree:fold(FoldFun, 0, Tree). >> > >> > to_doc_info_path(#full_doc_info{id=Id,rev_tree=Tree}) -> >> > RevInfosAndPath = [ >> > @@ -342,7 +351,7 @@ to_doc_info_path(#full_doc_info{id=Id,re >> > end, RevInfosAndPath), >> > [{_RevInfo, WinPath}|_] = SortedRevInfosAndPath, >> > RevInfos = [RevInfo || {RevInfo, _Path} <- SortedRevInfosAndPath], >> > - {#doc_info{id=Id, high_seq=max_seq(RevInfos, 0), revs=RevInfos}, >> > WinPath}. >> > + {#doc_info{id=Id, high_seq=max_seq(Tree), revs=RevInfos}, WinPath}. >> > >> > >> > >> > >> > Modified: couchdb/trunk/src/couchdb/couch_key_tree.erl >> > URL: >> > http://svn.apache.org/viewvc/couchdb/trunk/src/couchdb/couch_key_tree.erl?rev=1164350&r1=1164349&r2=1164350&view=diff >> > ============================================================================== >> > --- couchdb/trunk/src/couchdb/couch_key_tree.erl (original) >> > +++ couchdb/trunk/src/couchdb/couch_key_tree.erl Fri Sep 2 04:31:10 2011 >> > @@ -49,7 +49,7 @@ >> > >> > -export([merge/3, find_missing/2, get_key_leafs/2, get_full_key_paths/2, >> > get/2]). >> > -export([get_all_leafs/1, count_leafs/1, remove_leafs/2, >> > get_all_leafs_full/1, stem/2]). >> > --export([map/2, mapfold/3, map_leafs/2]). >> > +-export([map/2, mapfold/3, map_leafs/2, fold/3]). >> > >> > -include("couch_db.hrl"). >> > >> > @@ -320,6 +320,21 @@ count_leafs_simple([{_Key, _Value, SubTr >> > count_leafs_simple(SubTree) + count_leafs_simple(RestTree). >> > >> > >> > +fold(_Fun, Acc, []) -> >> > + Acc; >> > +fold(Fun, Acc0, [{Pos, Tree}|Rest]) -> >> > + Acc1 = fold_simple(Fun, Acc0, Pos, [Tree]), >> > + fold(Fun, Acc1, Rest). >> > + >> > +fold_simple(_Fun, Acc, _Pos, []) -> >> > + Acc; >> > +fold_simple(Fun, Acc0, Pos, [{Key, Value, SubTree} | RestTree]) -> >> > + Type = if SubTree == [] -> leaf; true -> branch end, >> > + Acc1 = Fun({Pos, Key}, Value, Type, Acc0), >> > + Acc2 = fold_simple(Fun, Acc1, Pos+1, SubTree), >> > + fold_simple(Fun, Acc2, Pos, RestTree). >> > + >> > + >> > map(_Fun, []) -> >> > []; >> > map(Fun, [{Pos, Tree}|Rest]) -> > > >
