Introduce a #leaf{} record in revision trees
This is substantially based on work by Bob Dionne (a452a4a) to introduce
the data size calculations at Cloudant. There's quite a bit of conflict
in code and actual behavior between this work and what Filipe wrote for
CouchDB. This new record should ease the transition of merging both
behaviors.
An important thing to note is that this record only ever lives in RAM and
is never written to disk, so we don't have to worry about record upgrades.
We will, however, have to maintain the upgrade info that Filipe and Bob
both introduced (which is fairly straightforward).
Project: http://git-wip-us.apache.org/repos/asf/couchdb-couch/repo
Commit: http://git-wip-us.apache.org/repos/asf/couchdb-couch/commit/85cf2b26
Tree: http://git-wip-us.apache.org/repos/asf/couchdb-couch/tree/85cf2b26
Diff: http://git-wip-us.apache.org/repos/asf/couchdb-couch/diff/85cf2b26
Branch: refs/heads/import
Commit: 85cf2b26e52135b75135587cc553abb635c2a2c2
Parents: dc5a6de
Author: Robert Newson <[email protected]>
Authored: Sun Mar 10 16:12:28 2013 -0500
Committer: Paul J. Davis <[email protected]>
Committed: Fri Jan 17 16:44:31 2014 -0800
----------------------------------------------------------------------
include/couch_db.hrl | 9 +++++++++
src/couch_db.erl | 22 +++++++---------------
src/couch_db_updater.erl | 40 +++++++++++++++++-----------------------
src/couch_doc.erl | 13 ++++++++-----
src/couch_util.erl | 11 +++++++++++
5 files changed, 52 insertions(+), 43 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/couchdb-couch/blob/85cf2b26/include/couch_db.hrl
----------------------------------------------------------------------
diff --git a/include/couch_db.hrl b/include/couch_db.hrl
index 77006e4..61a59f7 100644
--- a/include/couch_db.hrl
+++ b/include/couch_db.hrl
@@ -23,6 +23,8 @@
-define(b2l(V), binary_to_list(V)).
-define(l2b(V), list_to_binary(V)).
+-define(i2b(V), couch_util:integer_to_boolean(V)).
+-define(b2i(V), couch_util:boolean_to_integer(V)).
-define(term_to_bin(T), term_to_binary(T, [{minor_version, 1}])).
-define(term_size(T),
try
@@ -271,3 +273,10 @@
stop_fun
}).
+-record(leaf, {
+ deleted,
+ ptr,
+ seq,
+ size = nil
+}).
+
http://git-wip-us.apache.org/repos/asf/couchdb-couch/blob/85cf2b26/src/couch_db.erl
----------------------------------------------------------------------
diff --git a/src/couch_db.erl b/src/couch_db.erl
index 7734c7c..e4e8cca 100644
--- a/src/couch_db.erl
+++ b/src/couch_db.erl
@@ -580,7 +580,7 @@ prep_and_validate_update(Db, #doc{id=Id,revs={RevStart, Revs}}=Doc,
case Revs of
[PrevRev|_] ->
case dict:find({RevStart, PrevRev}, LeafRevsDict) of
- {ok, {Deleted, DiskSp, DiskRevs}} ->
+ {ok, {#leaf{deleted=Deleted, ptr=DiskSp}, DiskRevs}} ->
case couch_doc:has_stubs(Doc) of
true ->
DiskDoc = make_doc(Db, Id, Deleted, DiskSp, DiskRevs),
@@ -643,12 +643,8 @@ prep_and_validate_updates(Db, [DocBucket|RestBuckets],
AllowConflict, AccPrepped, AccErrors) ->
Leafs = couch_key_tree:get_all_leafs(OldRevTree),
LeafRevsDict = dict:from_list([
- begin
- Deleted = element(1, LeafVal),
- Sp = element(2, LeafVal),
- {{Start, RevId}, {Deleted, Sp, Revs}}
- end ||
- {LeafVal, {Start, [RevId | _]} = Revs} <- Leafs
+ {{Start, RevId}, {Leaf, Revs}} ||
+ {Leaf, {Start, [RevId | _]} = Revs} <- Leafs
]),
{PreppedBucket, AccErrors3} = lists:foldl(
fun({Doc, Ref}, {Docs2Acc, AccErrors2}) ->
@@ -895,9 +891,7 @@ make_first_doc_on_disk(Db, Id, Pos, [{_Rev, #doc{}} | RestPath]) ->
make_first_doc_on_disk(Db, Id, Pos-1, RestPath);
make_first_doc_on_disk(Db, Id, Pos, [{_Rev, ?REV_MISSING}|RestPath]) ->
make_first_doc_on_disk(Db, Id, Pos - 1, RestPath);
-make_first_doc_on_disk(Db, Id, Pos, [{_Rev, RevValue} |_]=DocPath) ->
- IsDel = element(1, RevValue),
- Sp = element(2, RevValue),
+make_first_doc_on_disk(Db, Id, Pos, [{_Rev, #leaf{deleted=IsDel, ptr=Sp}} |_]=DocPath) ->
Revs = [Rev || {Rev, _} <- DocPath],
make_doc(Db, Id, IsDel, Sp, {Pos, Revs}).
@@ -1243,9 +1237,7 @@ open_doc_revs_int(Db, IdRevs, Options) ->
?REV_MISSING ->
% we have the rev in our list but know nothing about it
{{not_found, missing}, {Pos, Rev}};
- RevValue ->
- IsDeleted = element(1, RevValue),
- SummaryPtr = element(2, RevValue),
+ #leaf{deleted=IsDeleted, ptr=SummaryPtr} ->
{ok, make_doc(Db, Id, IsDeleted, SummaryPtr,
FoundRevPath)}
end
end, FoundRevs),
@@ -1297,8 +1289,8 @@ doc_meta_info(#doc_info{high_seq=Seq,revs=[#rev_info{rev=Rev}|RestInfo]}, RevTre
[{revs_info, Pos, lists:map(
fun({Rev1, ?REV_MISSING}) ->
{Rev1, missing};
- ({Rev1, RevValue}) ->
- case element(1, RevValue) of
+ ({Rev1, Leaf}) ->
+ case Leaf#leaf.deleted of
true ->
{Rev1, deleted};
false ->
http://git-wip-us.apache.org/repos/asf/couchdb-couch/blob/85cf2b26/src/couch_db_updater.erl
----------------------------------------------------------------------
diff --git a/src/couch_db_updater.erl b/src/couch_db_updater.erl
index c64911e..cc48ef8 100644
--- a/src/couch_db_updater.erl
+++ b/src/couch_db_updater.erl
@@ -158,10 +158,8 @@ handle_call({purge_docs, IdRevs}, _From, Db) ->
{DocInfoToUpdate, NewSeq} = lists:mapfoldl(
fun(#full_doc_info{rev_tree=Tree}=FullInfo, SeqAcc) ->
Tree2 = couch_key_tree:map_leafs(
- fun(_RevId, LeafVal) ->
- IsDeleted = element(1, LeafVal),
- BodyPointer = element(2, LeafVal),
- {IsDeleted, BodyPointer, SeqAcc + 1}
+ fun(_RevId, Leaf) ->
+ Leaf#leaf{seq=SeqAcc+1}
end, Tree),
{FullInfo#full_doc_info{rev_tree=Tree2}, SeqAcc + 1}
end, LastSeq, FullDocInfoToUpdate),
@@ -340,37 +338,35 @@ rev_tree(DiskTree) ->
couch_key_tree:mapfold(fun
(_RevId, {IsDeleted, BodyPointer, UpdateSeq}, leaf, _Acc) ->
% pre 1.2 format, will be upgraded on compaction
- {{IsDeleted == 1, BodyPointer, UpdateSeq, nil}, nil};
+ {#leaf{deleted=?i2b(IsDeleted), ptr=BodyPointer, seq=UpdateSeq}, nil};
(_RevId, {IsDeleted, BodyPointer, UpdateSeq}, branch, Acc) ->
- {{IsDeleted == 1, BodyPointer, UpdateSeq, nil}, Acc};
+ {#leaf{deleted=?i2b(IsDeleted), ptr=BodyPointer, seq=UpdateSeq}, Acc};
(_RevId, {IsDeleted, BodyPointer, UpdateSeq, Size}, leaf, Acc) ->
Acc2 = sum_leaf_sizes(Acc, Size),
- {{IsDeleted == 1, BodyPointer, UpdateSeq, Size}, Acc2};
+ {#leaf{deleted=?i2b(IsDeleted), ptr=BodyPointer, seq=UpdateSeq, size=Size}, Acc2};
(_RevId, {IsDeleted, BodyPointer, UpdateSeq, Size}, branch, Acc) ->
- {{IsDeleted == 1, BodyPointer, UpdateSeq, Size}, Acc};
+ {#leaf{deleted=?i2b(IsDeleted), ptr=BodyPointer, seq=UpdateSeq, size=Size}, Acc};
(_RevId, ?REV_MISSING, _Type, Acc) ->
{?REV_MISSING, Acc}
- end, DiskTree).
+ end, 0, DiskTree).
disk_tree(RevTree) ->
couch_key_tree:map(fun
(_RevId, ?REV_MISSING) ->
?REV_MISSING;
- (_RevId, {IsDeleted, BodyPointer, UpdateSeq}) ->
- {if IsDeleted -> 1; true -> 0 end, BodyPointer, UpdateSeq, nil};
- (_RevId, {IsDeleted, BodyPointer, UpdateSeq, Size}) ->
- {if IsDeleted -> 1; true -> 0 end, BodyPointer, UpdateSeq, Size}
+ (_RevId, #leaf{deleted=IsDeleted, ptr=BodyPointer, seq=UpdateSeq, size=Size}) ->
+ {?b2i(IsDeleted), BodyPointer, UpdateSeq, Size}
end, RevTree).
btree_by_seq_split(#full_doc_info{id=Id, update_seq=Seq, deleted=Del,
rev_tree=T}) ->
- {Seq, {Id, if Del -> 1; true -> 0 end, disk_tree(T)}}.
+ {Seq, {Id, ?b2i(Del), disk_tree(T)}}.
btree_by_seq_join(Seq, {Id, Del, DiskTree}) when is_integer(Del) ->
{RevTree, LeafsSize} = rev_tree(DiskTree),
#full_doc_info{
id = Id,
update_seq = Seq,
- deleted = (Del == 1),
+ deleted = ?i2b(Del),
rev_tree = RevTree,
leafs_size = LeafsSize
};
@@ -388,14 +384,14 @@ btree_by_seq_join(KeySeq, {Id, RevInfos, DeletedRevInfos}) ->
btree_by_id_split(#full_doc_info{id=Id, update_seq=Seq,
deleted=Deleted, rev_tree=Tree}) ->
- {Id, {Seq, if Deleted -> 1; true -> 0 end, disk_tree(Tree)}}.
+ {Id, {Seq, ?b2i(Deleted), disk_tree(Tree)}}.
btree_by_id_join(Id, {HighSeq, Deleted, DiskTree}) ->
{Tree, LeafsSize} = rev_tree(DiskTree),
#full_doc_info{
id = Id,
update_seq = HighSeq,
- deleted = (Deleted == 1),
+ deleted = ?i2b(Deleted),
rev_tree = Tree,
leafs_size = LeafsSize
}.
@@ -573,7 +569,8 @@ flush_trees(#db{fd = Fd} = Db,
TotalSize = lists:foldl(
fun(#att{att_len = L}, A) -> A + L end,
SummarySize, Value#doc.atts),
- NewValue = {IsDeleted, NewSummaryPointer, UpdateSeq, TotalSize},
+ NewValue = #leaf{deleted=IsDeleted, ptr=NewSummaryPointer,
+ seq=UpdateSeq, size=TotalSize},
case Type of
leaf ->
{NewValue, Acc + TotalSize};
@@ -899,10 +896,7 @@ copy_docs(Db, #db{fd = DestFd} = NewDb, MixedInfos, Retry) ->
Info#full_doc_info{rev_tree=couch_key_tree:map(
fun(_, _, branch) ->
?REV_MISSING;
- (_Rev, LeafVal, leaf) ->
- IsDel = element(1, LeafVal),
- Sp = element(2, LeafVal),
- Seq = element(3, LeafVal),
+ (_Rev, #leaf{ptr=Sp}=Leaf, leaf) ->
{_Body, AttsInfo} = Summary = copy_doc_attachments(
Db, Sp, DestFd),
SummaryChunk = make_doc_summary(NewDb, Summary),
@@ -911,7 +905,7 @@ copy_docs(Db, #db{fd = DestFd} = NewDb, MixedInfos, Retry) ->
TotalLeafSize = lists:foldl(
 fun({_, _, _, AttLen, _, _, _, _}, S) -> S + AttLen end,
SummarySize, AttsInfo),
- {IsDel, Pos, Seq, TotalLeafSize}
+ Leaf#leaf{ptr=Pos, size=TotalLeafSize}
end, RevTree)}
end, NewInfos0),
http://git-wip-us.apache.org/repos/asf/couchdb-couch/blob/85cf2b26/src/couch_doc.erl
----------------------------------------------------------------------
diff --git a/src/couch_doc.erl b/src/couch_doc.erl
index bcf79d2..6f2ca9b 100644
--- a/src/couch_doc.erl
+++ b/src/couch_doc.erl
@@ -330,7 +330,10 @@ max_seq(Tree, UpdateSeq) ->
{_Deleted, _DiskPos, OldTreeSeq} ->
% Older versions didn't track data sizes.
erlang:max(MaxOldSeq, OldTreeSeq);
- {_Deleted, _DiskPos, OldTreeSeq, _Size} ->
+ {_Deleted, _DiskPos, OldTreeSeq, _Size} -> % necessary clause?
+ % Older versions didn't store #leaf records.
+ erlang:max(MaxOldSeq, OldTreeSeq);
+ #leaf{seq=OldTreeSeq} ->
erlang:max(MaxOldSeq, OldTreeSeq);
_ ->
MaxOldSeq
@@ -341,11 +344,11 @@ max_seq(Tree, UpdateSeq) ->
to_doc_info_path(#full_doc_info{id=Id,rev_tree=Tree,update_seq=FDISeq}) ->
RevInfosAndPath = [
{#rev_info{
- deleted = element(1, LeafVal),
- body_sp = element(2, LeafVal),
- seq = element(3, LeafVal),
+ deleted = Leaf#leaf.deleted,
+ body_sp = Leaf#leaf.ptr,
+ seq = Leaf#leaf.seq,
rev = {Pos, RevId}
- }, Path} || {LeafVal, {Pos, [RevId | _]} = Path} <-
+ }, Path} || {Leaf, {Pos, [RevId | _]} = Path} <-
couch_key_tree:get_all_leafs(Tree)
],
SortedRevInfosAndPath = lists:sort(
http://git-wip-us.apache.org/repos/asf/couchdb-couch/blob/85cf2b26/src/couch_util.erl
----------------------------------------------------------------------
diff --git a/src/couch_util.erl b/src/couch_util.erl
index 3556d36..d09211a 100644
--- a/src/couch_util.erl
+++ b/src/couch_util.erl
@@ -29,6 +29,7 @@
-export([encode_doc_id/1]).
-export([with_db/2]).
-export([rfc1123_date/0, rfc1123_date/1]).
+-export([integer_to_boolean/1, boolean_to_integer/1]).
-include_lib("couch/include/couch_db.hrl").
@@ -487,3 +488,13 @@ month(9) -> "Sep";
month(10) -> "Oct";
month(11) -> "Nov";
month(12) -> "Dec".
+
+integer_to_boolean(1) ->
+ true;
+integer_to_boolean(0) ->
+ false.
+
+boolean_to_integer(true) ->
+ 1;
+boolean_to_integer(false) ->
+ 0.