Clear commit messages are something we’ve committed to in the past and will be a required practice once we get the merges nailed down. The review process will include ensuring that commits follow the standard we’ve agreed to.
B. On 11 Feb 2014, at 08:13, Andy Wenk <[email protected]> wrote: > This is not a rant but a selfish wish :) > > I wish all commit messages would be this explanatory like the one below. As > I am not really deep into the erlang code but would love to understand > more, a good way for me is to read all the commit messages. But mostly we > have oneliner. Reading the code is ok but with a explaining message, it is > clearer. > > Maybe you remember this when you write your next message to help me and > others :) > > Thanks a lot > > Cheers > > Andy > > ---------- Forwarded message ---------- > From: <[email protected]> > Date: 10 February 2014 22:54 > Subject: couch commit: updated refs/heads/2001-feature-external-size to > d9d11a9 > To: [email protected] > > > Updated Branches: > refs/heads/2001-feature-external-size [created] d9d11a943 > > > Implement database external size calculations > > This patch adds calculations to show the "external size" of a database > which is roughly a measure of how much disk space it would take to store > the contents of the database in flat files. It is used to calculate > rough compression ratios for capacity planning. > > COUCHDB-2001 > > > Project: http://git-wip-us.apache.org/repos/asf/couchdb-couch/repo > Commit: http://git-wip-us.apache.org/repos/asf/couchdb-couch/commit/d9d11a94 > Tree: http://git-wip-us.apache.org/repos/asf/couchdb-couch/tree/d9d11a94 > Diff: http://git-wip-us.apache.org/repos/asf/couchdb-couch/diff/d9d11a94 > > Branch: refs/heads/2001-feature-external-size > Commit: d9d11a943e2815897ee5b5b3d1b1eef4b417c1fa > Parents: 09c6556 > Author: Paul J. Davis <[email protected]> > Authored: Mon Feb 10 14:03:47 2014 -0600 > Committer: Paul J. Davis <[email protected]> > Committed: Mon Feb 10 15:32:21 2014 -0600 > > ---------------------------------------------------------------------- > include/couch_db.hrl | 5 +- > src/couch_btree.erl | 10 +- > src/couch_compress.erl | 10 ++ > src/couch_db.erl | 56 +++++----- > src/couch_db_updater.erl | 237 ++++++++++++++++++++++++++---------------- > 5 files changed, 196 insertions(+), 122 deletions(-) > ---------------------------------------------------------------------- > > > http://git-wip-us.apache.org/repos/asf/couchdb-couch/blob/d9d11a94/include/couch_db.hrl > ---------------------------------------------------------------------- > diff --git a/include/couch_db.hrl b/include/couch_db.hrl > index 2c015df..2ce5ebe 100644 > --- a/include/couch_db.hrl > +++ b/include/couch_db.hrl > @@ -65,7 +65,7 @@ > update_seq = 0, > deleted = false, > rev_tree = [], > - leafs_size = 0 > + sizes = {0, 0} > }). > > -record(httpd, > @@ -251,6 +251,7 @@ > deleted, > ptr, > seq, > - size = nil > + sizes = {0, 0}, > + atts = [] > }). > > > http://git-wip-us.apache.org/repos/asf/couchdb-couch/blob/d9d11a94/src/couch_btree.erl > ---------------------------------------------------------------------- > diff --git a/src/couch_btree.erl b/src/couch_btree.erl > index 9caceb8..ac5681d 100644 > --- a/src/couch_btree.erl > +++ b/src/couch_btree.erl > @@ -61,8 +61,8 @@ final_reduce(#btree{reduce=Reduce}, Val) -> > final_reduce(Reduce, Val); > final_reduce(Reduce, {[], []}) -> > Reduce(reduce, []); > -final_reduce(_Bt, {[], [Red]}) -> > - Red; > +final_reduce(Reduce, {[], [Red]}) -> > + Reduce(rereduce, [Red]); > final_reduce(Reduce, {[], Reductions}) -> > Reduce(rereduce, Reductions); > final_reduce(Reduce, {KVs, Reductions}) -> > @@ -92,14 +92,14 @@ fold_reduce(#btree{root=Root}=Bt, Fun, Acc, Options) -> > > full_reduce(#btree{root=nil,reduce=Reduce}) -> > {ok, Reduce(reduce, [])}; > -full_reduce(#btree{root=Root}) -> > - {ok, element(2, Root)}. > +full_reduce(#btree{root=Root, reduce=Reduce}) -> > + {ok, Reduce(rereduce, [element(2, Root)])}. > > size(#btree{root = nil}) -> > 0; > size(#btree{root = {_P, _Red}}) -> > % pre 1.2 format > - nil; > + undefined; > size(#btree{root = {_P, _Red, Size}}) -> > Size. > > > http://git-wip-us.apache.org/repos/asf/couchdb-couch/blob/d9d11a94/src/couch_compress.erl > ---------------------------------------------------------------------- > diff --git a/src/couch_compress.erl b/src/couch_compress.erl > index 6b47a7a..49876d8 100644 > --- a/src/couch_compress.erl > +++ b/src/couch_compress.erl > @@ -14,6 +14,7 @@ > > -export([compress/2, decompress/1, is_compressed/2]). > -export([get_compression_method/0]). > +-export([uncompressed_length/1]). > > -include_lib("couch/include/couch_db.hrl"). > > @@ -82,3 +83,12 @@ is_compressed(<<?TERM_PREFIX, _/binary>>, Method) -> > is_compressed(Term, _Method) when not is_binary(Term) -> > false. > > + > +uncompressed_length(<<?SNAPPY_PREFIX, _/binary>> = Bin) -> > + snappy:uncompressed_length(Bin); > +uncompressed_length(<<?COMPRESSED_TERM_PREFIX, _/binary>> = Bin) -> > + <<131, 80, Size:4/big-unsigned-integer-unit:8, _/binary>> = Bin, > + Size; > +uncompressed_length(<<?TERM_PREFIX, _/binary>> = Bin) -> > + size(Bin). > + > > http://git-wip-us.apache.org/repos/asf/couchdb-couch/blob/d9d11a94/src/couch_db.erl > ---------------------------------------------------------------------- > diff --git a/src/couch_db.erl b/src/couch_db.erl > index 32a0049..6432e8e 100644 > --- a/src/couch_db.erl > +++ b/src/couch_db.erl > @@ -302,44 +302,48 @@ get_db_info(Db) -> > name=Name, > instance_start_time=StartTime, > committed_update_seq=CommittedUpdateSeq, > - id_tree = IdBtree, > - seq_tree = SeqBtree, > - local_tree = LocalBtree > + id_tree = IdBtree > } = Db, > - {ok, Size} = couch_file:bytes(Fd), > - {ok, DbReduction} = couch_btree:full_reduce(IdBtree), > + {ok, FileSize} = couch_file:bytes(Fd), > + {ok, {Count, DelCount, Sizes}} = couch_btree:full_reduce(IdBtree), > + {ActiveSize0, ExternalSize} = Sizes, > + ActiveSize = active_size(Db, ActiveSize0), > InfoList = [ > {db_name, Name}, > - {doc_count, element(1, DbReduction)}, > - {doc_del_count, element(2, DbReduction)}, > + {doc_count, Count}, > + {doc_del_count, DelCount}, > {update_seq, SeqNum}, > {purge_seq, couch_db:get_purge_seq(Db)}, > {compact_running, Compactor/=nil}, > - {disk_size, Size}, > - {data_size, db_data_size(DbReduction, [SeqBtree, IdBtree, > LocalBtree])}, > + {disk_size, FileSize}, > + {data_size, ActiveSize}, > + {sizes, {[ > + {file, FileSize}, > + {active, ActiveSize}, > + {external, ExternalSize} > + ]}}, > {instance_start_time, StartTime}, > {disk_format_version, DiskVersion}, > {committed_update_seq, CommittedUpdateSeq} > ], > {ok, InfoList}. > > -db_data_size({_Count, _DelCount}, _Trees) -> > - % pre 1.2 format, upgraded on compaction > - null; > -db_data_size({_Count, _DelCount, nil}, _Trees) -> > - null; > -db_data_size({_Count, _DelCount, DocAndAttsSize}, Trees) -> > - sum_tree_sizes(DocAndAttsSize, Trees). > - > -sum_tree_sizes(Acc, []) -> > - Acc; > -sum_tree_sizes(Acc, [T | Rest]) -> > - case couch_btree:size(T) of > - nil -> > - null; > - Sz -> > - sum_tree_sizes(Acc + Sz, Rest) > - end. > +active_size(#db{}=Db, DocActiveSize) -> > + Trees = [ > + Db#db.id_tree, > + Db#db.seq_tree, > + Db#db.local_tree > + ], > + lists:foldl(fun(T, Acc) -> > + case couch_btree:size(T) of > + _ when Acc == null -> > + null; > + undefined -> > + null; > + Size -> > + Acc + Size > + end > + end, DocActiveSize, Trees). > > get_design_docs(#db{name = <<"shards/", _:18/binary, DbName/binary>>}) -> > {_, Ref} = spawn_monitor(fun() -> exit(fabric:design_docs(DbName)) > end), > > http://git-wip-us.apache.org/repos/asf/couchdb-couch/blob/d9d11a94/src/couch_db_updater.erl > ---------------------------------------------------------------------- > diff --git a/src/couch_db_updater.erl b/src/couch_db_updater.erl > index 649826a..eb75177 100644 > --- a/src/couch_db_updater.erl > +++ b/src/couch_db_updater.erl > @@ -342,40 +342,60 @@ collect_updates(GroupedDocsAcc, ClientsAcc, > MergeConflicts, FullCommit) -> > end. > > rev_tree(DiskTree) -> > - couch_key_tree:mapfold(fun > - (_RevId, {IsDeleted, BodyPointer, UpdateSeq}, leaf, _Acc) -> > + couch_key_tree:map(fun > + (_RevId, {Del, Ptr, Seq}) -> > % pre 1.2 format, will be upgraded on compaction > - {#leaf{deleted=?i2b(IsDeleted), ptr=BodyPointer, > seq=UpdateSeq}, nil}; > - (_RevId, {IsDeleted, BodyPointer, UpdateSeq}, branch, Acc) -> > - {#leaf{deleted=?i2b(IsDeleted), ptr=BodyPointer, > seq=UpdateSeq}, Acc}; > - (_RevId, {IsDeleted, BodyPointer, UpdateSeq, Size}, leaf, Acc) -> > - Acc2 = sum_leaf_sizes(Acc, Size), > - {#leaf{deleted=?i2b(IsDeleted), ptr=BodyPointer, > seq=UpdateSeq, size=Size}, Acc2}; > - (_RevId, {IsDeleted, BodyPointer, UpdateSeq, Size}, branch, Acc) -> > - {#leaf{deleted=?i2b(IsDeleted), ptr=BodyPointer, > seq=UpdateSeq, size=Size}, Acc}; > - (_RevId, ?REV_MISSING, _Type, Acc) -> > - {?REV_MISSING, Acc} > - end, 0, DiskTree). > + #leaf{deleted=?i2b(Del), ptr=Ptr, seq=Seq}; > + (_RevId, {Del, Ptr, Seq, Size}) -> > + % Pre-bigcouch format, will be upgraded on compaction > + #leaf{ > + deleted = ?i2b(Del), > + ptr = Ptr, > + seq = Seq, > + sizes = {Size, 0}, > + atts = [] > + }; > + (_RevId, {Del, Ptr, Seq, Sizes, Atts}) -> > + #leaf{ > + deleted = ?i2b(Del), > + ptr = Ptr, > + seq = Seq, > + sizes = Sizes, > + atts = Atts > + }; > + (_RevId, ?REV_MISSING) -> > + ?REV_MISSING > + end, DiskTree). > > disk_tree(RevTree) -> > couch_key_tree:map(fun > (_RevId, ?REV_MISSING) -> > ?REV_MISSING; > - (_RevId, #leaf{deleted=IsDeleted, ptr=BodyPointer, seq=UpdateSeq, > size=Size}) -> > - {?b2i(IsDeleted), BodyPointer, UpdateSeq, Size} > + (_RevId, #leaf{}=Leaf) -> > + #leaf{ > + deleted = Del, > + ptr = Ptr, > + seq = Seq, > + sizes = Sizes, > + atts = Atts > + } = Leaf, > + {?b2i(Del), Ptr, Seq, upgrade_sizes(Sizes), Atts} > end, RevTree). > > +upgrade_sizes({_, _} = Sizes) -> > + Sizes; > +upgrade_sizes(S) when is_integer(S) -> > + {S, 0}. > + > btree_by_seq_split(#full_doc_info{id=Id, update_seq=Seq, deleted=Del, > rev_tree=T}) -> > {Seq, {Id, ?b2i(Del), disk_tree(T)}}. > > btree_by_seq_join(Seq, {Id, Del, DiskTree}) when is_integer(Del) -> > - {RevTree, LeafsSize} = rev_tree(DiskTree), > #full_doc_info{ > id = Id, > update_seq = Seq, > deleted = ?i2b(Del), > - rev_tree = RevTree, > - leafs_size = LeafsSize > + rev_tree = rev_tree(DiskTree) > }; > btree_by_seq_join(KeySeq, {Id, RevInfos, DeletedRevInfos}) -> > % Older versions stored #doc_info records in the seq_tree. > @@ -389,49 +409,59 @@ btree_by_seq_join(KeySeq, {Id, RevInfos, > DeletedRevInfos}) -> > [#rev_info{rev=Rev,seq=Seq,deleted=true,body_sp = Bp} || > {Rev, Seq, Bp} <- DeletedRevInfos]}. > > -btree_by_id_split(#full_doc_info{id=Id, update_seq=Seq, > - deleted=Deleted, rev_tree=Tree}) -> > - {Id, {Seq, ?b2i(Deleted), disk_tree(Tree)}}. > +btree_by_id_split(#full_doc_info{}=Info) -> > + #full_doc_info{ > + id = Id, > + update_seq = Seq, > + deleted = Del, > + sizes = Sizes, > + rev_tree = Tree > + } = Info, > + {Id, {Seq, ?b2i(Del), upgrade_sizes(Sizes), disk_tree(Tree)}}. > > btree_by_id_join(Id, {HighSeq, Deleted, DiskTree}) -> > - {Tree, LeafsSize} = rev_tree(DiskTree), > + % Upgrade from pre-BigCouch disk format > + ActiveSize = couch_key_tree:fold(fun > + (_RevId, {_Del, _Ptr, _Seq}, _, Acc) -> > + Acc; > + (_RevId, {_Del, _Ptr, _Seq, Size}, _, Acc) -> > + Acc + Size; > + (_RevId, {_Del, _Ptr, _Seq, Sizes, _Atts}, _, Acc) -> > + {Active, _} = Sizes, > + Active + Acc; > + (_RevId, ?REV_MISSING, _, Acc) -> > + Acc > + end, 0, DiskTree), > + btree_by_id_join(Id, {HighSeq, Deleted, {ActiveSize, 0}, DiskTree}); > + > +btree_by_id_join(Id, {HighSeq, Deleted, Sizes, DiskTree}) -> > #full_doc_info{ > id = Id, > update_seq = HighSeq, > deleted = ?i2b(Deleted), > - rev_tree = Tree, > - leafs_size = LeafsSize > + sizes = Sizes, > + rev_tree = rev_tree(DiskTree) > }. > > btree_by_id_reduce(reduce, FullDocInfos) -> > - lists:foldl( > - fun(Info, {NotDeleted, Deleted, Size}) -> > - Size2 = sum_leaf_sizes(Size, Info#full_doc_info.leafs_size), > - case Info#full_doc_info.deleted of > - true -> > - {NotDeleted, Deleted + 1, Size2}; > - false -> > - {NotDeleted + 1, Deleted, Size2} > - end > - end, > - {0, 0, 0}, FullDocInfos); > -btree_by_id_reduce(rereduce, Reds) -> > - lists:foldl( > - fun({NotDeleted, Deleted}, {AccNotDeleted, AccDeleted, _AccSize}) > -> > - % pre 1.2 format, will be upgraded on compaction > - {AccNotDeleted + NotDeleted, AccDeleted + Deleted, nil}; > - ({NotDeleted, Deleted, Size}, {AccNotDeleted, AccDeleted, > AccSize}) -> > - AccSize2 = sum_leaf_sizes(AccSize, Size), > - {AccNotDeleted + NotDeleted, AccDeleted + Deleted, AccSize2} > - end, > - {0, 0, 0}, Reds). > - > -sum_leaf_sizes(nil, _) -> > - nil; > -sum_leaf_sizes(_, nil) -> > - nil; > -sum_leaf_sizes(Size1, Size2) -> > - Size1 + Size2. > + lists:foldl(fun > + (#full_doc_info{deleted=false, sizes=Sizes}, {NotDel, Del, SAcc}) > -> > + {NotDel + 1, Del, reduce_sizes(Sizes, SAcc)}; > + (#full_doc_info{deleted=true, sizes=Sizes}, {NotDel, Del, SAcc}) -> > + {NotDel, Del + 1, reduce_sizes(Sizes, SAcc)} > + end, {0, 0, {0, 0}}, FullDocInfos); > +btree_by_id_reduce(rereduce, Reductions) -> > + lists:foldl(fun > + ({NotDel, Del}, {NDAcc, DAcc, SAcc}) -> > + {NotDel + NDAcc, Del + DAcc, SAcc}; > + ({NotDel, Del, Sizes}, {NDAcc, DAcc, SAcc}) -> > + {NotDel + NDAcc, Del + DAcc, reduce_sizes(Sizes, SAcc)} > + end, {0, 0, {0, 0}}, Reductions). > + > +reduce_sizes({A1, E1}, {A2, E2}) -> > + {A1 + A2, E1 + E2}; > +reduce_sizes(S, {_, _} = Acc) when is_integer(S) -> > + reduce_sizes({S, 0}, Acc). > > btree_by_seq_reduce(reduce, DocInfos) -> > % count the number of documents > @@ -549,10 +579,15 @@ flush_trees(_Db, [], AccFlushedTrees) -> > flush_trees(#db{fd = Fd} = Db, > [InfoUnflushed | RestUnflushed], AccFlushed) -> > #full_doc_info{update_seq=UpdateSeq, rev_tree=Unflushed} = > InfoUnflushed, > - {Flushed, LeafsSize} = couch_key_tree:mapfold( > + {Flushed, FinalAcc} = couch_key_tree:mapfold( > fun(_Rev, Value, Type, Acc) -> > case Value of > - #doc{deleted = IsDeleted, body = {summary, Summary, AttsFd}} -> > + #doc{} = Doc -> > + #doc{ > + deleted = IsDeleted, > + body = {summary, Summary, AttsFd}, > + atts = Atts > + } = Doc, > % this node value is actually an unwritten document > summary, > % write to disk. > % make sure the Fd in the written bins is the same Fd we > are > @@ -571,31 +606,44 @@ flush_trees(#db{fd = Fd} = Db, > " changed. Possibly retrying.", []), > throw(retry) > end, > - {ok, NewSummaryPointer, SummarySize} = > - couch_file:append_raw_chunk(Fd, Summary), > - TotalSize = lists:foldl( > - fun(#att{att_len = L}, A) -> A + L end, > - SummarySize, Value#doc.atts), > - NewValue = #leaf{deleted=IsDeleted, ptr=NewSummaryPointer, > - seq=UpdateSeq, size=TotalSize}, > - case Type of > - leaf -> > - {NewValue, Acc + TotalSize}; > - branch -> > - {NewValue, Acc} > - end; > - {_, _, _, LeafSize} when Type =:= leaf, LeafSize =/= nil -> > - {Value, Acc + LeafSize}; > - _ -> > + AttsInfo = lists:usort([ > + {P, L} || #att{data = {_, P}, att_len = L} <- Atts > + ]), > + [_, _, SummaryBin] = Summary, > + ExternalSize = > couch_compress:uncompressed_length(SummaryBin), > + {ok, NewPtr, ActiveSize} > + = couch_file:append_raw_chunk(Fd, Summary), > + Leaf = #leaf{ > + deleted = IsDeleted, > + ptr = NewPtr, > + seq = UpdateSeq, > + sizes = {ActiveSize, ExternalSize}, > + atts = AttsInfo > + }, > + {Leaf, add_sizes(Type, Leaf, Acc)}; > + #leaf{} = Leaf -> > + {Value, add_sizes(Type, Leaf, Acc)}; > + ?REV_MISSING -> > {Value, Acc} > end > - end, 0, Unflushed), > + end, {0, 0, []}, Unflushed), > + {FinalAS, FinalES, FinalAtts} = FinalAcc, > + TotalAttSize = lists:foldl(fun({_, S}, A) -> S + A end, 0, FinalAtts), > InfoFlushed = InfoUnflushed#full_doc_info{ > rev_tree = Flushed, > - leafs_size = LeafsSize > + sizes = {FinalAS + TotalAttSize, FinalES + TotalAttSize} > }, > flush_trees(Db, RestUnflushed, [InfoFlushed | AccFlushed]). > > +add_sizes(branch, _, Acc) -> > + Acc; > +add_sizes(leaf, #leaf{sizes=Sizes, atts=AttSizes}, Acc) -> > + {ActiveSize, ExternalSize} = upgrade_sizes(Sizes), > + {ASAcc, ESAcc, AttsAcc} = Acc, > + NewASAcc = ActiveSize + ASAcc, > + NewESAcc = ExternalSize + ESAcc, > + NewAttsAcc = lists:umerge(AttSizes, AttsAcc), > + {NewASAcc, NewESAcc, NewAttsAcc}. > > send_result(Client, Ref, NewResult) -> > % used to send a result to the client > @@ -896,23 +944,34 @@ copy_docs(Db, #db{fd = DestFd} = NewDb, MixedInfos, > Retry) -> > A =< B > end, merge_lookups(MixedInfos, LookupResults)), > > - NewInfos1 = lists:map( > - fun(#full_doc_info{rev_tree=RevTree}=Info) -> > - Info#full_doc_info{rev_tree=couch_key_tree:map( > - fun(_, _, branch) -> > - ?REV_MISSING; > - (_Rev, #leaf{ptr=Sp}=Leaf, leaf) -> > - {_Body, AttsInfo} = Summary = copy_doc_attachments( > - Db, Sp, DestFd), > - SummaryChunk = make_doc_summary(NewDb, Summary), > - {ok, Pos, SummarySize} = couch_file:append_raw_chunk( > - DestFd, SummaryChunk), > - TotalLeafSize = lists:foldl( > - fun({_, _, _, AttLen, _, _, _, _}, S) -> S + > AttLen end, > - SummarySize, AttsInfo), > - Leaf#leaf{ptr=Pos, size=TotalLeafSize} > - end, RevTree)} > - end, NewInfos0), > + NewInfos1 = lists:map(fun(Info) -> > + {NewRevTree, FinalAcc} = couch_key_tree:mapfold(fun > + (_Rev, #leaf{ptr=Sp}=Leaf, leaf, SizesAcc) -> > + {Body, AttInfos} = copy_doc_attachments(Db, Sp, DestFd), > + Summary = make_doc_summary(NewDb, {Body, AttInfos}), > + [_, _, SummaryBin] = Summary, > + ExternalSize = > couch_compress:uncompressed_length(SummaryBin), > + {ok, Pos, ActiveSize} > + = couch_file:append_raw_chunk(DestFd, Summary), > + AttSizes = [{element(3, A), element(4, A)} || A <- > AttInfos], > + NewLeaf = Leaf#leaf{ > + ptr = Pos, > + sizes = {ActiveSize, ExternalSize}, > + atts = lists:usort(AttSizes) > + }, > + {NewLeaf, add_sizes(leaf, NewLeaf, SizesAcc)}; > + (_Rev, _Value, branch, SizesAcc) -> > + {?REV_MISSING, SizesAcc} > + end, {0, 0, []}, Info#full_doc_info.rev_tree), > + {FinalAS, FinalES, FinalAtts} = FinalAcc, > + TotalAttSize = lists:foldl(fun({_, S}, A) -> S + A end, 0, > FinalAtts), > + NewActiveSize = FinalAS + TotalAttSize, > + NewExternalSize = FinalES + TotalAttSize, > + Info#full_doc_info{ > + rev_tree = NewRevTree, > + sizes = {NewActiveSize, NewExternalSize} > + } > + end, NewInfos0), > > NewInfos = stem_full_doc_infos(Db, NewInfos1), > RemoveSeqs = > > > > > -- > Andy Wenk > Hamburg - Germany > RockIt! > > http://www.couchdb-buch.de > http://www.pg-praxisbuch.de > > GPG fingerprint: C044 8322 9E12 1483 4FEC 9452 B65D 6BE3 9ED3 9588 > > https://people.apache.org/keys/committer/andywenk.asc
