On Thu, Jun 2, 2011 at 3:25 AM, Adam Kocoloski <[email protected]> wrote: > Nice. So many useful things one can find digging around the undocumented > internals of Erlang/OTP ...
Best of all, I got confirmation that it will be documented in a future release (R15), though under a different name (external_size/1 will still be kept around for many releases, however). > > On May 31, 2011, at 6:04 AM, [email protected] wrote: > >> Author: fdmanana >> Date: Tue May 31 10:04:20 2011 >> New Revision: 1129597 >> >> URL: http://svn.apache.org/viewvc?rev=1129597&view=rev >> Log: >> More efficient term size calculation >> >> Unlike byte_size(term_to_binary(Term)), the BIF erlang:external_size/1 >> doesn't >> do the serialization step, it only calculates the maximum external size for >> any term, which is more efficient (faster and avoids the garbage generation). >> >> With the test couch_http_bulk_writes.sh at [1], using 20 writers and batches >> of 100 1Kb documents, it's possible to write about 1 400 000 documents with >> this patch instead of about 1 300 000. >> >> [1] https://github.com/fdmanana/basho_bench_couch >> >> >> >> Modified: >> couchdb/trunk/src/couchdb/couch_btree.erl >> couchdb/trunk/src/couchdb/couch_db.hrl >> couchdb/trunk/src/couchdb/couch_db_updater.erl >> couchdb/trunk/src/couchdb/couch_view_compactor.erl >> couchdb/trunk/src/couchdb/couch_work_queue.erl >> >> Modified: couchdb/trunk/src/couchdb/couch_btree.erl >> URL: >> http://svn.apache.org/viewvc/couchdb/trunk/src/couchdb/couch_btree.erl?rev=1129597&r1=1129596&r2=1129597&view=diff >> ============================================================================== >> --- couchdb/trunk/src/couchdb/couch_btree.erl (original) >> +++ couchdb/trunk/src/couchdb/couch_btree.erl Tue May 31 10:04:20 2011 >> @@ -276,26 +276,26 @@ complete_root(Bt, KPs) -> >> % written. Plus with the "case byte_size(term_to_binary(InList)) of" code >> % it's probably really inefficient. 
>> >> -chunkify(#btree{compression = Comp} = Bt, InList) -> >> - case byte_size(couch_compress:compress(InList, Comp)) of >> +chunkify(InList) -> >> + case ?term_size(InList) of >> Size when Size > ?CHUNK_THRESHOLD -> >> NumberOfChunksLikely = ((Size div ?CHUNK_THRESHOLD) + 1), >> ChunkThreshold = Size div NumberOfChunksLikely, >> - chunkify(Bt, InList, ChunkThreshold, [], 0, []); >> + chunkify(InList, ChunkThreshold, [], 0, []); >> _Else -> >> [InList] >> end. >> >> -chunkify(_Bt, [], _ChunkThreshold, [], 0, OutputChunks) -> >> +chunkify([], _ChunkThreshold, [], 0, OutputChunks) -> >> lists:reverse(OutputChunks); >> -chunkify(_Bt, [], _ChunkThreshold, OutList, _OutListSize, OutputChunks) -> >> +chunkify([], _ChunkThreshold, OutList, _OutListSize, OutputChunks) -> >> lists:reverse([lists:reverse(OutList) | OutputChunks]); >> -chunkify(Bt, [InElement | RestInList], ChunkThreshold, OutList, >> OutListSize, OutputChunks) -> >> - case byte_size(couch_compress:compress(InElement, >> Bt#btree.compression)) of >> +chunkify([InElement | RestInList], ChunkThreshold, OutList, OutListSize, >> OutputChunks) -> >> + case ?term_size(InElement) of >> Size when (Size + OutListSize) > ChunkThreshold andalso OutList /= [] -> >> - chunkify(Bt, RestInList, ChunkThreshold, [], 0, >> [lists:reverse([InElement | OutList]) | OutputChunks]); >> + chunkify(RestInList, ChunkThreshold, [], 0, >> [lists:reverse([InElement | OutList]) | OutputChunks]); >> Size -> >> - chunkify(Bt, RestInList, ChunkThreshold, [InElement | OutList], >> OutListSize + Size, OutputChunks) >> + chunkify(RestInList, ChunkThreshold, [InElement | OutList], >> OutListSize + Size, OutputChunks) >> end. 
>> >> modify_node(Bt, RootPointerInfo, Actions, QueryOutput) -> >> @@ -350,7 +350,7 @@ get_node(#btree{fd = Fd}, NodePos) -> >> >> write_node(#btree{fd = Fd, compression = Comp} = Bt, NodeType, NodeList) -> >> % split up nodes into smaller sizes >> - NodeListList = chunkify(Bt, NodeList), >> + NodeListList = chunkify(NodeList), >> % now write out each chunk and return the KeyPointer pairs for those >> nodes >> ResultList = [ >> begin >> >> Modified: couchdb/trunk/src/couchdb/couch_db.hrl >> URL: >> http://svn.apache.org/viewvc/couchdb/trunk/src/couchdb/couch_db.hrl?rev=1129597&r1=1129596&r2=1129597&view=diff >> ============================================================================== >> --- couchdb/trunk/src/couchdb/couch_db.hrl (original) >> +++ couchdb/trunk/src/couchdb/couch_db.hrl Tue May 31 10:04:20 2011 >> @@ -27,6 +27,12 @@ >> -define(b2l(V), binary_to_list(V)). >> -define(l2b(V), list_to_binary(V)). >> -define(term_to_bin(T), term_to_binary(T, [{minor_version, 1}])). >> +-define(term_size(T), >> + try >> + erlang:external_size(T) >> + catch _:_ -> >> + byte_size(?term_to_bin(T)) >> + end). >> >> -define(DEFAULT_ATTACHMENT_CONTENT_TYPE, <<"application/octet-stream">>). 
>> >> >> Modified: couchdb/trunk/src/couchdb/couch_db_updater.erl >> URL: >> http://svn.apache.org/viewvc/couchdb/trunk/src/couchdb/couch_db_updater.erl?rev=1129597&r1=1129596&r2=1129597&view=diff >> ============================================================================== >> --- couchdb/trunk/src/couchdb/couch_db_updater.erl (original) >> +++ couchdb/trunk/src/couchdb/couch_db_updater.erl Tue May 31 10:04:20 2011 >> @@ -888,7 +888,7 @@ copy_compact(Db, NewDb0, Retry) -> >> fun(#doc_info{high_seq=Seq}=DocInfo, _Offset, >> {AccNewDb, AccUncopied, AccUncopiedSize, AccCopiedSize, >> TotalCopied}) -> >> >> - AccUncopiedSize2 = AccUncopiedSize + >> byte_size(?term_to_bin(DocInfo)), >> + AccUncopiedSize2 = AccUncopiedSize + ?term_size(DocInfo), >> if AccUncopiedSize2 >= BufferSize -> >> NewDb2 = copy_docs( >> Db, AccNewDb, lists:reverse([DocInfo | AccUncopied]), Retry), >> >> Modified: couchdb/trunk/src/couchdb/couch_view_compactor.erl >> URL: >> http://svn.apache.org/viewvc/couchdb/trunk/src/couchdb/couch_view_compactor.erl?rev=1129597&r1=1129596&r2=1129597&view=diff >> ============================================================================== >> --- couchdb/trunk/src/couchdb/couch_view_compactor.erl (original) >> +++ couchdb/trunk/src/couchdb/couch_view_compactor.erl Tue May 31 10:04:20 >> 2011 >> @@ -57,7 +57,7 @@ compact_group(Group, EmptyGroup) -> >> Msg = "Duplicates of ~s detected in ~s ~s - rebuild required", >> exit(io_lib:format(Msg, [DocId, DbName, GroupId])); >> true -> ok end, >> - AccSize2 = AccSize + byte_size(?term_to_bin(KV)), >> + AccSize2 = AccSize + ?term_size(KV), >> if AccSize2 >= BufferSize -> >> {ok, Bt2} = couch_btree:add(Bt, lists:reverse([KV|Acc])), >> couch_task_status:update("Copied ~p of ~p Ids (~p%)", >> @@ -90,7 +90,7 @@ compact_view(View, EmptyView, BufferSize >> >> %% Key is {Key,DocId} >> Fun = fun(KV, {Bt, Acc, AccSize, TotalCopied}) -> >> - AccSize2 = AccSize + byte_size(?term_to_bin(KV)), >> + AccSize2 = AccSize + 
?term_size(KV), >> if AccSize2 >= BufferSize -> >> {ok, Bt2} = couch_btree:add(Bt, lists:reverse([KV|Acc])), >> couch_task_status:update("View #~p: copied ~p of ~p KVs (~p%)", >> >> Modified: couchdb/trunk/src/couchdb/couch_work_queue.erl >> URL: >> http://svn.apache.org/viewvc/couchdb/trunk/src/couchdb/couch_work_queue.erl?rev=1129597&r1=1129596&r2=1129597&view=diff >> ============================================================================== >> --- couchdb/trunk/src/couchdb/couch_work_queue.erl (original) >> +++ couchdb/trunk/src/couchdb/couch_work_queue.erl Tue May 31 10:04:20 2011 >> @@ -42,7 +42,7 @@ new(Options) -> >> queue(Wq, Item) when is_binary(Item) -> >> gen_server:call(Wq, {queue, Item, byte_size(Item)}, infinity); >> queue(Wq, Item) -> >> - gen_server:call(Wq, {queue, Item, byte_size(?term_to_bin(Item))}, >> infinity). >> + gen_server:call(Wq, {queue, Item, ?term_size(Item)}, infinity). >> >> >> dequeue(Wq) -> >> >> > > -- Filipe David Manana, [email protected], [email protected] "Reasonable men adapt themselves to the world. Unreasonable men adapt the world to themselves. That's why all progress depends on unreasonable men."
