Nice. There are so many useful things one can find while digging around the undocumented internals of Erlang/OTP...
On May 31, 2011, at 6:04 AM, [email protected] wrote: > Author: fdmanana > Date: Tue May 31 10:04:20 2011 > New Revision: 1129597 > > URL: http://svn.apache.org/viewvc?rev=1129597&view=rev > Log: > More efficient term size calculation > > Unlike byte_size(term_to_binary(Term)), the BIF erlang:external_size/1 doesn't > do the serialization step, it only calculates the maximum external size for > any term, which is more efficient (faster and avoids the garbage generation). > > With the test couch_http_bulk_writes.sh at [1], using 20 writers and batches > of 100 1Kb documents, it's possible to write about 1 400 000 documents with > this patch instead of about 1 300 000. > > [1] https://github.com/fdmanana/basho_bench_couch > > > > Modified: > couchdb/trunk/src/couchdb/couch_btree.erl > couchdb/trunk/src/couchdb/couch_db.hrl > couchdb/trunk/src/couchdb/couch_db_updater.erl > couchdb/trunk/src/couchdb/couch_view_compactor.erl > couchdb/trunk/src/couchdb/couch_work_queue.erl > > Modified: couchdb/trunk/src/couchdb/couch_btree.erl > URL: > http://svn.apache.org/viewvc/couchdb/trunk/src/couchdb/couch_btree.erl?rev=1129597&r1=1129596&r2=1129597&view=diff > ============================================================================== > --- couchdb/trunk/src/couchdb/couch_btree.erl (original) > +++ couchdb/trunk/src/couchdb/couch_btree.erl Tue May 31 10:04:20 2011 > @@ -276,26 +276,26 @@ complete_root(Bt, KPs) -> > % written. Plus with the "case byte_size(term_to_binary(InList)) of" code > % it's probably really inefficient. > > -chunkify(#btree{compression = Comp} = Bt, InList) -> > - case byte_size(couch_compress:compress(InList, Comp)) of > +chunkify(InList) -> > + case ?term_size(InList) of > Size when Size > ?CHUNK_THRESHOLD -> > NumberOfChunksLikely = ((Size div ?CHUNK_THRESHOLD) + 1), > ChunkThreshold = Size div NumberOfChunksLikely, > - chunkify(Bt, InList, ChunkThreshold, [], 0, []); > + chunkify(InList, ChunkThreshold, [], 0, []); > _Else -> > [InList] > end. 
> > -chunkify(_Bt, [], _ChunkThreshold, [], 0, OutputChunks) -> > +chunkify([], _ChunkThreshold, [], 0, OutputChunks) -> > lists:reverse(OutputChunks); > -chunkify(_Bt, [], _ChunkThreshold, OutList, _OutListSize, OutputChunks) -> > +chunkify([], _ChunkThreshold, OutList, _OutListSize, OutputChunks) -> > lists:reverse([lists:reverse(OutList) | OutputChunks]); > -chunkify(Bt, [InElement | RestInList], ChunkThreshold, OutList, OutListSize, > OutputChunks) -> > - case byte_size(couch_compress:compress(InElement, Bt#btree.compression)) > of > +chunkify([InElement | RestInList], ChunkThreshold, OutList, OutListSize, > OutputChunks) -> > + case ?term_size(InElement) of > Size when (Size + OutListSize) > ChunkThreshold andalso OutList /= [] -> > - chunkify(Bt, RestInList, ChunkThreshold, [], 0, > [lists:reverse([InElement | OutList]) | OutputChunks]); > + chunkify(RestInList, ChunkThreshold, [], 0, > [lists:reverse([InElement | OutList]) | OutputChunks]); > Size -> > - chunkify(Bt, RestInList, ChunkThreshold, [InElement | OutList], > OutListSize + Size, OutputChunks) > + chunkify(RestInList, ChunkThreshold, [InElement | OutList], > OutListSize + Size, OutputChunks) > end. 
> > modify_node(Bt, RootPointerInfo, Actions, QueryOutput) -> > @@ -350,7 +350,7 @@ get_node(#btree{fd = Fd}, NodePos) -> > > write_node(#btree{fd = Fd, compression = Comp} = Bt, NodeType, NodeList) -> > % split up nodes into smaller sizes > - NodeListList = chunkify(Bt, NodeList), > + NodeListList = chunkify(NodeList), > % now write out each chunk and return the KeyPointer pairs for those nodes > ResultList = [ > begin > > Modified: couchdb/trunk/src/couchdb/couch_db.hrl > URL: > http://svn.apache.org/viewvc/couchdb/trunk/src/couchdb/couch_db.hrl?rev=1129597&r1=1129596&r2=1129597&view=diff > ============================================================================== > --- couchdb/trunk/src/couchdb/couch_db.hrl (original) > +++ couchdb/trunk/src/couchdb/couch_db.hrl Tue May 31 10:04:20 2011 > @@ -27,6 +27,12 @@ > -define(b2l(V), binary_to_list(V)). > -define(l2b(V), list_to_binary(V)). > -define(term_to_bin(T), term_to_binary(T, [{minor_version, 1}])). > +-define(term_size(T), > + try > + erlang:external_size(T) > + catch _:_ -> > + byte_size(?term_to_bin(T)) > + end). > > -define(DEFAULT_ATTACHMENT_CONTENT_TYPE, <<"application/octet-stream">>). 
> > > Modified: couchdb/trunk/src/couchdb/couch_db_updater.erl > URL: > http://svn.apache.org/viewvc/couchdb/trunk/src/couchdb/couch_db_updater.erl?rev=1129597&r1=1129596&r2=1129597&view=diff > ============================================================================== > --- couchdb/trunk/src/couchdb/couch_db_updater.erl (original) > +++ couchdb/trunk/src/couchdb/couch_db_updater.erl Tue May 31 10:04:20 2011 > @@ -888,7 +888,7 @@ copy_compact(Db, NewDb0, Retry) -> > fun(#doc_info{high_seq=Seq}=DocInfo, _Offset, > {AccNewDb, AccUncopied, AccUncopiedSize, AccCopiedSize, TotalCopied}) > -> > > - AccUncopiedSize2 = AccUncopiedSize + > byte_size(?term_to_bin(DocInfo)), > + AccUncopiedSize2 = AccUncopiedSize + ?term_size(DocInfo), > if AccUncopiedSize2 >= BufferSize -> > NewDb2 = copy_docs( > Db, AccNewDb, lists:reverse([DocInfo | AccUncopied]), Retry), > > Modified: couchdb/trunk/src/couchdb/couch_view_compactor.erl > URL: > http://svn.apache.org/viewvc/couchdb/trunk/src/couchdb/couch_view_compactor.erl?rev=1129597&r1=1129596&r2=1129597&view=diff > ============================================================================== > --- couchdb/trunk/src/couchdb/couch_view_compactor.erl (original) > +++ couchdb/trunk/src/couchdb/couch_view_compactor.erl Tue May 31 10:04:20 > 2011 > @@ -57,7 +57,7 @@ compact_group(Group, EmptyGroup) -> > Msg = "Duplicates of ~s detected in ~s ~s - rebuild required", > exit(io_lib:format(Msg, [DocId, DbName, GroupId])); > true -> ok end, > - AccSize2 = AccSize + byte_size(?term_to_bin(KV)), > + AccSize2 = AccSize + ?term_size(KV), > if AccSize2 >= BufferSize -> > {ok, Bt2} = couch_btree:add(Bt, lists:reverse([KV|Acc])), > couch_task_status:update("Copied ~p of ~p Ids (~p%)", > @@ -90,7 +90,7 @@ compact_view(View, EmptyView, BufferSize > > %% Key is {Key,DocId} > Fun = fun(KV, {Bt, Acc, AccSize, TotalCopied}) -> > - AccSize2 = AccSize + byte_size(?term_to_bin(KV)), > + AccSize2 = AccSize + ?term_size(KV), > if AccSize2 >= BufferSize -> > 
{ok, Bt2} = couch_btree:add(Bt, lists:reverse([KV|Acc])), > couch_task_status:update("View #~p: copied ~p of ~p KVs (~p%)", > > Modified: couchdb/trunk/src/couchdb/couch_work_queue.erl > URL: > http://svn.apache.org/viewvc/couchdb/trunk/src/couchdb/couch_work_queue.erl?rev=1129597&r1=1129596&r2=1129597&view=diff > ============================================================================== > --- couchdb/trunk/src/couchdb/couch_work_queue.erl (original) > +++ couchdb/trunk/src/couchdb/couch_work_queue.erl Tue May 31 10:04:20 2011 > @@ -42,7 +42,7 @@ new(Options) -> > queue(Wq, Item) when is_binary(Item) -> > gen_server:call(Wq, {queue, Item, byte_size(Item)}, infinity); > queue(Wq, Item) -> > - gen_server:call(Wq, {queue, Item, byte_size(?term_to_bin(Item))}, > infinity). > + gen_server:call(Wq, {queue, Item, ?term_size(Item)}, infinity). > > > dequeue(Wq) -> > >
