I played around with chunkify some time ago and saw about a 4% improvement. It may be worth looking at [1]. Also note that I don't think you need the variable _Bt; it's just passed around the recursion but never used.

Cheers,

Bob

[1] http://github.com/bdionne/couchdb/tree/btree




On May 10, 2009, at 9:56 PM, Chris Anderson wrote:

On Sun, May 10, 2009 at 6:49 PM, Paul Davis <[email protected]> wrote:
Chris reminded me that I had an optimization patch lying around for
couch_btree:chunkify and his tests show that it gets a bit of a speed
increase when running some tests with hovercraft. The basic outline of what I did was to swap a call like term_to_binary([ListOfTuples]) for a
sequence of ListOfSizes = lists:map(term_to_binary, ListOfTuples),
Size = sum(ListOfSizes), and then when we go through the list of
tuples to split them into chunks I use the pre calculated sizes.


In my tests this gave a more than 10% speed boost both via Hovercraft
and http benchmarking. I'm +1 on committing it.

Anyway, I just wanted to run it across the list before I commit it in
case anyone sees anything subtle I might be missing.

%% Split InList into a list of chunks (sub-lists) according to the
%% serialized size of its elements.
%%
%% An empty input yields no chunks. If the summed size of the serialized
%% elements does not exceed ?CHUNK_THRESHOLD, the whole input is returned as
%% a single chunk. Otherwise the work is handed to chunkify/7 with a
%% per-chunk threshold derived from the total size.
%%
%% Bt is not inspected here; it is only forwarded to chunkify/7.
chunkify(_Bt, []) ->
    [];
chunkify(Bt, InList) ->
    %% Serialize every element exactly once and keep the sizes so that the
    %% splitting pass does not have to call term_to_binary/1 again.
    Sizes = lists:map(fun(Elem) -> byte_size(term_to_binary(Elem)) end, InList),
    Total = lists:sum(Sizes),
    if
        Total > ?CHUNK_THRESHOLD ->
            %% Estimate how many chunks are needed, then divide the total
            %% evenly across them to get the per-chunk threshold.
            ChunkCount = (Total div ?CHUNK_THRESHOLD) + 1,
            PerChunk = Total div ChunkCount,
            chunkify(Bt, InList, Sizes, PerChunk, [], 0, []);
        true ->
            [InList]
    end.

%% Worker for chunkify/2. Walks the elements and their precomputed sizes in
%% lockstep, collecting elements into a pending chunk (kept in reverse order)
%% and closing that chunk once the running size exceeds Threshold.
%% Returns the list of chunks in input order.
%%
%% Input exhausted and nothing pending: emit the finished chunks.
chunkify(_Bt, [], [], _Threshold, [], 0, Done) ->
    lists:reverse(Done);
%% Input exhausted with a pending chunk: flush it as the final chunk.
chunkify(_Bt, [], [], _Threshold, Pending, _PendingSize, Done) ->
    lists:reverse([lists:reverse(Pending) | Done]);
%% This element pushes a non-empty pending chunk past the threshold: the
%% element is still added to that chunk, which is then closed.
chunkify(Bt, [Elem | Elems], [ElemSize | Sizes], Threshold, Pending,
         PendingSize, Done)
        when ElemSize + PendingSize > Threshold, Pending =/= [] ->
    Chunk = lists:reverse([Elem | Pending]),
    chunkify(Bt, Elems, Sizes, Threshold, [], 0, [Chunk | Done]);
%% Otherwise keep accumulating. This also covers an oversized element that
%% arrives while the pending chunk is empty, so no chunk is ever empty.
chunkify(Bt, [Elem | Elems], [ElemSize | Sizes], Threshold, Pending,
         PendingSize, Done) ->
    chunkify(Bt, Elems, Sizes, Threshold, [Elem | Pending],
             PendingSize + ElemSize, Done).




--
Chris Anderson
http://jchrisa.net
http://couch.io

Reply via email to