Chris reminded me that I had an optimization patch lying around for
couch_btree:chunkify, and his tests show that it gets a bit of a speed
increase when running some tests with hovercraft. The basic outline of
what I did was to replace a call like term_to_binary([ListOfTuples])
with a sequence of ListOfSizes = lists:map(term_to_binary, ListOfTuples),
Size = sum(ListOfSizes), and then, when we go through the list of
tuples to split them into chunks, I use the precalculated sizes.
Anyway, I just wanted to run it past the list before I commit it, in
case anyone sees anything subtle I might be missing.
%% @doc Split InList into a list of chunks (sub-lists) based on the
%% serialised size of its elements.
%%
%% Bt is not inspected here; it is only handed on to chunkify/7.
%%
%% Returns:
%%   []        when InList is empty;
%%   [InList]  when the summed element sizes do not exceed ?CHUNK_THRESHOLD;
%%   otherwise whatever chunkify/7 produces for the computed per-chunk target.
%%
%% Each element is encoded with term_to_binary/1 exactly once; the resulting
%% sizes are passed to chunkify/7 so it does not have to encode again.
%% NOTE(review): the total is the sum of the individually encoded elements,
%% which is presumably not byte-for-byte the size of term_to_binary(InList);
%% it is used as an estimate only -- confirm this is acceptable to callers.
chunkify(_Bt, []) ->
    [];
chunkify(Bt, InList) ->
    %% byte_size/1 is the specific BIF for binaries (size/1 is the generic one).
    SizeList = [byte_size(term_to_binary(Element)) || Element <- InList],
    case lists:sum(SizeList) of
        Size when Size > ?CHUNK_THRESHOLD ->
            %% Aim for evenly sized chunks: estimate how many chunks are
            %% needed, then divide the total by that count to get the
            %% per-chunk target.
            NumberOfChunksLikely = (Size div ?CHUNK_THRESHOLD) + 1,
            ChunkThreshold = Size div NumberOfChunksLikely,
            chunkify(Bt, InList, SizeList, ChunkThreshold, [], 0, []);
        _Else ->
            [InList]
    end.
%% @doc Walk the element list and its parallel size list together, grouping
%% elements into chunks.
%%
%% Arguments, in order: Bt (passed through untouched), the remaining
%% elements, their sizes (same order and length), the per-chunk size
%% target, the current chunk built in reverse, the accumulated size of the
%% current chunk, and the finished chunks in reverse.
%%
%% Returns the chunks in input order, each chunk in input order.

%% Input exhausted with nothing pending: only the finished chunks remain.
chunkify(_Bt, [], [], _Threshold, [], 0, Finished) ->
    lists:reverse(Finished);
%% Input exhausted with a partly filled chunk: emit it as the last chunk.
chunkify(_Bt, [], [], _Threshold, Pending, _PendingSize, Finished) ->
    LastChunk = lists:reverse(Pending),
    lists:reverse([LastChunk | Finished]);
%% Adding this element pushes a non-empty pending chunk past the target:
%% the element is still included in that chunk, which is then closed, so a
%% chunk may exceed the target by up to one element.
chunkify(Bt, [Element | Elements], [Size | Sizes], Threshold,
         Pending, PendingSize, Finished)
        when (Size + PendingSize) > Threshold, Pending /= [] ->
    Closed = lists:reverse([Element | Pending]),
    chunkify(Bt, Elements, Sizes, Threshold, [], 0, [Closed | Finished]);
%% Otherwise keep accumulating. This also covers an oversized element
%% arriving while the pending chunk is empty.
chunkify(Bt, [Element | Elements], [Size | Sizes], Threshold,
         Pending, PendingSize, Finished) ->
    chunkify(Bt, Elements, Sizes, Threshold,
             [Element | Pending], PendingSize + Size, Finished).