This is an automated email from the ASF dual-hosted git repository. jaydoane pushed a commit to branch configurable-binary-chunk-size in repository https://gitbox.apache.org/repos/asf/couchdb.git
commit e9facf7254aaa2af8f2f501c2609b94210c7f5e4 Author: Jay Doane <[email protected]> AuthorDate: Fri Apr 10 16:01:30 2020 -0700 Enable configurable binary chunk size Currently, the size of binary chunks used for values is fixed at the FDB-imposed limit of 100kB, although the FDB authors recommend using 10KB [1] (also note they subtly change units). This makes that value configurable, allowing e.g. benchmarks to compare performance of runs with varying chunk size. The cost is a ~10µs config lookup penalty for all database creation or open operations, whether documents are written or not. [1] https://www.foundationdb.org/files/record-layer-paper.pdf --- rel/overlay/etc/default.ini | 3 +++ src/fabric/include/fabric2.hrl | 2 +- src/fabric/src/fabric2_fdb.erl | 31 +++++++++++++++++++++---------- 3 files changed, 25 insertions(+), 11 deletions(-) diff --git a/rel/overlay/etc/default.ini b/rel/overlay/etc/default.ini index e10a5a0..dfc67f7 100644 --- a/rel/overlay/etc/default.ini +++ b/rel/overlay/etc/default.ini @@ -239,6 +239,9 @@ port = 6984 ; ; Enable or disable automatic stale index removal in the auto-updater ;index_updater_remove_old_indices = false +; +; Byte size of binary chunks written to FDB values. Defaults to FDB max limit. +;binary_chunk_size = 100000 ; [rexi] ; buffer_count = 2000 diff --git a/src/fabric/include/fabric2.hrl b/src/fabric/include/fabric2.hrl index 587b4f8..2e588f8 100644 --- a/src/fabric/include/fabric2.hrl +++ b/src/fabric/include/fabric2.hrl @@ -77,4 +77,4 @@ -define(TRANSACTION_CANCELLED, 1025). --define(BINARY_CHUNK_SIZE, 100000). +-define(DEFAULT_BINARY_CHUNK_SIZE, 100000). 
diff --git a/src/fabric/src/fabric2_fdb.erl b/src/fabric/src/fabric2_fdb.erl index d96c3ae..03eeae5 100644 --- a/src/fabric/src/fabric2_fdb.erl +++ b/src/fabric/src/fabric2_fdb.erl @@ -235,6 +235,7 @@ create(#{} = Db0, Options) -> validate_doc_update_funs => [], before_doc_update => undefined, after_doc_read => undefined, + binary_chunk_size => binary_chunk_size(), % All other db things as we add features, db_options => Options1 @@ -278,6 +279,7 @@ open(#{} = Db0, Options) -> validate_doc_update_funs => [], before_doc_update => undefined, after_doc_read => undefined, + binary_chunk_size => binary_chunk_size(), db_options => Options2 }, @@ -996,7 +998,8 @@ write_attachment(#{} = Db, DocId, Data, Encoding) when is_binary(Data), is_atom(Encoding) -> #{ tx := Tx, - db_prefix := DbPrefix + db_prefix := DbPrefix, + binary_chunk_size := BinaryChunkSize } = ensure_current(Db), AttId = fabric2_util:uuid(), @@ -1017,7 +1020,7 @@ write_attachment(#{} = Db, DocId, Data, Encoding) InfoVal = erlfdb_tuple:pack({?CURR_ATT_STORAGE_VER, Compressed}), ok = erlfdb:set(Tx, IdKey, InfoVal), - Chunks = chunkify_binary(Data1), + Chunks = chunkify_binary(Data1, BinaryChunkSize), lists:foldl(fun(Chunk, ChunkId) -> AttKey = erlfdb_tuple:pack({?DB_ATTS, DocId, AttId, ChunkId}, DbPrefix), @@ -1488,7 +1491,8 @@ fdb_to_revinfo(Key, {1, RPath, AttHash}) -> doc_to_fdb(Db, #doc{} = Doc) -> #{ - db_prefix := DbPrefix + db_prefix := DbPrefix, + binary_chunk_size := BinaryChunkSize } = Db, #doc{ @@ -1503,7 +1507,7 @@ doc_to_fdb(Db, #doc{} = Doc) -> Opts = [{minor_version, 1}, {compressed, 6}], Value = term_to_binary({Body, DiskAtts, Deleted}, Opts), - Chunks = chunkify_binary(Value), + Chunks = chunkify_binary(Value, BinaryChunkSize), {Rows, _} = lists:mapfoldl(fun(Chunk, ChunkId) -> Key = erlfdb_tuple:pack({?DB_DOCS, Id, Start, Rev, ChunkId}, DbPrefix), @@ -1538,7 +1542,8 @@ fdb_to_doc(Db, DocId, Pos, Path, BinRows) when is_list(BinRows) -> local_doc_to_fdb(Db, #doc{} = Doc) -> #{ - db_prefix := 
DbPrefix + db_prefix := DbPrefix, + binary_chunk_size := BinaryChunkSize } = Db, #doc{ @@ -1558,7 +1563,7 @@ local_doc_to_fdb(Db, #doc{} = Doc) -> {Rows, _} = lists:mapfoldl(fun(Chunk, ChunkId) -> K = erlfdb_tuple:pack({?DB_LOCAL_DOC_BODIES, Id, ChunkId}, DbPrefix), {{K, Chunk}, ChunkId + 1} - end, 0, chunkify_binary(BVal)), + end, 0, chunkify_binary(BVal, BinaryChunkSize)), NewSize = fabric2_util:ldoc_size(Doc), RawValue = erlfdb_tuple:pack({?CURR_LDOC_FORMAT, StoreRev, NewSize}), @@ -1627,13 +1632,14 @@ sum_rem_rev_sizes(RevInfos) -> end, 0, RevInfos). -chunkify_binary(Data) -> +chunkify_binary(Data, Size) -> + couch_log:info("chunkify_binary ~p", [Size]), case Data of <<>> -> []; - <<Head:?BINARY_CHUNK_SIZE/binary, Rest/binary>> -> - [Head | chunkify_binary(Rest)]; - <<_/binary>> when size(Data) < ?BINARY_CHUNK_SIZE -> + <<Head:Size/binary, Rest/binary>> -> + [Head | chunkify_binary(Rest, Size)]; + <<_/binary>> when size(Data) < Size -> [Data] end. @@ -1988,6 +1994,11 @@ get_info_wait_int(#info_future{} = InfoFuture) -> [CProp | MProps]. +binary_chunk_size() -> + config:get_integer( + "fabric", "binary_chunk_size", ?DEFAULT_BINARY_CHUNK_SIZE). + + -ifdef(TEST). -include_lib("eunit/include/eunit.hrl").
