This is an automated email from the ASF dual-hosted git repository.

jaydoane pushed a commit to branch configurable-binary-chunk-size
in repository https://gitbox.apache.org/repos/asf/couchdb.git

commit 2464e24902714c369b32dc759b0d4f9f3cdf259a
Author: Jay Doane <[email protected]>
AuthorDate: Fri Apr 10 15:08:54 2020 -0700

    Enable configurable binary chunk size
    
    Currently, the size of binary chunks used for values is fixed at the
    FDB-imposed limit of 100kB, although the FDB authors recommend 10KB [1]
    (note that they subtly change units between the two figures).
    
    This makes that value configurable, allowing e.g. benchmarks to compare
    performance of runs with varying chunk size. The cost is a ~10µs config
    lookup penalty for all database creation or open operations, whether
    documents are written or not.
    
    [1] https://www.foundationdb.org/files/record-layer-paper.pdf
---
 rel/overlay/etc/default.ini    |  3 +++
 src/fabric/include/fabric2.hrl |  2 +-
 src/fabric/src/fabric2_fdb.erl | 31 +++++++++++++++++++++----------
 3 files changed, 25 insertions(+), 11 deletions(-)

diff --git a/rel/overlay/etc/default.ini b/rel/overlay/etc/default.ini
index d2a2c72..3899945 100644
--- a/rel/overlay/etc/default.ini
+++ b/rel/overlay/etc/default.ini
@@ -236,6 +236,9 @@ port = 6984
 ;
 ; How often to check if databases may need their indices updated.
 ;index_autoupdater_resolution_msec = 10000
+;
+; Size in bytes of binary chunks written to FDB. Defaults to the FDB maximum.
+;binary_chunk_size = 100000
 
 ; [rexi]
 ; buffer_count = 2000
diff --git a/src/fabric/include/fabric2.hrl b/src/fabric/include/fabric2.hrl
index 587b4f8..2e588f8 100644
--- a/src/fabric/include/fabric2.hrl
+++ b/src/fabric/include/fabric2.hrl
@@ -77,4 +77,4 @@
 -define(TRANSACTION_CANCELLED, 1025).
 
 
--define(BINARY_CHUNK_SIZE, 100000).
+-define(DEFAULT_BINARY_CHUNK_SIZE, 100000).
diff --git a/src/fabric/src/fabric2_fdb.erl b/src/fabric/src/fabric2_fdb.erl
index d96c3ae..c304b77 100644
--- a/src/fabric/src/fabric2_fdb.erl
+++ b/src/fabric/src/fabric2_fdb.erl
@@ -235,6 +235,7 @@ create(#{} = Db0, Options) ->
         validate_doc_update_funs => [],
         before_doc_update => undefined,
         after_doc_read => undefined,
+        binary_chunk_size => binary_chunk_size(),
         % All other db things as we add features,
 
         db_options => Options1
@@ -278,6 +279,7 @@ open(#{} = Db0, Options) ->
         validate_doc_update_funs => [],
         before_doc_update => undefined,
         after_doc_read => undefined,
+        binary_chunk_size => binary_chunk_size(),
 
         db_options => Options2
     },
@@ -996,7 +998,8 @@ write_attachment(#{} = Db, DocId, Data, Encoding)
         when is_binary(Data), is_atom(Encoding) ->
     #{
         tx := Tx,
-        db_prefix := DbPrefix
+        db_prefix := DbPrefix,
+        binary_chunk_size := BinaryChunkSize
     } = ensure_current(Db),
 
     AttId = fabric2_util:uuid(),
@@ -1017,7 +1020,7 @@ write_attachment(#{} = Db, DocId, Data, Encoding)
     InfoVal = erlfdb_tuple:pack({?CURR_ATT_STORAGE_VER, Compressed}),
     ok = erlfdb:set(Tx, IdKey, InfoVal),
 
-    Chunks = chunkify_binary(Data1),
+    Chunks = chunkify_binary(Data1, BinaryChunkSize),
 
     lists:foldl(fun(Chunk, ChunkId) ->
         AttKey = erlfdb_tuple:pack({?DB_ATTS, DocId, AttId, ChunkId}, DbPrefix),
@@ -1488,7 +1491,8 @@ fdb_to_revinfo(Key, {1, RPath, AttHash}) ->
 
 doc_to_fdb(Db, #doc{} = Doc) ->
     #{
-        db_prefix := DbPrefix
+        db_prefix := DbPrefix,
+        binary_chunk_size := BinaryChunkSize
     } = Db,
 
     #doc{
@@ -1503,7 +1507,7 @@ doc_to_fdb(Db, #doc{} = Doc) ->
 
     Opts = [{minor_version, 1}, {compressed, 6}],
     Value = term_to_binary({Body, DiskAtts, Deleted}, Opts),
-    Chunks = chunkify_binary(Value),
+    Chunks = chunkify_binary(Value, BinaryChunkSize),
 
     {Rows, _} = lists:mapfoldl(fun(Chunk, ChunkId) ->
         Key = erlfdb_tuple:pack({?DB_DOCS, Id, Start, Rev, ChunkId}, DbPrefix),
@@ -1538,7 +1542,8 @@ fdb_to_doc(Db, DocId, Pos, Path, BinRows) when is_list(BinRows) ->
 
 local_doc_to_fdb(Db, #doc{} = Doc) ->
     #{
-        db_prefix := DbPrefix
+        db_prefix := DbPrefix,
+        binary_chunk_size := BinaryChunkSize
     } = Db,
 
     #doc{
@@ -1558,7 +1563,7 @@ local_doc_to_fdb(Db, #doc{} = Doc) ->
     {Rows, _} = lists:mapfoldl(fun(Chunk, ChunkId) ->
         K = erlfdb_tuple:pack({?DB_LOCAL_DOC_BODIES, Id, ChunkId}, DbPrefix),
         {{K, Chunk}, ChunkId + 1}
-    end, 0, chunkify_binary(BVal)),
+    end, 0, chunkify_binary(BVal, BinaryChunkSize)),
 
     NewSize = fabric2_util:ldoc_size(Doc),
     RawValue = erlfdb_tuple:pack({?CURR_LDOC_FORMAT, StoreRev, NewSize}),
@@ -1627,13 +1632,14 @@ sum_rem_rev_sizes(RevInfos) ->
     end, 0, RevInfos).
 
 
-chunkify_binary(Data) ->
+% Split Data into chunks of at most BinaryChunkSize (> 0) bytes each.
+chunkify_binary(Data, BinaryChunkSize) when BinaryChunkSize > 0 ->
     case Data of
         <<>> ->
             [];
-        <<Head:?BINARY_CHUNK_SIZE/binary, Rest/binary>> ->
-            [Head | chunkify_binary(Rest)];
-        <<_/binary>> when size(Data) < ?BINARY_CHUNK_SIZE ->
+        <<Head:BinaryChunkSize/binary, Rest/binary>> ->
+            [Head | chunkify_binary(Rest, BinaryChunkSize)];
+        <<_/binary>> when byte_size(Data) < BinaryChunkSize ->
             [Data]
     end.
 
@@ -1988,6 +1994,11 @@ get_info_wait_int(#info_future{} = InfoFuture) ->
     [CProp | MProps].
 
 
+binary_chunk_size() ->
+    Default = ?DEFAULT_BINARY_CHUNK_SIZE,
+    config:get_integer("fabric", "binary_chunk_size", Default).
+
+
 -ifdef(TEST).
 -include_lib("eunit/include/eunit.hrl").
 

Reply via email to