XiangpengHao commented on code in PR #6136: URL: https://github.com/apache/arrow-rs/pull/6136#discussion_r1693965799
########## arrow-array/src/builder/generic_bytes_view_builder.rs: ########## @@ -78,15 +100,25 @@ impl<T: ByteViewType + ?Sized> GenericByteViewBuilder<T> { null_buffer_builder: NullBufferBuilder::new(capacity), completed: vec![], in_progress: vec![], - block_size: DEFAULT_BLOCK_SIZE, + block_size: BlockSizeGrowthStrategy::Exponential { + current_size: STARTING_BLOCK_SIZE, + }, string_tracker: None, phantom: Default::default(), } } - /// Override the size of buffers to allocate for holding string data + /// The block size is the size of the buffer used to store the string data. + /// A new buffer will be allocated when the current buffer is full. + /// By default the builder tries to keep the buffer count low by growing the size exponentially from 8KB up to 2MB. + /// This method instead sets a fixed value for the buffer size, useful for advanced users who want to control the memory usage and buffer count. + /// Check <https://github.com/apache/arrow-rs/issues/6094> for more details on the implications. pub fn with_block_size(self, block_size: u32) -> Self { Review Comment: done! -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: github-unsubscr...@arrow.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org