This is an automated email from the ASF dual-hosted git repository. alamb pushed a commit to branch main in repository https://gitbox.apache.org/repos/asf/datafusion.git
The following commit(s) were added to refs/heads/main by this push:
     new 6d9b76e4a3 Perf: Port arrow-rs optimization for get_buffer_memory_size and add fast path for no buffer for gc string view (#17008)
6d9b76e4a3 is described below

commit 6d9b76e4a30f6234ffa3f8100b5d4c2735558ca6
Author: Qi Zhu <821684...@qq.com>
AuthorDate: Sun Aug 3 18:48:52 2025 +0800

    Perf: Port arrow-rs optimization for get_buffer_memory_size and add fast path for no buffer for gc string view (#17008)

    * Port arrow-rs optimization for get_buffer_memory_size for gc string view

    * add comments and fast path
---
 datafusion/physical-plan/src/coalesce/mod.rs | 12 +++++++++++-
 1 file changed, 11 insertions(+), 1 deletion(-)

diff --git a/datafusion/physical-plan/src/coalesce/mod.rs b/datafusion/physical-plan/src/coalesce/mod.rs
index 0eca27f8e4..8e0ba072b7 100644
--- a/datafusion/physical-plan/src/coalesce/mod.rs
+++ b/datafusion/physical-plan/src/coalesce/mod.rs
@@ -228,6 +228,12 @@ fn gc_string_view_batch(batch: &RecordBatch) -> RecordBatch {
             let Some(s) = c.as_string_view_opt() else {
                 return Arc::clone(c);
             };
+
+            // Fast path: if the data buffers are empty, we can return the original array
+            if s.data_buffers().is_empty() {
+                return Arc::clone(c);
+            }
+
             let ideal_buffer_size: usize = s
                 .views()
                 .iter()
@@ -240,7 +246,11 @@ fn gc_string_view_batch(batch: &RecordBatch) -> RecordBatch {
                     }
                 })
                 .sum();
-            let actual_buffer_size = s.get_buffer_memory_size();
+
+            // We don't use get_buffer_memory_size here, because gc is for the contents of the
+            // data buffers, not views and nulls.
+            let actual_buffer_size =
+                s.data_buffers().iter().map(|b| b.capacity()).sum::<usize>();
 
             // Re-creating the array copies data and can be time consuming.
             // We only do it if the array is sparse

---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@datafusion.apache.org
For additional commands, e-mail: commits-h...@datafusion.apache.org