This is an automated email from the ASF dual-hosted git repository.

alamb pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/datafusion.git


The following commit(s) were added to refs/heads/main by this push:
     new 6d9b76e4a3 Perf: Port arrow-rs optimization for get_buffer_memory_size and add fast path for no buffer for gc string view (#17008)
6d9b76e4a3 is described below

commit 6d9b76e4a30f6234ffa3f8100b5d4c2735558ca6
Author: Qi Zhu <821684...@qq.com>
AuthorDate: Sun Aug 3 18:48:52 2025 +0800

    Perf: Port arrow-rs optimization for get_buffer_memory_size and add fast path for no buffer for gc string view (#17008)
    
    * Port arrow-rs optimization for get_buffer_memory_size for gc string view
    
    * add comments and fast path
---
 datafusion/physical-plan/src/coalesce/mod.rs | 12 +++++++++++-
 1 file changed, 11 insertions(+), 1 deletion(-)

diff --git a/datafusion/physical-plan/src/coalesce/mod.rs b/datafusion/physical-plan/src/coalesce/mod.rs
index 0eca27f8e4..8e0ba072b7 100644
--- a/datafusion/physical-plan/src/coalesce/mod.rs
+++ b/datafusion/physical-plan/src/coalesce/mod.rs
@@ -228,6 +228,12 @@ fn gc_string_view_batch(batch: &RecordBatch) -> RecordBatch {
             let Some(s) = c.as_string_view_opt() else {
                 return Arc::clone(c);
             };
+
+            // Fast path: if the data buffers are empty, we can return the original array
+            if s.data_buffers().is_empty() {
+                return Arc::clone(c);
+            }
+
             let ideal_buffer_size: usize = s
                 .views()
                 .iter()
@@ -240,7 +246,11 @@ fn gc_string_view_batch(batch: &RecordBatch) -> RecordBatch {
                     }
                 })
                 .sum();
-            let actual_buffer_size = s.get_buffer_memory_size();
+
+            // We don't use get_buffer_memory_size here, because gc is for the contents of the
+            // data buffers, not views and nulls.
+            let actual_buffer_size =
+                s.data_buffers().iter().map(|b| b.capacity()).sum::<usize>();
 
             // Re-creating the array copies data and can be time consuming.
             // We only do it if the array is sparse


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@datafusion.apache.org
For additional commands, e-mail: commits-h...@datafusion.apache.org

Reply via email to