This is an automated email from the ASF dual-hosted git repository.

alamb pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git


The following commit(s) were added to refs/heads/main by this push:
     new 79575aa343 perf: zero-copy path in `RowConverter::from_binary` (#8686)
79575aa343 is described below

commit 79575aa343c2a2aa4bf226554ec4e4f7ff1cb37e
Author: Mikhail Zabaluev <[email protected]>
AuthorDate: Fri Oct 24 18:28:03 2025 +0300

    perf: zero-copy path in `RowConverter::from_binary` (#8686)
    
    # Which issue does this PR close?
    
    - Closes #8685.
    
    # What changes are included in this PR?
    
    In the implementation of `RowConverter::from_binary`, the `BinaryArray`
    is broken into parts and an attempt is made to convert the data buffer
    into a `Vec` without copying, using `Buffer::into_vec`. Only if this
    fails is the data copied into a newly allocated `Vec`.
    
    # Are these changes tested?
    
    Passes existing tests using `RowConverter::from_binary`, which all
    convert a non-shared buffer taking advantage of the optimization.
    Another test is added to cover the copying path.
    
    # Are there any user-facing changes?
    
    No
---
 arrow-row/src/lib.rs | 21 +++++++++++++++++++--
 1 file changed, 19 insertions(+), 2 deletions(-)

diff --git a/arrow-row/src/lib.rs b/arrow-row/src/lib.rs
index db7758047c..5f690e9a67 100644
--- a/arrow-row/src/lib.rs
+++ b/arrow-row/src/lib.rs
@@ -913,9 +913,13 @@ impl RowConverter {
             0,
             "can't construct Rows instance from array with nulls"
         );
+        let (offsets, values, _) = array.into_parts();
+        let offsets = offsets.iter().map(|&i| i.as_usize()).collect();
+        // Try zero-copy, if it does not succeed, fall back to copying the values.
+        let buffer = values.into_vec().unwrap_or_else(|values| values.to_vec());
         Rows {
-            buffer: array.values().to_vec(),
-            offsets: array.offsets().iter().map(|&i| i.as_usize()).collect(),
+            buffer,
+            offsets,
             config: RowConfig {
                 fields: Arc::clone(&self.fields),
                 validate_utf8: true,
@@ -2474,6 +2478,19 @@ mod tests {
         assert!(rows.row(3) < rows.row(0));
     }
 
+    #[test]
+    fn test_from_binary_shared_buffer() {
+        let converter = RowConverter::new(vec![SortField::new(DataType::Binary)]).unwrap();
+        let array = Arc::new(BinaryArray::from_iter_values([&[0xFF]])) as _;
+        let rows = converter.convert_columns(&[array]).unwrap();
+        let binary_rows = rows.try_into_binary().expect("known-small rows");
+        let _binary_rows_shared_buffer = binary_rows.clone();
+
+        let parsed = converter.from_binary(binary_rows);
+
+        converter.convert_rows(parsed.iter()).unwrap();
+    }
+
     #[test]
     #[should_panic(expected = "Encountered non UTF-8 data")]
     fn test_invalid_utf8() {

Reply via email to