lyang24 commented on code in PR #9052:
URL: https://github.com/apache/arrow-rs/pull/9052#discussion_r2650174190


##########
arrow-row/src/run.rs:
##########
@@ -134,7 +134,11 @@ pub unsafe fn decode<R: RunEndIndexType>(
                 run_ends.push(R::Native::usize_as(idx));
             }
             unique_row_indices.push(decoded_values.len());
-            decoded_values.push(decoded_data.clone());
+            let capacity = decoded_data.capacity();
+            decoded_values.push(std::mem::replace(
+                &mut decoded_data,
+                Vec::with_capacity(capacity),
+            ));

Review Comment:
   The key is we're NOT replacing "clone with allocation" - we're replacing "allocation + memcpy" with "allocation only".
   
   Both approaches allocate:
   - `.clone()` allocates a new Vec AND copies all the data (O(n) in the data size)
   - `Vec::with_capacity()` allocates a new Vec (O(1), just metadata)
   
   What we eliminate is the memcpy operation - specifically sum(unique_values × avg_value_size) bytes of memory traffic.
     
   Talk is cheap, so I'll show the code :) I wrote a quick benchmark and ran `cargo bench` on an Apple M3 chip:
    
   ```rust
   use criterion::*;
   
   /// Benchmark simulating the RunArray decode pattern with .clone()
   fn decode_with_clone(data: &[Vec<u8>]) -> Vec<Vec<u8>> {
       let mut decoded_data = Vec::new();
       let mut decoded_values = Vec::new();
   
       for bytes in data {
           decoded_data.clear();
           decoded_data.extend_from_slice(bytes);
   
           // Simulate checking if it's a new unique value
           let is_new = decoded_values.is_empty()
               || decoded_data != decoded_values[decoded_values.len() - 1];
   
           if is_new {
               decoded_values.push(decoded_data.clone());
           }
       }
   
       decoded_values
   }
   
   /// Benchmark simulating the RunArray decode pattern with mem::replace
   fn decode_with_replace(data: &[Vec<u8>]) -> Vec<Vec<u8>> {
       let mut decoded_data = Vec::new();
       let mut decoded_values = Vec::new();
   
       for bytes in data {
           decoded_data.clear();
           decoded_data.extend_from_slice(bytes);
   
           // Simulate checking if it's a new unique value
           let is_new = decoded_values.is_empty()
               || decoded_data != decoded_values[decoded_values.len() - 1];
   
           if is_new {
               let capacity = decoded_data.capacity();
            decoded_values.push(std::mem::replace(
                &mut decoded_data,
                Vec::with_capacity(capacity),
            ));
           }
       }
   
       decoded_values
   }
   
   fn criterion_benchmark(c: &mut Criterion) {
       // Test with various data sizes to show impact scales with data size
       for size in [64, 256, 1024, 4096] {
        let mut group = c.benchmark_group(format!("run_decode_{}_bytes", size));
   
        // Generate test data: 1000 rows with 100 unique values (10% uniqueness, typical for RLE)
           let mut data = Vec::new();
           for i in 0..1000 {
            let unique_id = i / 10; // 100 unique values, each repeated 10 times
               let bytes = vec![unique_id as u8; size];
               data.push(bytes);
           }
   
           group.bench_function("clone", |b| {
               b.iter(|| black_box(decode_with_clone(&data)));
           });
   
           group.bench_function("mem_replace", |b| {
               b.iter(|| black_box(decode_with_replace(&data)));
           });
   
           group.finish();
       }
   }
   
   criterion_group!(benches, criterion_benchmark);
   criterion_main!(benches);
   ```
     
   
   Results:
     
     | Data Size | Clone (µs) | mem::replace (µs) | Improvement | Time Saved |
     |-----------|------------|-------------------|-------------|------------|
     | 64 bytes  | 5.80       | 5.37              | 7.4%        | 0.43 µs    |
     | 256 bytes | 13.66      | 13.29             | 2.7%        | 0.37 µs    |
     | 1024 bytes| 36.49      | 35.44             | 2.9%        | 1.05 µs    |
     | 4096 bytes| 152.54     | 145.62            | 4.5%        | 6.92 µs    |
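   For scale, a back-of-envelope calculation (my own arithmetic, using the benchmark's 100 unique values) of the memcpy traffic eliminated per decode call:
   
   ```rust
   fn main() {
       // sum(unique_values × avg_value_size) bytes of copying avoided per call.
       let unique_values: u64 = 100;
       for size in [64u64, 256, 1024, 4096] {
           println!("{:>4} B values: {:>6} bytes of memcpy avoided", size, unique_values * size);
       }
   }
   ```
   
   At the 4096-byte size that is ~400 KiB avoided per call; against the 6.92 µs saved that works out to roughly 60 GB/s of effective copy bandwidth, which seems plausible for the M3's memory subsystem.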
   



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to