etseidl commented on code in PR #6159:
URL: https://github.com/apache/arrow-rs/pull/6159#discussion_r1702333489


##########
parquet/src/encodings/decoding/byte_stream_split_decoder.rs:
##########
@@ -119,3 +136,117 @@ impl<T: DataType> Decoder<T> for ByteStreamSplitDecoder<T> {
         Ok(to_skip)
     }
 }
+
+pub struct VariableWidthByteStreamSplitDecoder<T: DataType> {
+    _phantom: PhantomData<T>,
+    encoded_bytes: Bytes,
+    total_num_values: usize,
+    values_decoded: usize,
+    type_width: usize,
+}
+
+impl<T: DataType> VariableWidthByteStreamSplitDecoder<T> {
+    pub(crate) fn new(type_length: i32) -> Self {
+        Self {
+            _phantom: PhantomData,
+            encoded_bytes: Bytes::new(),
+            total_num_values: 0,
+            values_decoded: 0,
+            type_width: type_length as usize,
+        }
+    }
+}
+
+impl<T: DataType> Decoder<T> for VariableWidthByteStreamSplitDecoder<T> {
+    fn set_data(&mut self, data: Bytes, num_values: usize) -> Result<()> {
+        // Rough check that all data elements are the same length
+        if data.len() % self.type_width != 0 {
+            return Err(general_err!("Input data is not of fixed length"));
+        }
+
+        match T::get_physical_type() {
+            Type::FIXED_LEN_BYTE_ARRAY => {
+                self.encoded_bytes = data;
+                self.total_num_values = num_values;
+                self.values_decoded = 0;
+                Ok(())
+            }
+            _ => Err(general_err!(
+                "VariableWidthByteStreamSplitDecoder only supports FixedLenByteArrayType"
+            )),
+        }
+    }
+
+    fn get(&mut self, buffer: &mut [<T as DataType>::T]) -> Result<usize> {
+        let total_remaining_values = self.values_left();
+        let num_values = buffer.len().min(total_remaining_values);
+        let buffer = &mut buffer[..num_values];
+        let type_size = self.type_width;
+
+        // Since this is FIXED_LEN_BYTE_ARRAY data, we can't use slice_as_bytes_mut. Instead we'll
+        // have to do some data copies.
+        let mut tmp_vec = vec![0_u8; num_values * type_size];
+        let raw_out_bytes = tmp_vec.as_mut_slice();
+
+        let stride = self.encoded_bytes.len() / type_size;
+        match type_size {
+            2 => join_streams_const::<2>(

Review Comment:
   I modified the benchmark to work with f16 as FixedLenByteArray(2); a sketch of such a data setup follows the numbers below. The good news is that using the templated `_static` variants is significantly faster for both encode and decode.

   The bad news is that FixedLenByteArray is _very_ slow. That isn't shocking given how many buffer copies are needed. Get it working, then get it working fast, right? 😉
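
   As a rough illustration of where those copies come from (this is just the shape of the problem, not the PR's actual code): after the streams are joined into a temporary contiguous buffer, each `type_width`-sized chunk still has to be copied again into its own buffer-backed value, along these lines:

   ```rust
   use parquet::data_type::{ByteArray, FixedLenByteArray};

   // Hypothetical sketch: once the encoded streams have been interleaved into a
   // contiguous temporary buffer `tmp`, every fixed-width chunk needs one more
   // copy to become an owned FixedLenByteArray value.
   fn scatter_into_values(tmp: &[u8], type_width: usize) -> Vec<FixedLenByteArray> {
       tmp.chunks_exact(type_width)
           .map(|chunk| FixedLenByteArray::from(ByteArray::from(chunk.to_vec())))
           .collect()
   }
   ```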
   
   ```
   % cargo bench -p parquet --bench encoding --all-features -- --baseline bssopt
      Compiling parquet v52.2.0 (/Users/seidl/src/arrow-rs/parquet)
       Finished `bench` profile [optimized] target(s) in 45.92s
        Running benches/encoding.rs (target/release/deps/encoding-534b69246994059e)
   encoding: dtype=parquet::data_type::FixedLenByteArray, encoding=BYTE_STREAM_SPLIT
                           time:   [142.70 µs 144.02 µs 145.71 µs]
                           change: [+26.586% +27.751% +29.264%] (p = 0.00 < 0.05)
                           Performance has regressed.
   Found 7 outliers among 100 measurements (7.00%)
     4 (4.00%) high mild
     3 (3.00%) high severe
   
   dtype=parquet::data_type::FixedLenByteArray, encoding=BYTE_STREAM_SPLIT encoded as 32768 bytes
   decoding: dtype=parquet::data_type::FixedLenByteArray, encoding=BYTE_STREAM_SPLIT
                           time:   [392.38 µs 393.06 µs 393.77 µs]
                           change: [+2.0067% +2.6708% +3.2941%] (p = 0.00 < 0.05)
                           Performance has regressed.
   Found 6 outliers among 100 measurements (6.00%)
     2 (2.00%) high mild
     4 (4.00%) high severe
   
   encoding: dtype=f32, encoding=BYTE_STREAM_SPLIT
                           time:   [44.729 µs 46.314 µs 49.430 µs]
                           change: [-4.2202% -1.5434% +2.7807%] (p = 0.56 > 0.05)
                           No change in performance detected.
   Found 4 outliers among 100 measurements (4.00%)
     2 (2.00%) high mild
     2 (2.00%) high severe
   
   dtype=f32, encoding=BYTE_STREAM_SPLIT encoded as 65536 bytes
   decoding: dtype=f32, encoding=BYTE_STREAM_SPLIT
                           time:   [38.613 µs 38.697 µs 38.784 µs]
                           change: [-0.0019% +0.5500% +1.0931%] (p = 0.06 > 0.05)
                           No change in performance detected.
   Found 8 outliers among 100 measurements (8.00%)
     4 (4.00%) high mild
     4 (4.00%) high severe
   
   encoding: dtype=f64, encoding=BYTE_STREAM_SPLIT
                           time:   [108.86 µs 109.25 µs 109.66 µs]
                           change: [-4.3488% -3.0332% -1.8343%] (p = 0.00 < 0.05)
                           Performance has improved.
   Found 3 outliers among 100 measurements (3.00%)
     2 (2.00%) high mild
     1 (1.00%) high severe
   
   dtype=f64, encoding=BYTE_STREAM_SPLIT encoded as 131072 bytes
   decoding: dtype=f64, encoding=BYTE_STREAM_SPLIT
                           time:   [81.127 µs 81.343 µs 81.566 µs]
                           change: [-3.6443% -2.9616% -2.2527%] (p = 0.00 < 0.05)
                           Performance has improved.
   Found 8 outliers among 100 measurements (8.00%)
     5 (5.00%) high mild
     3 (3.00%) high severe
   ```
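
   For reference, here is roughly how f16 values can be packed as FixedLenByteArray(2) for a benchmark like this. This is a sketch under the assumption that the `half` and `rand` crates are available; it is not necessarily the exact change made to `benches/encoding.rs`.

   ```rust
   use half::f16;
   use parquet::data_type::{ByteArray, FixedLenByteArray};
   use rand::Rng;

   // Hypothetical helper: generate n random f16 values and store each one as a
   // 2-byte FixedLenByteArray (i.e. FIXED_LEN_BYTE_ARRAY(2), little-endian f16).
   fn make_f16_values(n: usize) -> Vec<FixedLenByteArray> {
       let mut rng = rand::thread_rng();
       (0..n)
           .map(|_| {
               let v = f16::from_f32(rng.gen::<f32>());
               FixedLenByteArray::from(ByteArray::from(v.to_le_bytes().to_vec()))
           })
           .collect()
   }
   ```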



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the URL above to go to the specific comment.
