etseidl commented on code in PR #6159:
URL: https://github.com/apache/arrow-rs/pull/6159#discussion_r1702333489
##########
parquet/src/encodings/decoding/byte_stream_split_decoder.rs:
##########
@@ -119,3 +136,117 @@ impl<T: DataType> Decoder<T> for
ByteStreamSplitDecoder<T> {
Ok(to_skip)
}
}
+
+pub struct VariableWidthByteStreamSplitDecoder<T: DataType> {
+ _phantom: PhantomData<T>,
+ encoded_bytes: Bytes,
+ total_num_values: usize,
+ values_decoded: usize,
+ type_width: usize,
+}
+
+impl<T: DataType> VariableWidthByteStreamSplitDecoder<T> {
+ pub(crate) fn new(type_length: i32) -> Self {
+ Self {
+ _phantom: PhantomData,
+ encoded_bytes: Bytes::new(),
+ total_num_values: 0,
+ values_decoded: 0,
+ type_width: type_length as usize,
+ }
+ }
+}
+
+impl<T: DataType> Decoder<T> for VariableWidthByteStreamSplitDecoder<T> {
+ fn set_data(&mut self, data: Bytes, num_values: usize) -> Result<()> {
+ // Rough check that all data elements are the same length
+ if data.len() % self.type_width != 0 {
+ return Err(general_err!("Input data is not of fixed length"));
+ }
+
+ match T::get_physical_type() {
+ Type::FIXED_LEN_BYTE_ARRAY => {
+ self.encoded_bytes = data;
+ self.total_num_values = num_values;
+ self.values_decoded = 0;
+ Ok(())
+ }
+ _ => Err(general_err!(
+ "VariableWidthByteStreamSplitDecoder only supports
FixedLenByteArrayType"
+ )),
+ }
+ }
+
+ fn get(&mut self, buffer: &mut [<T as DataType>::T]) -> Result<usize> {
+ let total_remaining_values = self.values_left();
+ let num_values = buffer.len().min(total_remaining_values);
+ let buffer = &mut buffer[..num_values];
+ let type_size = self.type_width;
+
+ // Since this is FIXED_LEN_BYTE_ARRAY data, we can't use
slice_as_bytes_mut. Instead we'll
+ // have to do some data copies.
+ let mut tmp_vec = vec![0_u8; num_values * type_size];
+ let raw_out_bytes = tmp_vec.as_mut_slice();
+
+ let stride = self.encoded_bytes.len() / type_size;
+ match type_size {
+ 2 => join_streams_const::<2>(
Review Comment:
I modified the benchmark to work with f16 as FixedLenByteArray(2). Good news
is that using the templated `_static` variants is significantly faster for both
encode and decode.
The bad news is that FixedLenByteArray is _very_ slow. This is not shocking
due to the need for so many buffer copies. Get it working, then get it working
fast, right? 😉
```
% cargo bench -p parquet --bench encoding --all-features -- --baseline bssopt
Compiling parquet v52.2.0 (/Users/seidl/src/arrow-rs/parquet)
Finished `bench` profile [optimized] target(s) in 45.92s
Running benches/encoding.rs
(target/release/deps/encoding-534b69246994059e)
encoding: dtype=parquet::data_type::FixedLenByteArray,
encoding=BYTE_STREAM_SPLIT
time: [142.70 µs 144.02 µs 145.71 µs]
change: [+26.586% +27.751% +29.264%] (p = 0.00 <
0.05)
Performance has regressed.
Found 7 outliers among 100 measurements (7.00%)
4 (4.00%) high mild
3 (3.00%) high severe
dtype=parquet::data_type::FixedLenByteArray, encoding=BYTE_STREAM_SPLIT
encoded as 32768 bytes
decoding: dtype=parquet::data_type::FixedLenByteArray,
encoding=BYTE_STREAM_SPLIT
time: [392.38 µs 393.06 µs 393.77 µs]
change: [+2.0067% +2.6708% +3.2941%] (p = 0.00 <
0.05)
Performance has regressed.
Found 6 outliers among 100 measurements (6.00%)
2 (2.00%) high mild
4 (4.00%) high severe
encoding: dtype=f32, encoding=BYTE_STREAM_SPLIT
time: [44.729 µs 46.314 µs 49.430 µs]
change: [-4.2202% -1.5434% +2.7807%] (p = 0.56 >
0.05)
No change in performance detected.
Found 4 outliers among 100 measurements (4.00%)
2 (2.00%) high mild
2 (2.00%) high severe
dtype=f32, encoding=BYTE_STREAM_SPLIT encoded as 65536 bytes
decoding: dtype=f32, encoding=BYTE_STREAM_SPLIT
time: [38.613 µs 38.697 µs 38.784 µs]
change: [-0.0019% +0.5500% +1.0931%] (p = 0.06 >
0.05)
No change in performance detected.
Found 8 outliers among 100 measurements (8.00%)
4 (4.00%) high mild
4 (4.00%) high severe
encoding: dtype=f64, encoding=BYTE_STREAM_SPLIT
time: [108.86 µs 109.25 µs 109.66 µs]
change: [-4.3488% -3.0332% -1.8343%] (p = 0.00 <
0.05)
Performance has improved.
Found 3 outliers among 100 measurements (3.00%)
2 (2.00%) high mild
1 (1.00%) high severe
dtype=f64, encoding=BYTE_STREAM_SPLIT encoded as 131072 bytes
decoding: dtype=f64, encoding=BYTE_STREAM_SPLIT
time: [81.127 µs 81.343 µs 81.566 µs]
change: [-3.6443% -2.9616% -2.2527%] (p = 0.00 <
0.05)
Performance has improved.
Found 8 outliers among 100 measurements (8.00%)
5 (5.00%) high mild
3 (3.00%) high severe
```
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]