findepi commented on code in PR #12792:
URL: https://github.com/apache/datafusion/pull/12792#discussion_r1797446554


##########
datafusion/functions-aggregate-common/src/aggregate/groups_accumulator/nulls.rs:
##########
@@ -91,3 +100,105 @@ pub fn filtered_null_mask(
     let opt_filter = opt_filter.and_then(filter_to_nulls);
     NullBuffer::union(opt_filter.as_ref(), input.nulls())
 }
+
+/// Applies optional filter to input, returning a new array of the same type
+/// with the same data, but with any values that were filtered out set to null
+pub fn apply_filter_as_nulls(
+    input: &dyn Array,
+    opt_filter: Option<&BooleanArray>,
+) -> Result<ArrayRef> {
+    let nulls = filtered_null_mask(opt_filter, input);
+    set_nulls_dyn(input, nulls)
+}
+
+/// Replaces the nulls in the input array with the given `NullBuffer`
+///
+/// Can replace when upstreamed in arrow-rs: 
<https://github.com/apache/arrow-rs/issues/6528>
+pub fn set_nulls_dyn(input: &dyn Array, nulls: Option<NullBuffer>) -> 
Result<ArrayRef> {

Review Comment:
   nit: this could be private
   
   



##########
datafusion/functions-aggregate-common/src/aggregate/groups_accumulator/nulls.rs:
##########
@@ -91,3 +100,105 @@ pub fn filtered_null_mask(
     let opt_filter = opt_filter.and_then(filter_to_nulls);
     NullBuffer::union(opt_filter.as_ref(), input.nulls())
 }
+
+/// Applies optional filter to input, returning a new array of the same type
+/// with the same data, but with any values that were filtered out set to null
+pub fn apply_filter_as_nulls(
+    input: &dyn Array,
+    opt_filter: Option<&BooleanArray>,
+) -> Result<ArrayRef> {
+    let nulls = filtered_null_mask(opt_filter, input);
+    set_nulls_dyn(input, nulls)
+}
+
+/// Replaces the nulls in the input array with the given `NullBuffer`
+///
+/// Can replace when upstreamed in arrow-rs: 
<https://github.com/apache/arrow-rs/issues/6528>

Review Comment:
   This could be a TODO comment 



##########
datafusion/functions-aggregate-common/src/aggregate/groups_accumulator/nulls.rs:
##########
@@ -91,3 +100,105 @@ pub fn filtered_null_mask(
     let opt_filter = opt_filter.and_then(filter_to_nulls);
     NullBuffer::union(opt_filter.as_ref(), input.nulls())
 }
+
+/// Applies optional filter to input, returning a new array of the same type
+/// with the same data, but with any values that were filtered out set to null
+pub fn apply_filter_as_nulls(
+    input: &dyn Array,
+    opt_filter: Option<&BooleanArray>,
+) -> Result<ArrayRef> {
+    let nulls = filtered_null_mask(opt_filter, input);
+    set_nulls_dyn(input, nulls)
+}
+
+/// Replaces the nulls in the input array with the given `NullBuffer`
+///
+/// Can replace when upstreamed in arrow-rs: 
<https://github.com/apache/arrow-rs/issues/6528>
+pub fn set_nulls_dyn(input: &dyn Array, nulls: Option<NullBuffer>) -> 
Result<ArrayRef> {
+    if let Some(nulls) = nulls.as_ref() {
+        assert_eq!(nulls.len(), input.len());
+    }
+
+    let output: ArrayRef = match input.data_type() {
+        DataType::Utf8 => {
+            let input = input.as_string::<i32>();
+            // safety: values / offsets came from a valid string array, so are 
valid utf8
+            // and we checked nulls has the same length as values
+            unsafe {
+                Arc::new(StringArray::new_unchecked(
+                    input.offsets().clone(),
+                    input.values().clone(),
+                    nulls,
+                ))
+            }
+        }
+        DataType::LargeUtf8 => {
+            let input = input.as_string::<i64>();
+            // safety: values / offsets came from a valid string array, so are 
valid utf8
+            // and we checked nulls has the same length as values
+            unsafe {
+                Arc::new(LargeStringArray::new_unchecked(
+                    input.offsets().clone(),
+                    input.values().clone(),
+                    nulls,
+                ))
+            }
+        }
+        DataType::Utf8View => {
+            let input = input.as_string_view();
+            // safety: values / views came from a valid string view array, so 
are valid utf8
+            // and we checked nulls has the same length as values
+            unsafe {
+                Arc::new(StringViewArray::new_unchecked(
+                    input.views().clone(),
+                    input.data_buffers().to_vec(),
+                    nulls,
+                ))
+            }
+        }
+
+        DataType::Binary => {
+            let input = input.as_binary::<i32>();
+            // safety: values / offsets came from a valid binary array
+            // and we checked nulls has the same length as values
+            unsafe {
+                Arc::new(BinaryArray::new_unchecked(
+                    input.offsets().clone(),
+                    input.values().clone(),
+                    nulls,
+                ))
+            }
+        }
+        DataType::LargeBinary => {
+            let input = input.as_binary::<i64>();
+            // safety: values / offsets came from a valid large binary array
+            // and we checked nulls has the same length as values
+            unsafe {
+                Arc::new(LargeBinaryArray::new_unchecked(
+                    input.offsets().clone(),
+                    input.values().clone(),
+                    nulls,
+                ))
+            }
+        }
+        DataType::BinaryView => {
+            let input = input.as_binary_view();
+            // safety: values / views came from a valid binary view array
+            // and we checked nulls has the same length as values
+            unsafe {
+                Arc::new(BinaryViewArray::new_unchecked(
+                    input.views().clone(),
+                    input.data_buffers().to_vec(),
+                    nulls,
+                ))
+            }
+        }
+        _ => {
+            return not_impl_err!("Applying nulls {:?}", input.data_type());

Review Comment:
   Do we _have to_ support this for any other data types?



##########
datafusion/functions-aggregate/src/min_max/min_max_bytes.rs:
##########
@@ -0,0 +1,515 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+use arrow::array::{
+    Array, ArrayRef, AsArray, BinaryBuilder, BinaryViewBuilder, BooleanArray,
+    LargeBinaryBuilder, LargeStringBuilder, StringBuilder, StringViewBuilder,
+};
+use arrow_schema::DataType;
+use datafusion_common::{internal_err, Result};
+use datafusion_expr::{EmitTo, GroupsAccumulator};
+use 
datafusion_functions_aggregate_common::aggregate::groups_accumulator::nulls::apply_filter_as_nulls;
+use std::sync::Arc;
+
+/// Implements fast Min/Max [`GroupsAccumulator`] for "bytes" types 
([`StringArray`],
+/// [`BinaryArray`], [`StringViewArray`], etc)
+///
+/// This implementation dispatches to the appropriate specialized code in
+/// [`MinMaxBytesState`] based on data type and comparison function
+///
+/// [`StringArray`]: arrow::array::StringArray
+/// [`BinaryArray`]: arrow::array::BinaryArray
+/// [`StringViewArray`]: arrow::array::StringViewArray
+#[derive(Debug)]
+pub(crate) struct MinMaxBytesAccumulator {
+    /// Inner data storage.
+    inner: MinMaxBytesState,
+    /// if true, is `MIN` otherwise is `MAX`
+    is_min: bool,
+}
+
+impl MinMaxBytesAccumulator {
+    /// Create a new accumulator for computing `min(val)`
+    pub fn new_min(data_type: DataType) -> Self {
+        Self {
+            inner: MinMaxBytesState::new(data_type),
+            is_min: true,
+        }
+    }
+
+    /// Create a new accumulator for computing `max(val)`
+    pub fn new_max(data_type: DataType) -> Self {
+        Self {
+            inner: MinMaxBytesState::new(data_type),
+            is_min: false,
+        }
+    }
+}
+
+impl GroupsAccumulator for MinMaxBytesAccumulator {
+    fn update_batch(
+        &mut self,
+        values: &[ArrayRef],
+        group_indices: &[usize],
+        opt_filter: Option<&BooleanArray>,
+        total_num_groups: usize,
+    ) -> Result<()> {
+        let array = &values[0];
+        assert_eq!(array.len(), group_indices.len());
+        assert_eq!(array.data_type(), &self.inner.data_type);
+
+        // apply filter if needed
+        let array = apply_filter_as_nulls(array, opt_filter)?;
+
+        // dispatch to appropriate kernel / specialized implementation
+        fn string_min(a: &[u8], b: &[u8]) -> bool {
+            // safety: only called from this function, which ensures a and b 
come
+            // from an array with valid utf8 data
+            unsafe {
+                let a = std::str::from_utf8_unchecked(a);
+                let b = std::str::from_utf8_unchecked(b);
+                a < b
+            }
+        }
+        fn string_max(a: &[u8], b: &[u8]) -> bool {
+            // safety: only called from this function, which ensures a and b 
come
+            // from an array with valid utf8 data
+            unsafe {
+                let a = std::str::from_utf8_unchecked(a);
+                let b = std::str::from_utf8_unchecked(b);
+                a > b
+            }
+        }
+        fn binary_min(a: &[u8], b: &[u8]) -> bool {
+            a < b
+        }
+
+        fn binary_max(a: &[u8], b: &[u8]) -> bool {
+            a > b
+        }
+
+        fn str_to_bytes<'a>(
+            it: impl Iterator<Item = Option<&'a str>>,
+        ) -> impl Iterator<Item = Option<&'a [u8]>> {
+            it.map(|s| s.map(|s| s.as_bytes()))
+        }
+
+        match (self.is_min, &self.inner.data_type) {
+            // Utf8/LargeUtf8/Utf8View Min
+            (true, &DataType::Utf8) => self.inner.update_batch(
+                str_to_bytes(array.as_string::<i32>().iter()),
+                group_indices,
+                total_num_groups,
+                string_min,
+            ),
+            (true, &DataType::LargeUtf8) => self.inner.update_batch(
+                str_to_bytes(array.as_string::<i64>().iter()),
+                group_indices,
+                total_num_groups,
+                string_min,
+            ),
+            (true, &DataType::Utf8View) => self.inner.update_batch(
+                str_to_bytes(array.as_string_view().iter()),
+                group_indices,
+                total_num_groups,
+                string_min,
+            ),
+
+            // Utf8/LargeUtf8/Utf8View Max
+            (false, &DataType::Utf8) => self.inner.update_batch(
+                str_to_bytes(array.as_string::<i32>().iter()),
+                group_indices,
+                total_num_groups,
+                string_max,
+            ),
+            (false, &DataType::LargeUtf8) => self.inner.update_batch(
+                str_to_bytes(array.as_string::<i64>().iter()),
+                group_indices,
+                total_num_groups,
+                string_max,
+            ),
+            (false, &DataType::Utf8View) => self.inner.update_batch(
+                str_to_bytes(array.as_string_view().iter()),
+                group_indices,
+                total_num_groups,
+                string_max,
+            ),
+
+            // Binary/LargeBinary/BinaryView Min
+            (true, &DataType::Binary) => self.inner.update_batch(
+                array.as_binary::<i32>().iter(),
+                group_indices,
+                total_num_groups,
+                binary_min,
+            ),
+            (true, &DataType::LargeBinary) => self.inner.update_batch(
+                array.as_binary::<i64>().iter(),
+                group_indices,
+                total_num_groups,
+                binary_min,
+            ),
+            (true, &DataType::BinaryView) => self.inner.update_batch(
+                array.as_binary_view().iter(),
+                group_indices,
+                total_num_groups,
+                binary_min,
+            ),
+
+            // Binary/LargeBinary/BinaryView Max
+            (false, &DataType::Binary) => self.inner.update_batch(
+                array.as_binary::<i32>().iter(),
+                group_indices,
+                total_num_groups,
+                binary_max,
+            ),
+            (false, &DataType::LargeBinary) => self.inner.update_batch(
+                array.as_binary::<i64>().iter(),
+                group_indices,
+                total_num_groups,
+                binary_max,
+            ),
+            (false, &DataType::BinaryView) => self.inner.update_batch(
+                array.as_binary_view().iter(),
+                group_indices,
+                total_num_groups,
+                binary_max,
+            ),
+
+            _ => internal_err!(
+                "Unexpected combination for MinMaxBytesAccumulator: ({:?}, 
{:?})",
+                self.is_min,
+                self.inner.data_type
+            ),
+        }
+    }
+
+    fn evaluate(&mut self, emit_to: EmitTo) -> Result<ArrayRef> {
+        let (data_capacity, min_maxes) = self.inner.emit_to(emit_to);
+
+        // Convert the Vec of bytes to a vec of Strings (at no cost)
+        fn bytes_to_str(
+            min_maxes: Vec<Option<Vec<u8>>>,
+        ) -> impl Iterator<Item = Option<String>> {
+            min_maxes.into_iter().map(|opt| {
+                opt.map(|bytes| {
+                    // Safety: only called on data added from update_batch 
which ensures
+                    // the input type matched the output type
+                    unsafe { String::from_utf8_unchecked(bytes) }
+                })
+            })
+        }
+
+        let result: ArrayRef = match self.inner.data_type {
+            DataType::Utf8 => {
+                let mut builder =
+                    StringBuilder::with_capacity(min_maxes.len(), 
data_capacity);
+                for opt in bytes_to_str(min_maxes) {
+                    match opt {
+                        None => builder.append_null(),
+                        Some(s) => builder.append_value(s.as_str()),
+                    }
+                }
+                Arc::new(builder.finish())
+            }
+            DataType::LargeUtf8 => {
+                let mut builder =
+                    LargeStringBuilder::with_capacity(min_maxes.len(), 
data_capacity);
+                for opt in bytes_to_str(min_maxes) {
+                    match opt {
+                        None => builder.append_null(),
+                        Some(s) => builder.append_value(s.as_str()),
+                    }
+                }
+                Arc::new(builder.finish())
+            }
+            DataType::Utf8View => {
+                let block_size = capacity_to_view_block_size(data_capacity);
+
+                let mut builder = 
StringViewBuilder::with_capacity(min_maxes.len())
+                    .with_fixed_block_size(block_size);
+                for opt in bytes_to_str(min_maxes) {
+                    match opt {
+                        None => builder.append_null(),
+                        Some(s) => builder.append_value(s.as_str()),
+                    }
+                }
+                Arc::new(builder.finish())
+            }
+            DataType::Binary => {
+                let mut builder =
+                    BinaryBuilder::with_capacity(min_maxes.len(), 
data_capacity);
+                for opt in min_maxes {
+                    match opt {
+                        None => builder.append_null(),
+                        Some(s) => builder.append_value(s.as_ref() as &[u8]),
+                    }
+                }
+                Arc::new(builder.finish())
+            }
+            DataType::LargeBinary => {
+                let mut builder =
+                    LargeBinaryBuilder::with_capacity(min_maxes.len(), 
data_capacity);
+                for opt in min_maxes {
+                    match opt {
+                        None => builder.append_null(),
+                        Some(s) => builder.append_value(s.as_ref() as &[u8]),
+                    }
+                }
+                Arc::new(builder.finish())
+            }
+            DataType::BinaryView => {
+                let block_size = capacity_to_view_block_size(data_capacity);
+
+                let mut builder = 
BinaryViewBuilder::with_capacity(min_maxes.len())
+                    .with_fixed_block_size(block_size);
+                for opt in min_maxes {
+                    match opt {
+                        None => builder.append_null(),
+                        Some(s) => builder.append_value(s.as_ref() as &[u8]),
+                    }
+                }
+                Arc::new(builder.finish())
+            }
+            _ => {
+                return internal_err!(
+                    "Unexpected data type for MinMaxBytesAccumulator: {:?}",
+                    self.inner.data_type
+                );
+            }
+        };
+
+        assert_eq!(&self.inner.data_type, result.data_type());
+        Ok(result)
+    }
+
+    fn state(&mut self, emit_to: EmitTo) -> Result<Vec<ArrayRef>> {
+        // min/max are their own states (no transition needed)
+        self.evaluate(emit_to).map(|arr| vec![arr])
+    }
+
+    fn merge_batch(
+        &mut self,
+        values: &[ArrayRef],
+        group_indices: &[usize],
+        opt_filter: Option<&BooleanArray>,
+        total_num_groups: usize,
+    ) -> Result<()> {
+        // min/max are their own states (no transition needed)
+        self.update_batch(values, group_indices, opt_filter, total_num_groups)
+    }
+
+    fn convert_to_state(
+        &self,
+        values: &[ArrayRef],
+        opt_filter: Option<&BooleanArray>,
+    ) -> Result<Vec<ArrayRef>> {
+        // Min/max do not change the values as they are their own states
+        // apply the filter by combining with the null mask, if any
+        let output = apply_filter_as_nulls(&values[0], opt_filter)?;
+        Ok(vec![output])
+    }
+
+    fn supports_convert_to_state(&self) -> bool {
+        true
+    }
+
+    fn size(&self) -> usize {
+        self.inner.size()
+    }
+}
+
+/// Returns the block size in (contiguous buffer size) to use
+/// for a given data capacity (total string length)
+///
+/// This is a heuristic to avoid allocating too many small buffers
+fn capacity_to_view_block_size(data_capacity: usize) -> u32 {
+    let max_block_size = 2 * 1024 * 1024;
+    if let Ok(block_size) = u32::try_from(data_capacity) {
+        block_size.min(max_block_size)
+    } else {
+        max_block_size
+    }
+}
+
+/// Stores internal Min/Max state for "bytes" types.
+///
+/// This implementation is general and stores the minimum/maximum for each
+/// groups in an individual byte array, which balances allocations and memory
+/// fragmentation (aka garbage).
+///
+/// ```text
+///                    ┌─────────────────────────────────┐
+///   ┌─────┐    ┌────▶│Option<Vec<u8>> (["A"])          │───────────▶   "A"
+///   │  0  │────┘     └─────────────────────────────────┘
+///   ├─────┤          ┌─────────────────────────────────┐
+///   │  1  │─────────▶│Option<Vec<u8>> (["Z"])          │───────────▶   "Z"
+///   └─────┘          └─────────────────────────────────┘               ...
+///     ...               ...
+///   ┌─────┐          ┌────────────────────────────────┐
+///   │ N-2 │─────────▶│Option<Vec<u8>> (["A"])         │────────────▶   "A"
+///   ├─────┤          └────────────────────────────────┘
+///   │ N-1 │────┐     ┌────────────────────────────────┐
+///   └─────┘    └────▶│Option<Vec<u8>> (["Q"])         │────────────▶   "Q"
+///                    └────────────────────────────────┘
+///
+///                      min_max: Vec<Option<Vec<u8>>>

Review Comment:
   nice!
   



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to