dharanad commented on code in PR #10930: URL: https://github.com/apache/datafusion/pull/10930#discussion_r1642130995
########## datafusion/functions-aggregate/src/bit_and_or_xor.rs: ########## @@ -0,0 +1,400 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +//! Defines `BitAnd`, `BitOr`, `BitXor` and `BitXor DISTINCT` aggregate accumulators + +use std::any::Any; +use std::collections::HashSet; +use std::fmt::{Display, Formatter}; + +use ahash::RandomState; +use arrow::array::{Array, ArrayRef, AsArray}; +use arrow::datatypes::{ + ArrowNativeType, ArrowNumericType, DataType, Int16Type, Int32Type, Int64Type, + Int8Type, UInt16Type, UInt32Type, UInt64Type, UInt8Type, +}; +use arrow_schema::Field; + +use datafusion_common::cast::as_list_array; +use datafusion_common::{exec_err, not_impl_err, Result, ScalarValue}; +use datafusion_expr::function::{AccumulatorArgs, StateFieldsArgs}; +use datafusion_expr::type_coercion::aggregates::INTEGERS; +use datafusion_expr::utils::format_state_name; +use datafusion_expr::{Accumulator, AggregateUDFImpl, Signature, Volatility}; + +/// `accumulator_helper` is a macro accepting (ArrowPrimitiveType, BitwiseOperationType) +macro_rules! accumulator_helper { + ($t:ty, $opr:expr) => { + match $opr { + BitwiseOperationType::And => Ok(Box::<BitAndAccumulator<$t>>::default()), + BitwiseOperationType::Or => Ok(Box::<BitOrAccumulator<$t>>::default()), + BitwiseOperationType::Xor => Ok(Box::<BitXorAccumulator<$t>>::default()), + BitwiseOperationType::XorDistinct => { + Ok(Box::<DistinctBitXorAccumulator<$t>>::default()) + } + } + }; +} + +/// AND, OR and XOR only supports a subset of numeric types +/// +/// `args` is [AccumulatorArgs] +/// `opr` is [BitwiseOperationType] +macro_rules! downcast_bitwise_accumulator { + ($args:ident, $opr:expr) => { + match $args.data_type { + DataType::Int8 => accumulator_helper!(Int8Type, $opr), + DataType::Int16 => accumulator_helper!(Int16Type, $opr), + DataType::Int32 => accumulator_helper!(Int32Type, $opr), + DataType::Int64 => accumulator_helper!(Int64Type, $opr), + DataType::UInt8 => accumulator_helper!(UInt8Type, $opr), + DataType::UInt16 => accumulator_helper!(UInt16Type, $opr), + DataType::UInt32 => accumulator_helper!(UInt32Type, $opr), + DataType::UInt64 => accumulator_helper!(UInt64Type, $opr), + _ => { + not_impl_err!( + "{} not supported for {}: {}", + stringify!($opr), + $args.name, + $args.data_type + ) + } + } + }; +} + +/// Simplifies the creation of User-Defined Aggregate Functions (UDAFs) for performing bitwise operations in a declarative manner. +/// +/// `EXPR_FN` identifier used to name the generated expression function. +/// `AGGREGATE_UDF_FN` is an identifier used to name the underlying UDAF function. +/// `OPR_TYPE` is an expression that evaluates to the type of bitwise operation to be performed. +macro_rules! make_bitwise_udaf_expr_and_func { + ($EXPR_FN:ident, $AGGREGATE_UDF_FN:ident, $OPR_TYPE:expr) => { + make_udaf_expr!($EXPR_FN, expr_y expr_x, concat!("Returns the bitwise", stringify!($OPR_TYPE), "of a group of values"), $AGGREGATE_UDF_FN); + create_func!($EXPR_FN, $AGGREGATE_UDF_FN, BitwiseOperation::new($OPR_TYPE, stringify!($EXPR_FN))); + } +} + +make_bitwise_udaf_expr_and_func!(bit_and, bit_and_udaf, BitwiseOperationType::And); +make_bitwise_udaf_expr_and_func!(bit_or, bit_or_udaf, BitwiseOperationType::Or); +make_bitwise_udaf_expr_and_func!(bit_xor, bit_xor_udaf, BitwiseOperationType::Xor); + +/// The different types of bitwise operations that can be performed. +#[derive(Debug, Clone, Eq, PartialEq)] +enum BitwiseOperationType { + And, + Or, + Xor, + /// `XorDistinct` is a variation of the bitwise XOR operation specifically for the scenario of BitXor DISTINCT + XorDistinct, +} + +impl Display for BitwiseOperationType { + fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { + write!(f, "{:?}", self) + } +} + +/// [BitwiseOperation] struct encapsulates information about a bitwise operation. +#[derive(Debug)] +struct BitwiseOperation { + signature: Signature, + /// `operation` indicates the type of bitwise operation to be performed. + operation: BitwiseOperationType, + func_name: &'static str, +} + +impl BitwiseOperation { + pub fn new(operator: BitwiseOperationType, func_name: &'static str) -> Self { + Self { + operation: operator, + signature: Signature::uniform(1, INTEGERS.to_vec(), Volatility::Immutable), + func_name, + } + } +} + +impl AggregateUDFImpl for BitwiseOperation { + fn as_any(&self) -> &dyn Any { + self + } + + fn name(&self) -> &str { + self.func_name + } + + fn signature(&self) -> &Signature { + &self.signature + } + + fn return_type(&self, arg_types: &[DataType]) -> Result<DataType> { + let arg_type = &arg_types[0]; + if !is_bit_and_or_xor_support_arg_type(arg_type) { Review Comment: Sure. That would be much simpler, will make the change. In my earlier implementation i was using it at many, places, so i have moved it out to a function. ########## datafusion/functions-aggregate/src/bit_and_or_xor.rs: ########## @@ -0,0 +1,400 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +//! Defines `BitAnd`, `BitOr`, `BitXor` and `BitXor DISTINCT` aggregate accumulators + +use std::any::Any; +use std::collections::HashSet; +use std::fmt::{Display, Formatter}; + +use ahash::RandomState; +use arrow::array::{Array, ArrayRef, AsArray}; +use arrow::datatypes::{ + ArrowNativeType, ArrowNumericType, DataType, Int16Type, Int32Type, Int64Type, + Int8Type, UInt16Type, UInt32Type, UInt64Type, UInt8Type, +}; +use arrow_schema::Field; + +use datafusion_common::cast::as_list_array; +use datafusion_common::{exec_err, not_impl_err, Result, ScalarValue}; +use datafusion_expr::function::{AccumulatorArgs, StateFieldsArgs}; +use datafusion_expr::type_coercion::aggregates::INTEGERS; +use datafusion_expr::utils::format_state_name; +use datafusion_expr::{Accumulator, AggregateUDFImpl, Signature, Volatility}; + +/// `accumulator_helper` is a macro accepting (ArrowPrimitiveType, BitwiseOperationType) +macro_rules! accumulator_helper { + ($t:ty, $opr:expr) => { + match $opr { + BitwiseOperationType::And => Ok(Box::<BitAndAccumulator<$t>>::default()), + BitwiseOperationType::Or => Ok(Box::<BitOrAccumulator<$t>>::default()), + BitwiseOperationType::Xor => Ok(Box::<BitXorAccumulator<$t>>::default()), + BitwiseOperationType::XorDistinct => { + Ok(Box::<DistinctBitXorAccumulator<$t>>::default()) + } + } + }; +} + +/// AND, OR and XOR only supports a subset of numeric types +/// +/// `args` is [AccumulatorArgs] +/// `opr` is [BitwiseOperationType] +macro_rules! downcast_bitwise_accumulator { + ($args:ident, $opr:expr) => { + match $args.data_type { + DataType::Int8 => accumulator_helper!(Int8Type, $opr), + DataType::Int16 => accumulator_helper!(Int16Type, $opr), + DataType::Int32 => accumulator_helper!(Int32Type, $opr), + DataType::Int64 => accumulator_helper!(Int64Type, $opr), + DataType::UInt8 => accumulator_helper!(UInt8Type, $opr), + DataType::UInt16 => accumulator_helper!(UInt16Type, $opr), + DataType::UInt32 => accumulator_helper!(UInt32Type, $opr), + DataType::UInt64 => accumulator_helper!(UInt64Type, $opr), + _ => { + not_impl_err!( + "{} not supported for {}: {}", + stringify!($opr), + $args.name, + $args.data_type + ) + } + } + }; +} + +/// Simplifies the creation of User-Defined Aggregate Functions (UDAFs) for performing bitwise operations in a declarative manner. +/// +/// `EXPR_FN` identifier used to name the generated expression function. +/// `AGGREGATE_UDF_FN` is an identifier used to name the underlying UDAF function. +/// `OPR_TYPE` is an expression that evaluates to the type of bitwise operation to be performed. +macro_rules! make_bitwise_udaf_expr_and_func { + ($EXPR_FN:ident, $AGGREGATE_UDF_FN:ident, $OPR_TYPE:expr) => { + make_udaf_expr!($EXPR_FN, expr_y expr_x, concat!("Returns the bitwise", stringify!($OPR_TYPE), "of a group of values"), $AGGREGATE_UDF_FN); + create_func!($EXPR_FN, $AGGREGATE_UDF_FN, BitwiseOperation::new($OPR_TYPE, stringify!($EXPR_FN))); + } +} + +make_bitwise_udaf_expr_and_func!(bit_and, bit_and_udaf, BitwiseOperationType::And); +make_bitwise_udaf_expr_and_func!(bit_or, bit_or_udaf, BitwiseOperationType::Or); +make_bitwise_udaf_expr_and_func!(bit_xor, bit_xor_udaf, BitwiseOperationType::Xor); + +/// The different types of bitwise operations that can be performed. +#[derive(Debug, Clone, Eq, PartialEq)] +enum BitwiseOperationType { + And, + Or, + Xor, + /// `XorDistinct` is a variation of the bitwise XOR operation specifically for the scenario of BitXor DISTINCT + XorDistinct, +} + +impl Display for BitwiseOperationType { + fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { + write!(f, "{:?}", self) + } +} + +/// [BitwiseOperation] struct encapsulates information about a bitwise operation. +#[derive(Debug)] +struct BitwiseOperation { + signature: Signature, + /// `operation` indicates the type of bitwise operation to be performed. + operation: BitwiseOperationType, + func_name: &'static str, +} + +impl BitwiseOperation { + pub fn new(operator: BitwiseOperationType, func_name: &'static str) -> Self { + Self { + operation: operator, + signature: Signature::uniform(1, INTEGERS.to_vec(), Volatility::Immutable), + func_name, + } + } +} + +impl AggregateUDFImpl for BitwiseOperation { + fn as_any(&self) -> &dyn Any { + self + } + + fn name(&self) -> &str { + self.func_name + } + + fn signature(&self) -> &Signature { + &self.signature + } + + fn return_type(&self, arg_types: &[DataType]) -> Result<DataType> { + let arg_type = &arg_types[0]; + if !is_bit_and_or_xor_support_arg_type(arg_type) { + return exec_err!( + "[return_type] {} not supported for {}", + self.operation.to_string(), + arg_type + ); + } + Ok(arg_type.clone()) + } + + fn accumulator(&self, acc_args: AccumulatorArgs) -> Result<Box<dyn Accumulator>> { + if acc_args.is_distinct && self.operation == BitwiseOperationType::Xor { + downcast_bitwise_accumulator!(acc_args, BitwiseOperationType::XorDistinct) Review Comment: I was thinking the same, but though it would easier for maintainer to look at the enum and figure out the what operation are implemented this file. Any which ways, i agree with you. Will make changes. -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: github-unsubscr...@datafusion.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: github-unsubscr...@datafusion.apache.org For additional commands, e-mail: github-h...@datafusion.apache.org