This is an automated email from the ASF dual-hosted git repository.
alamb pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-datafusion.git
The following commit(s) were added to refs/heads/master by this push:
new ee55d89cb feat: Support Binary bitwise shift operators (<< and >>) (#3037)
ee55d89cb is described below
commit ee55d89cbf20f4a6d17fe399c72d60dca6d67912
Author: Dmitry Patsura <[email protected]>
AuthorDate: Mon Aug 15 19:59:59 2022 +0300
feat: Support Binary bitwise shift operators (<< and >>) (#3037)
* feat: Support Binary bitwise shift operators (<< and >>)
* no panic on overflow
* null coercion test + fix
* Update datafusion/expr/src/operator.rs
Co-authored-by: Jiayu Liu <[email protected]>
Co-authored-by: Jiayu Liu <[email protected]>
---
datafusion/core/tests/sql/expr.rs | 10 +
datafusion/expr/src/binary_rule.rs | 16 +-
datafusion/expr/src/operator.rs | 6 +
datafusion/physical-expr/src/expressions/binary.rs | 56 ++++-
.../src/expressions/binary/kernels.rs | 225 +++++++++++++++++++--
datafusion/sql/src/planner.rs | 2 +
6 files changed, 289 insertions(+), 26 deletions(-)
diff --git a/datafusion/core/tests/sql/expr.rs b/datafusion/core/tests/sql/expr.rs
index 093ab3433..4fa1f54d2 100644
--- a/datafusion/core/tests/sql/expr.rs
+++ b/datafusion/core/tests/sql/expr.rs
@@ -644,6 +644,16 @@ async fn test_struct_literals() -> Result<()> {
Ok(())
}
+#[tokio::test]
+async fn binary_bitwise_shift() -> Result<()> {
+ test_expression!("2 << 10", "2048");
+ test_expression!("2048 >> 10", "2");
+ test_expression!("2048 << NULL", "NULL");
+ test_expression!("2048 >> NULL", "NULL");
+
+ Ok(())
+}
+
#[tokio::test]
async fn test_interval_expressions() -> Result<()> {
// day nano intervals
diff --git a/datafusion/expr/src/binary_rule.rs b/datafusion/expr/src/binary_rule.rs
index d6994d688..f71e97e49 100644
--- a/datafusion/expr/src/binary_rule.rs
+++ b/datafusion/expr/src/binary_rule.rs
@@ -55,7 +55,10 @@ pub fn binary_operator_data_type(
| Operator::IsDistinctFrom
| Operator::IsNotDistinctFrom => Ok(DataType::Boolean),
// bitwise operations return the common coerced type
- Operator::BitwiseAnd | Operator::BitwiseOr => Ok(result_type),
+ Operator::BitwiseAnd
+ | Operator::BitwiseOr
+ | Operator::BitwiseShiftLeft
+ | Operator::BitwiseShiftRight => Ok(result_type),
// math operations return the same value as the common coerced type
Operator::Plus
| Operator::Minus
@@ -76,9 +79,10 @@ pub fn coerce_types(
) -> Result<DataType> {
// This result MUST be compatible with `binary_coerce`
let result = match op {
- Operator::BitwiseAnd | Operator::BitwiseOr => {
- bitwise_coercion(lhs_type, rhs_type)
- }
+ Operator::BitwiseAnd
+ | Operator::BitwiseOr
+ | Operator::BitwiseShiftRight
+ | Operator::BitwiseShiftLeft => bitwise_coercion(lhs_type, rhs_type),
Operator::And | Operator::Or => match (lhs_type, rhs_type) {
// logical binary boolean operators can only be evaluated in bools
(DataType::Boolean, DataType::Boolean) => Some(DataType::Boolean),
@@ -135,12 +139,14 @@ pub fn coerce_types(
fn bitwise_coercion(left_type: &DataType, right_type: &DataType) ->
Option<DataType> {
use arrow::datatypes::DataType::*;
- if !is_numeric(left_type) || !is_numeric(right_type) {
+ if !both_numeric_or_null_and_numeric(left_type, right_type) {
return None;
}
+
if left_type == right_type && !is_dictionary(left_type) {
return Some(left_type.clone());
}
+
// TODO support other data type
match (left_type, right_type) {
(Int64, _) | (_, Int64) => Some(Int64),
diff --git a/datafusion/expr/src/operator.rs b/datafusion/expr/src/operator.rs
index d22cb8569..f2cdb3555 100644
--- a/datafusion/expr/src/operator.rs
+++ b/datafusion/expr/src/operator.rs
@@ -71,6 +71,10 @@ pub enum Operator {
BitwiseAnd,
/// Bitwise or, like `|`
BitwiseOr,
+ /// Bitwise shift right, like `>>`
+ BitwiseShiftRight,
+ /// Bitwise shift left, like `<<`
+ BitwiseShiftLeft,
/// String concat
StringConcat,
}
@@ -101,6 +105,8 @@ impl fmt::Display for Operator {
Operator::IsNotDistinctFrom => "IS NOT DISTINCT FROM",
Operator::BitwiseAnd => "&",
Operator::BitwiseOr => "|",
+ Operator::BitwiseShiftRight => ">>",
+ Operator::BitwiseShiftLeft => "<<",
Operator::StringConcat => "||",
};
write!(f, "{}", display)
diff --git a/datafusion/physical-expr/src/expressions/binary.rs b/datafusion/physical-expr/src/expressions/binary.rs
index 64e353116..6769032bf 100644
--- a/datafusion/physical-expr/src/expressions/binary.rs
+++ b/datafusion/physical-expr/src/expressions/binary.rs
@@ -50,7 +50,10 @@ use arrow::compute::kernels::comparison::{
use adapter::{eq_dyn, gt_dyn, gt_eq_dyn, lt_dyn, lt_eq_dyn, neq_dyn};
use arrow::compute::kernels::concat_elements::concat_elements_utf8;
-use kernels::{bitwise_and, bitwise_and_scalar, bitwise_or, bitwise_or_scalar};
+use kernels::{
+ bitwise_and, bitwise_and_scalar, bitwise_or, bitwise_or_scalar,
bitwise_shift_left,
+ bitwise_shift_left_scalar, bitwise_shift_right, bitwise_shift_right_scalar,
+};
use kernels_arrow::{
add_decimal, add_decimal_scalar, divide_decimal, divide_decimal_scalar,
eq_decimal_scalar, gt_decimal_scalar, gt_eq_decimal_scalar,
is_distinct_from,
@@ -740,6 +743,12 @@ impl BinaryExpr {
),
Operator::BitwiseAnd => bitwise_and_scalar(array, scalar.clone()),
Operator::BitwiseOr => bitwise_or_scalar(array, scalar.clone()),
+ Operator::BitwiseShiftRight => {
+ bitwise_shift_right_scalar(array, scalar.clone())
+ }
+ Operator::BitwiseShiftLeft => {
+ bitwise_shift_left_scalar(array, scalar.clone())
+ }
// if scalar operation is not supported - fallback to array implementation
_ => None,
};
@@ -850,6 +859,8 @@ impl BinaryExpr {
}
Operator::BitwiseAnd => bitwise_and(left, right),
Operator::BitwiseOr => bitwise_or(left, right),
+ Operator::BitwiseShiftRight => bitwise_shift_right(left, right),
+ Operator::BitwiseShiftLeft => bitwise_shift_left(left, right),
Operator::StringConcat => {
binary_string_array_op!(left, right, concat_elements)
}
@@ -2481,6 +2492,34 @@ mod tests {
Ok(())
}
+ #[test]
+ fn bitwise_shift_array_test() -> Result<()> {
+ let input = Arc::new(Int32Array::from(vec![Some(2), None, Some(10)]))
as ArrayRef;
+ let modules =
+ Arc::new(Int32Array::from(vec![Some(2), Some(4), Some(8)])) as
ArrayRef;
+ let mut result = bitwise_shift_left(input.clone(), modules.clone())?;
+
+ let expected = Int32Array::from(vec![Some(8), None, Some(2560)]);
+ assert_eq!(result.as_ref(), &expected);
+
+ result = bitwise_shift_right(result.clone(), modules.clone())?;
+ assert_eq!(result.as_ref(), &input);
+
+ Ok(())
+ }
+
+ #[test]
+ fn bitwise_shift_array_overflow_test() -> Result<()> {
+ let input = Arc::new(Int32Array::from(vec![Some(2)])) as ArrayRef;
+ let modules = Arc::new(Int32Array::from(vec![Some(100)])) as ArrayRef;
+ let result = bitwise_shift_left(input.clone(), modules.clone())?;
+
+ let expected = Int32Array::from(vec![Some(32)]);
+ assert_eq!(result.as_ref(), &expected);
+
+ Ok(())
+ }
+
#[test]
fn bitwise_scalar_test() -> Result<()> {
let left = Arc::new(Int32Array::from(vec![Some(12), None, Some(11)]))
as ArrayRef;
@@ -2494,4 +2533,19 @@ mod tests {
assert_eq!(result.as_ref(), &expected);
Ok(())
}
+
+ #[test]
+ fn bitwise_shift_scalar_test() -> Result<()> {
+ let input = Arc::new(Int32Array::from(vec![Some(2), None, Some(4)]))
as ArrayRef;
+ let module = ScalarValue::from(10i32);
+ let mut result = bitwise_shift_left_scalar(&input,
module.clone()).unwrap()?;
+
+ let expected = Int32Array::from(vec![Some(2048), None, Some(4096)]);
+ assert_eq!(result.as_ref(), &expected);
+
+ result = bitwise_shift_right_scalar(&result, module).unwrap()?;
+ assert_eq!(result.as_ref(), &input);
+
+ Ok(())
+ }
}
diff --git a/datafusion/physical-expr/src/expressions/binary/kernels.rs b/datafusion/physical-expr/src/expressions/binary/kernels.rs
index a89957447..3ca4a447c 100644
--- a/datafusion/physical-expr/src/expressions/binary/kernels.rs
+++ b/datafusion/physical-expr/src/expressions/binary/kernels.rs
@@ -21,13 +21,14 @@ use arrow::array::*;
use arrow::datatypes::DataType;
use datafusion_common::{DataFusionError, Result, ScalarValue};
use datafusion_expr::Operator;
+
use std::sync::Arc;
/// The binary_bitwise_array_op macro only evaluates for integer types
/// like int64, int32.
/// It is used to do bitwise operation.
macro_rules! binary_bitwise_array_op {
- ($LEFT:expr, $RIGHT:expr, $OP:tt, $ARRAY_TYPE:ident, $TYPE:ty) => {{
+ ($LEFT:expr, $RIGHT:expr, $METHOD:expr, $ARRAY_TYPE:ident) => {{
let len = $LEFT.len();
let left = $LEFT.as_any().downcast_ref::<$ARRAY_TYPE>().unwrap();
let right = $RIGHT.as_any().downcast_ref::<$ARRAY_TYPE>().unwrap();
@@ -37,7 +38,7 @@ macro_rules! binary_bitwise_array_op {
if left.is_null(i) || right.is_null(i) {
None
} else {
- Some(left.value(i) $OP right.value(i))
+ Some($METHOD(left.value(i), right.value(i)))
}
})
.collect::<$ARRAY_TYPE>();
@@ -49,7 +50,7 @@ macro_rules! binary_bitwise_array_op {
/// like int64, int32.
/// It is used to do bitwise operation on an array with a scalar.
macro_rules! binary_bitwise_array_scalar {
- ($LEFT:expr, $RIGHT:expr, $OP:tt, $ARRAY_TYPE:ident, $TYPE:ty) => {{
+ ($LEFT:expr, $RIGHT:expr, $METHOD:expr, $ARRAY_TYPE:ident, $TYPE:ty) => {{
let len = $LEFT.len();
let array = $LEFT.as_any().downcast_ref::<$ARRAY_TYPE>().unwrap();
let scalar = $RIGHT;
@@ -63,7 +64,7 @@ macro_rules! binary_bitwise_array_scalar {
if array.is_null(i) {
None
} else {
- Some(array.value(i) $OP right)
+ Some($METHOD(array.value(i), right))
}
})
.collect::<$ARRAY_TYPE>();
@@ -75,16 +76,16 @@ macro_rules! binary_bitwise_array_scalar {
pub(crate) fn bitwise_and(left: ArrayRef, right: ArrayRef) -> Result<ArrayRef>
{
match &left.data_type() {
DataType::Int8 => {
- binary_bitwise_array_op!(left, right, &, Int8Array, i8)
+ binary_bitwise_array_op!(left, right, |a, b| a & b, Int8Array)
}
DataType::Int16 => {
- binary_bitwise_array_op!(left, right, &, Int16Array, i16)
+ binary_bitwise_array_op!(left, right, |a, b| a & b, Int16Array)
}
DataType::Int32 => {
- binary_bitwise_array_op!(left, right, &, Int32Array, i32)
+ binary_bitwise_array_op!(left, right, |a, b| a & b, Int32Array)
}
DataType::Int64 => {
- binary_bitwise_array_op!(left, right, &, Int64Array, i64)
+ binary_bitwise_array_op!(left, right, |a, b| a & b, Int64Array)
}
other => Err(DataFusionError::Internal(format!(
"Data type {:?} not supported for binary operation '{}' on dyn
arrays",
@@ -94,19 +95,103 @@ pub(crate) fn bitwise_and(left: ArrayRef, right: ArrayRef) -> Result<ArrayRef> {
}
}
+pub(crate) fn bitwise_shift_right(left: ArrayRef, right: ArrayRef) ->
Result<ArrayRef> {
+ match &left.data_type() {
+ DataType::Int8 => {
+ binary_bitwise_array_op!(
+ left,
+ right,
+ |a: i8, b: i8| a.wrapping_shr(b as u32),
+ Int8Array
+ )
+ }
+ DataType::Int16 => {
+ binary_bitwise_array_op!(
+ left,
+ right,
+ |a: i16, b: i16| a.wrapping_shr(b as u32),
+ Int16Array
+ )
+ }
+ DataType::Int32 => {
+ binary_bitwise_array_op!(
+ left,
+ right,
+ |a: i32, b: i32| a.wrapping_shr(b as u32),
+ Int32Array
+ )
+ }
+ DataType::Int64 => {
+ binary_bitwise_array_op!(
+ left,
+ right,
+ |a: i64, b: i64| a.wrapping_shr(b as u32),
+ Int64Array
+ )
+ }
+ other => Err(DataFusionError::Internal(format!(
+ "Data type {:?} not supported for binary operation '{}' on dyn
arrays",
+ other,
+ Operator::BitwiseShiftRight
+ ))),
+ }
+}
+
+pub(crate) fn bitwise_shift_left(left: ArrayRef, right: ArrayRef) ->
Result<ArrayRef> {
+ match &left.data_type() {
+ DataType::Int8 => {
+ binary_bitwise_array_op!(
+ left,
+ right,
+ |a: i8, b: i8| a.wrapping_shl(b as u32),
+ Int8Array
+ )
+ }
+ DataType::Int16 => {
+ binary_bitwise_array_op!(
+ left,
+ right,
+ |a: i16, b: i16| a.wrapping_shl(b as u32),
+ Int16Array
+ )
+ }
+ DataType::Int32 => {
+ binary_bitwise_array_op!(
+ left,
+ right,
+ |a: i32, b: i32| a.wrapping_shl(b as u32),
+ Int32Array
+ )
+ }
+ DataType::Int64 => {
+ binary_bitwise_array_op!(
+ left,
+ right,
+ |a: i64, b: i64| a.wrapping_shl(b as u32),
+ Int64Array
+ )
+ }
+ other => Err(DataFusionError::Internal(format!(
+ "Data type {:?} not supported for binary operation '{}' on dyn
arrays",
+ other,
+ Operator::BitwiseShiftLeft
+ ))),
+ }
+}
+
pub(crate) fn bitwise_or(left: ArrayRef, right: ArrayRef) -> Result<ArrayRef> {
match &left.data_type() {
DataType::Int8 => {
- binary_bitwise_array_op!(left, right, |, Int8Array, i8)
+ binary_bitwise_array_op!(left, right, |a, b| a | b, Int8Array)
}
DataType::Int16 => {
- binary_bitwise_array_op!(left, right, |, Int16Array, i16)
+ binary_bitwise_array_op!(left, right, |a, b| a | b, Int16Array)
}
DataType::Int32 => {
- binary_bitwise_array_op!(left, right, |, Int32Array, i32)
+ binary_bitwise_array_op!(left, right, |a, b| a | b, Int32Array)
}
DataType::Int64 => {
- binary_bitwise_array_op!(left, right, |, Int64Array, i64)
+ binary_bitwise_array_op!(left, right, |a, b| a | b, Int64Array)
}
other => Err(DataFusionError::Internal(format!(
"Data type {:?} not supported for binary operation '{}' on dyn
arrays",
@@ -122,16 +207,16 @@ pub(crate) fn bitwise_and_scalar(
) -> Option<Result<ArrayRef>> {
let result = match array.data_type() {
DataType::Int8 => {
- binary_bitwise_array_scalar!(array, scalar, &, Int8Array, i8)
+ binary_bitwise_array_scalar!(array, scalar, |a, b| a & b,
Int8Array, i8)
}
DataType::Int16 => {
- binary_bitwise_array_scalar!(array, scalar, &, Int16Array, i16)
+ binary_bitwise_array_scalar!(array, scalar, |a, b| a & b,
Int16Array, i16)
}
DataType::Int32 => {
- binary_bitwise_array_scalar!(array, scalar, &, Int32Array, i32)
+ binary_bitwise_array_scalar!(array, scalar, |a, b| a & b,
Int32Array, i32)
}
DataType::Int64 => {
- binary_bitwise_array_scalar!(array, scalar, &, Int64Array, i64)
+ binary_bitwise_array_scalar!(array, scalar, |a, b| a & b,
Int64Array, i64)
}
other => Err(DataFusionError::Internal(format!(
"Data type {:?} not supported for binary operation '{}' on dyn
arrays",
@@ -148,16 +233,16 @@ pub(crate) fn bitwise_or_scalar(
) -> Option<Result<ArrayRef>> {
let result = match array.data_type() {
DataType::Int8 => {
- binary_bitwise_array_scalar!(array, scalar, |, Int8Array, i8)
+ binary_bitwise_array_scalar!(array, scalar, |a, b| a | b,
Int8Array, i8)
}
DataType::Int16 => {
- binary_bitwise_array_scalar!(array, scalar, |, Int16Array, i16)
+ binary_bitwise_array_scalar!(array, scalar, |a, b| a | b,
Int16Array, i16)
}
DataType::Int32 => {
- binary_bitwise_array_scalar!(array, scalar, |, Int32Array, i32)
+ binary_bitwise_array_scalar!(array, scalar, |a, b| a | b,
Int32Array, i32)
}
DataType::Int64 => {
- binary_bitwise_array_scalar!(array, scalar, |, Int64Array, i64)
+ binary_bitwise_array_scalar!(array, scalar, |a, b| a | b,
Int64Array, i64)
}
other => Err(DataFusionError::Internal(format!(
"Data type {:?} not supported for binary operation '{}' on dyn
arrays",
@@ -167,3 +252,103 @@ pub(crate) fn bitwise_or_scalar(
};
Some(result)
}
+
+pub(crate) fn bitwise_shift_right_scalar(
+ array: &dyn Array,
+ scalar: ScalarValue,
+) -> Option<Result<ArrayRef>> {
+ let result = match array.data_type() {
+ DataType::Int8 => {
+ binary_bitwise_array_scalar!(
+ array,
+ scalar,
+ |a: i8, b: i8| a.wrapping_shr(b as u32),
+ Int8Array,
+ i8
+ )
+ }
+ DataType::Int16 => {
+ binary_bitwise_array_scalar!(
+ array,
+ scalar,
+ |a: i16, b: i16| a.wrapping_shr(b as u32),
+ Int16Array,
+ i16
+ )
+ }
+ DataType::Int32 => {
+ binary_bitwise_array_scalar!(
+ array,
+ scalar,
+ |a: i32, b: i32| a.wrapping_shr(b as u32),
+ Int32Array,
+ i32
+ )
+ }
+ DataType::Int64 => {
+ binary_bitwise_array_scalar!(
+ array,
+ scalar,
+ |a: i64, b: i64| a.wrapping_shr(b as u32),
+ Int64Array,
+ i64
+ )
+ }
+ other => Err(DataFusionError::Internal(format!(
+ "Data type {:?} not supported for binary operation '{}' on dyn
arrays",
+ other,
+ Operator::BitwiseShiftRight
+ ))),
+ };
+ Some(result)
+}
+
+pub(crate) fn bitwise_shift_left_scalar(
+ array: &dyn Array,
+ scalar: ScalarValue,
+) -> Option<Result<ArrayRef>> {
+ let result = match array.data_type() {
+ DataType::Int8 => {
+ binary_bitwise_array_scalar!(
+ array,
+ scalar,
+ |a: i8, b: i8| a.wrapping_shl(b as u32),
+ Int8Array,
+ i8
+ )
+ }
+ DataType::Int16 => {
+ binary_bitwise_array_scalar!(
+ array,
+ scalar,
+ |a: i16, b: i16| a.wrapping_shl(b as u32),
+ Int16Array,
+ i16
+ )
+ }
+ DataType::Int32 => {
+ binary_bitwise_array_scalar!(
+ array,
+ scalar,
+ |a: i32, b: i32| a.wrapping_shl(b as u32),
+ Int32Array,
+ i32
+ )
+ }
+ DataType::Int64 => {
+ binary_bitwise_array_scalar!(
+ array,
+ scalar,
+ |a: i64, b: i64| a.wrapping_shl(b as u32),
+ Int64Array,
+ i64
+ )
+ }
+ other => Err(DataFusionError::Internal(format!(
+ "Data type {:?} not supported for binary operation '{}' on dyn
arrays",
+ other,
+ Operator::BitwiseShiftLeft
+ ))),
+ };
+ Some(result)
+}
diff --git a/datafusion/sql/src/planner.rs b/datafusion/sql/src/planner.rs
index daa4753e4..ddf9198e5 100644
--- a/datafusion/sql/src/planner.rs
+++ b/datafusion/sql/src/planner.rs
@@ -1535,6 +1535,8 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> {
BinaryOperator::PGRegexNotIMatch => Ok(Operator::RegexNotIMatch),
BinaryOperator::BitwiseAnd => Ok(Operator::BitwiseAnd),
BinaryOperator::BitwiseOr => Ok(Operator::BitwiseOr),
+ BinaryOperator::PGBitwiseShiftRight =>
Ok(Operator::BitwiseShiftRight),
+ BinaryOperator::PGBitwiseShiftLeft =>
Ok(Operator::BitwiseShiftLeft),
BinaryOperator::StringConcat => Ok(Operator::StringConcat),
_ => Err(DataFusionError::NotImplemented(format!(
"Unsupported SQL binary operator {:?}",