Jefffrey commented on code in PR #17013:
URL: https://github.com/apache/datafusion/pull/17013#discussion_r2307154278


##########
datafusion/spark/src/function/bitwise/bit_shift.rs:
##########
@@ -0,0 +1,678 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+use std::any::Any;
+use std::sync::Arc;
+
+use arrow::array::{ArrayRef, ArrowPrimitiveType, AsArray, PrimitiveArray};
+use arrow::compute;
+use arrow::datatypes::{
+    ArrowNativeType, DataType, Int32Type, Int64Type, UInt32Type, UInt64Type,
+};
+use datafusion_common::{plan_err, Result};
+use datafusion_expr::{
+    ColumnarValue, ScalarFunctionArgs, ScalarUDFImpl, Signature, Volatility,
+};
+use datafusion_functions::utils::make_scalar_function;
+
+use crate::function::error_utils::{
+    invalid_arg_count_exec_err, unsupported_data_type_exec_err,
+};
+
+fn shift_left<T: ArrowPrimitiveType>(
+    value: &PrimitiveArray<T>,
+    shift: &PrimitiveArray<Int32Type>,
+) -> Result<PrimitiveArray<T>>
+where
+    T::Native: ArrowNativeType + std::ops::Shl<i32, Output = T::Native>,
+{
+    let bit_num = (T::Native::get_byte_width() * 8) as i32;
+    let result = compute::binary::<_, Int32Type, _, _>(
+        value,
+        shift,
+        |value: T::Native, shift: i32| {
+            let shift = ((shift % bit_num) + bit_num) % bit_num;
+            value << shift
+        },
+    )?;
+    Ok(result)
+}
+
+fn shift_right<T: ArrowPrimitiveType>(
+    value: &PrimitiveArray<T>,
+    shift: &PrimitiveArray<Int32Type>,
+) -> Result<PrimitiveArray<T>>
+where
+    T::Native: ArrowNativeType + std::ops::Shr<i32, Output = T::Native>,
+{
+    let bit_num = (T::Native::get_byte_width() * 8) as i32;
+    let result = compute::binary::<_, Int32Type, _, _>(
+        value,
+        shift,
+        |value: T::Native, shift: i32| {
+            let shift = ((shift % bit_num) + bit_num) % bit_num;
+            value >> shift
+        },
+    )?;
+    Ok(result)
+}
+
+pub trait UShr<Rhs = Self> {
+    type Output;
+
+    #[must_use]
+    fn ushr(self, rhs: Rhs) -> Self::Output;
+}

Review Comment:
   So took a closer look and I see it's for ease of doing unsigned shift 
rights, which don't seem to be represented in the stdlib for signed types; I 
guess pulling in `num-traits` dependency for this might be overkill, but would 
benefit from having a little documentation here explaining the purpose.



##########
datafusion/spark/src/function/bitwise/bit_shift.rs:
##########
@@ -0,0 +1,678 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+use std::any::Any;
+use std::sync::Arc;
+
+use arrow::array::{ArrayRef, ArrowPrimitiveType, AsArray, PrimitiveArray};
+use arrow::compute;
+use arrow::datatypes::{
+    ArrowNativeType, DataType, Int32Type, Int64Type, UInt32Type, UInt64Type,
+};
+use datafusion_common::{plan_err, Result};
+use datafusion_expr::{
+    ColumnarValue, ScalarFunctionArgs, ScalarUDFImpl, Signature, Volatility,
+};
+use datafusion_functions::utils::make_scalar_function;
+
+use crate::function::error_utils::{
+    invalid_arg_count_exec_err, unsupported_data_type_exec_err,
+};
+
+fn shift_left<T: ArrowPrimitiveType>(
+    value: &PrimitiveArray<T>,
+    shift: &PrimitiveArray<Int32Type>,
+) -> Result<PrimitiveArray<T>>
+where
+    T::Native: ArrowNativeType + std::ops::Shl<i32, Output = T::Native>,
+{
+    let bit_num = (T::Native::get_byte_width() * 8) as i32;
+    let result = compute::binary::<_, Int32Type, _, _>(
+        value,
+        shift,
+        |value: T::Native, shift: i32| {
+            let shift = ((shift % bit_num) + bit_num) % bit_num;
+            value << shift
+        },
+    )?;
+    Ok(result)
+}
+
+fn shift_right<T: ArrowPrimitiveType>(
+    value: &PrimitiveArray<T>,
+    shift: &PrimitiveArray<Int32Type>,
+) -> Result<PrimitiveArray<T>>
+where
+    T::Native: ArrowNativeType + std::ops::Shr<i32, Output = T::Native>,
+{
+    let bit_num = (T::Native::get_byte_width() * 8) as i32;
+    let result = compute::binary::<_, Int32Type, _, _>(
+        value,
+        shift,
+        |value: T::Native, shift: i32| {
+            let shift = ((shift % bit_num) + bit_num) % bit_num;
+            value >> shift
+        },
+    )?;
+    Ok(result)
+}
+
+pub trait UShr<Rhs = Self> {
+    type Output;
+
+    #[must_use]
+    fn ushr(self, rhs: Rhs) -> Self::Output;
+}
+
+impl UShr<i32> for u32 {
+    type Output = u32;
+    fn ushr(self, rhs: i32) -> Self::Output {
+        self >> rhs
+    }
+}
+
+impl UShr<i32> for u64 {
+    type Output = u64;
+    fn ushr(self, rhs: i32) -> Self::Output {
+        self >> rhs
+    }
+}
+
+impl UShr<i32> for i32 {
+    type Output = i32;
+    fn ushr(self, rhs: i32) -> Self::Output {
+        ((self as u32) >> rhs) as i32
+    }
+}
+
+impl UShr<i32> for i64 {
+    type Output = i64;
+    fn ushr(self, rhs: i32) -> Self::Output {
+        ((self as u64) >> rhs) as i64
+    }
+}
+
+fn shift_right_unsigned<T: ArrowPrimitiveType>(
+    value: &PrimitiveArray<T>,
+    shift: &PrimitiveArray<Int32Type>,
+) -> Result<PrimitiveArray<T>>
+where
+    T::Native: ArrowNativeType + UShr<i32, Output = T::Native>,
+{
+    let bit_num = (T::Native::get_byte_width() * 8) as i32;
+    let result = compute::binary::<_, Int32Type, _, _>(
+        value,
+        shift,
+        |value: T::Native, shift: i32| {
+            let shift = ((shift % bit_num) + bit_num) % bit_num;
+            value.ushr(shift)
+        },
+    )?;
+    Ok(result)
+}
+
+fn bit_shift_coerce_types(arg_types: &[DataType], func: &str) -> 
Result<Vec<DataType>> {
+    if arg_types.len() != 2 {
+        return Err(invalid_arg_count_exec_err(func, (2, 2), arg_types.len()));
+    }
+    if !arg_types[0].is_integer() && !arg_types[0].is_null() {
+        return Err(unsupported_data_type_exec_err(
+            func,
+            "Integer Type",
+            &arg_types[0],
+        ));
+    }
+    if !arg_types[1].is_integer() && !arg_types[1].is_null() {
+        return Err(unsupported_data_type_exec_err(
+            func,
+            "Integer Type",
+            &arg_types[1],
+        ));
+    }
+
+    // Coerce smaller integer types to Int32
+    let coerced_first = match &arg_types[0] {
+        DataType::Int8 | DataType::Int16 | DataType::Null => DataType::Int32,
+        DataType::UInt8 | DataType::UInt16 => DataType::UInt32,
+        _ => arg_types[0].clone(),
+    };
+
+    Ok(vec![coerced_first, DataType::Int32])

Review Comment:
   I see, so it's to align with Spark (well Java) only having access to 
Int/Long types; I do feel this coercion logic might already be expressible via 
the `TypeSignature` API, have you given that a shot?



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: github-unsubscr...@datafusion.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


---------------------------------------------------------------------
To unsubscribe, e-mail: github-unsubscr...@datafusion.apache.org
For additional commands, e-mail: github-h...@datafusion.apache.org

Reply via email to