This is an automated email from the ASF dual-hosted git repository. agrove pushed a commit to branch main in repository https://gitbox.apache.org/repos/asf/datafusion-comet.git
The following commit(s) were added to refs/heads/main by this push: new d8c62f380 chore: Use `chr` function from datafusion-spark (#2080) d8c62f380 is described below commit d8c62f3806cc160830114dbc9f7b9ef9bc3f105a Author: Andy Grove <agr...@apache.org> AuthorDate: Thu Aug 7 12:45:32 2025 -0600 chore: Use `chr` function from datafusion-spark (#2080) --- native/core/src/execution/jni_api.rs | 2 + native/spark-expr/src/comet_scalar_funcs.rs | 4 +- native/spark-expr/src/string_funcs/chr.rs | 127 --------------------- native/spark-expr/src/string_funcs/mod.rs | 2 - .../scala/org/apache/comet/serde/strings.scala | 2 +- 5 files changed, 4 insertions(+), 133 deletions(-) diff --git a/native/core/src/execution/jni_api.rs b/native/core/src/execution/jni_api.rs index f8cf2a33e..52ef184b1 100644 --- a/native/core/src/execution/jni_api.rs +++ b/native/core/src/execution/jni_api.rs @@ -42,6 +42,7 @@ use datafusion::{ use datafusion_comet_proto::spark_operator::Operator; use datafusion_spark::function::hash::sha2::SparkSha2; use datafusion_spark::function::math::expm1::SparkExpm1; +use datafusion_spark::function::string::char::SparkChar; use futures::poll; use futures::stream::StreamExt; use jni::objects::JByteBuffer; @@ -290,6 +291,7 @@ fn prepare_datafusion_session_context( // register UDFs from datafusion-spark crate session_ctx.register_udf(ScalarUDF::new_from_impl(SparkExpm1::default())); session_ctx.register_udf(ScalarUDF::new_from_impl(SparkSha2::default())); + session_ctx.register_udf(ScalarUDF::new_from_impl(SparkChar::default())); // Must be the last one to override existing functions with the same name datafusion_comet_spark_expr::register_all_comet_functions(&mut session_ctx)?; diff --git a/native/spark-expr/src/comet_scalar_funcs.rs b/native/spark-expr/src/comet_scalar_funcs.rs index 7a65d5eef..961309b78 100644 --- a/native/spark-expr/src/comet_scalar_funcs.rs +++ b/native/spark-expr/src/comet_scalar_funcs.rs @@ -21,8 +21,7 @@ use crate::{ spark_array_repeat, spark_ceil, spark_date_add, spark_date_sub, spark_decimal_div, spark_decimal_integral_div, spark_floor, spark_hex, spark_isnan, spark_make_decimal, spark_read_side_padding, spark_round, spark_rpad, spark_unhex, spark_unscaled_value, - SparkBitwiseCount, SparkBitwiseGet, SparkBitwiseNot, SparkChrFunc, SparkDateTrunc, - SparkStringSpace, + SparkBitwiseCount, SparkBitwiseGet, SparkBitwiseNot, SparkDateTrunc, SparkStringSpace, }; use arrow::datatypes::DataType; use datafusion::common::{DataFusionError, Result as DataFusionResult}; @@ -155,7 +154,6 @@ pub fn create_comet_physical_fun( fn all_scalar_functions() -> Vec<Arc<ScalarUDF>> { vec![ - Arc::new(ScalarUDF::new_from_impl(SparkChrFunc::default())), Arc::new(ScalarUDF::new_from_impl(SparkBitwiseNot::default())), Arc::new(ScalarUDF::new_from_impl(SparkBitwiseCount::default())), Arc::new(ScalarUDF::new_from_impl(SparkBitwiseGet::default())), diff --git a/native/spark-expr/src/string_funcs/chr.rs b/native/spark-expr/src/string_funcs/chr.rs deleted file mode 100644 index 20a7a8cc7..000000000 --- a/native/spark-expr/src/string_funcs/chr.rs +++ /dev/null @@ -1,127 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -use std::{any::Any, sync::Arc}; - -use arrow::{ - array::{ArrayRef, StringArray}, - datatypes::{ - DataType, - DataType::{Int64, Utf8}, - }, -}; - -use datafusion::common::{cast::as_int64_array, exec_err, Result, ScalarValue}; -use datafusion::logical_expr::{ - ColumnarValue, ScalarFunctionArgs, ScalarUDFImpl, Signature, Volatility, -}; - -fn chr(args: &[ArrayRef]) -> Result<ArrayRef> { - let integer_array = as_int64_array(&args[0])?; - - // first map is the iterator, second is for the `Option<_>` - let result = integer_array - .iter() - .map(|integer: Option<i64>| { - integer - .map(|integer| { - if integer < 0 { - return Ok("".to_string()); // Return empty string for negative integers - } - match core::char::from_u32((integer % 256) as u32) { - Some(ch) => Ok(ch.to_string()), - None => { - exec_err!("requested character not compatible for encoding.") - } - } - }) - .transpose() - }) - .collect::<Result<StringArray>>()?; - - Ok(Arc::new(result) as ArrayRef) -} - -/// Spark-compatible `chr` expression -#[derive(Debug)] -pub struct SparkChrFunc { - signature: Signature, -} - -impl Default for SparkChrFunc { - fn default() -> Self { - Self::new() - } -} - -impl SparkChrFunc { - pub fn new() -> Self { - Self { - signature: Signature::uniform(1, vec![Int64], Volatility::Immutable), - } - } -} - -impl ScalarUDFImpl for SparkChrFunc { - fn as_any(&self) -> &dyn Any { - self - } - - fn name(&self) -> &str { - "chr" - } - - fn signature(&self) -> &Signature { - &self.signature - } - - fn return_type(&self, _arg_types: &[DataType]) -> Result<DataType> { - Ok(Utf8) - } - - fn invoke_with_args(&self, args: ScalarFunctionArgs) -> Result<ColumnarValue> { - spark_chr(&args.args) - } -} - -/// Returns the ASCII character having the binary equivalent to the input expression. -/// E.g., chr(65) = 'A'. -/// Compatible with Apache Spark's Chr function -fn spark_chr(args: &[ColumnarValue]) -> Result<ColumnarValue> { - let array = args[0].clone(); - match array { - ColumnarValue::Array(array) => { - let array = chr(&[array])?; - Ok(ColumnarValue::Array(array)) - } - ColumnarValue::Scalar(ScalarValue::Int64(Some(value))) => { - if value < 0 { - Ok(ColumnarValue::Scalar(ScalarValue::Utf8(Some( - "".to_string(), - )))) - } else { - match core::char::from_u32((value % 256) as u32) { - Some(ch) => Ok(ColumnarValue::Scalar(ScalarValue::Utf8(Some( - ch.to_string(), - )))), - None => exec_err!("requested character was incompatible for encoding."), - } - } - } - _ => exec_err!("The argument must be an Int64 array or scalar."), - } -} diff --git a/native/spark-expr/src/string_funcs/mod.rs b/native/spark-expr/src/string_funcs/mod.rs index 18d30b111..aac8204e2 100644 --- a/native/spark-expr/src/string_funcs/mod.rs +++ b/native/spark-expr/src/string_funcs/mod.rs @@ -15,10 +15,8 @@ // specific language governing permissions and limitations // under the License. -mod chr; mod string_space; mod substring; -pub use chr::SparkChrFunc; pub use string_space::SparkStringSpace; pub use substring::SubstringExpr; diff --git a/spark/src/main/scala/org/apache/comet/serde/strings.scala b/spark/src/main/scala/org/apache/comet/serde/strings.scala index de54a074c..de896e0df 100644 --- a/spark/src/main/scala/org/apache/comet/serde/strings.scala +++ b/spark/src/main/scala/org/apache/comet/serde/strings.scala @@ -287,7 +287,7 @@ object CometChr extends CometExpressionSerde { binding: Boolean): Option[Expr] = { val child = expr.children.head val childExpr = exprToProtoInternal(child, inputs, binding) - val optExpr = scalarFunctionExprToProto("chr", childExpr) + val optExpr = scalarFunctionExprToProto("char", childExpr) optExprWithInfo(optExpr, expr, child) } } --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@datafusion.apache.org For additional commands, e-mail: commits-h...@datafusion.apache.org