Copilot commented on code in PR #1647: URL: https://github.com/apache/auron/pull/1647#discussion_r2588075450
########## native-engine/datafusion-ext-functions/src/spark_isnan.rs: ########## @@ -0,0 +1,150 @@ +// Licensed to the Apache Software Foundation (ASF) under one or more +// contributor license agreements. See the NOTICE file distributed with +// this work for additional information regarding copyright ownership. +// The ASF licenses this file to You under the Apache License, Version 2.0 +// (the "License"); you may not use this file except in compliance with +// the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +use std::sync::Arc; + +use arrow::{ + array::{Array, BooleanArray, Float32Array, Float64Array}, + datatypes::DataType, +}; +use datafusion::{ + common::{Result, ScalarValue}, + logical_expr::ColumnarValue, +}; +use datafusion_ext_commons::arrow::boolean::nulls_to_false; + +pub fn spark_isnan(args: &[ColumnarValue]) -> Result<ColumnarValue> { + let value = &args[0]; + match value { + ColumnarValue::Array(array) => match array.data_type() { + DataType::Float64 => { + let array = array.as_any().downcast_ref::<Float64Array>().unwrap(); + let is_nan = BooleanArray::from_unary(array, |x| x.is_nan()); + let cleaned = nulls_to_false(&is_nan); + Ok(ColumnarValue::Array(Arc::new(cleaned))) + } + DataType::Float32 => { + let array = array.as_any().downcast_ref::<Float32Array>().unwrap(); + let is_nan = BooleanArray::from_unary(array, |x| x.is_nan()); + let cleaned = nulls_to_false(&is_nan); + Ok(ColumnarValue::Array(Arc::new(cleaned))) + } + _other => { + // For non-float arrays, Spark's isnan is effectively false. + let len = array.len(); + let out = ScalarValue::Boolean(Some(false)).to_array_of_size(len)?; + Ok(ColumnarValue::Array(out)) + } + }, + ColumnarValue::Scalar(sv) => Ok(ColumnarValue::Scalar(ScalarValue::Boolean(Some( + match sv { + ScalarValue::Float64(a) => a.map(|x| x.is_nan()).unwrap_or(false), + ScalarValue::Float32(a) => a.map(|x| x.is_nan()).unwrap_or(false), + _ => false, Review Comment: The code handles non-float scalar types (line 55) by returning `false`, but there are no unit tests covering this behavior. Consider adding test cases for non-float scalar types (e.g., integers, strings) to ensure `isnan` correctly returns false for these types, matching Spark's semantics. ########## native-engine/datafusion-ext-functions/src/spark_isnan.rs: ########## @@ -0,0 +1,150 @@ +// Licensed to the Apache Software Foundation (ASF) under one or more +// contributor license agreements. See the NOTICE file distributed with +// this work for additional information regarding copyright ownership. +// The ASF licenses this file to You under the Apache License, Version 2.0 +// (the "License"); you may not use this file except in compliance with +// the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +use std::sync::Arc; + +use arrow::{ + array::{Array, BooleanArray, Float32Array, Float64Array}, + datatypes::DataType, +}; +use datafusion::{ + common::{Result, ScalarValue}, + logical_expr::ColumnarValue, +}; +use datafusion_ext_commons::arrow::boolean::nulls_to_false; + +pub fn spark_isnan(args: &[ColumnarValue]) -> Result<ColumnarValue> { + let value = &args[0]; + match value { + ColumnarValue::Array(array) => match array.data_type() { + DataType::Float64 => { + let array = array.as_any().downcast_ref::<Float64Array>().unwrap(); + let is_nan = BooleanArray::from_unary(array, |x| x.is_nan()); + let cleaned = nulls_to_false(&is_nan); + Ok(ColumnarValue::Array(Arc::new(cleaned))) + } + DataType::Float32 => { + let array = array.as_any().downcast_ref::<Float32Array>().unwrap(); + let is_nan = BooleanArray::from_unary(array, |x| x.is_nan()); + let cleaned = nulls_to_false(&is_nan); + Ok(ColumnarValue::Array(Arc::new(cleaned))) + } + _other => { + // For non-float arrays, Spark's isnan is effectively false. + let len = array.len(); + let out = ScalarValue::Boolean(Some(false)).to_array_of_size(len)?; + Ok(ColumnarValue::Array(out)) + } Review Comment: The code handles non-float types (lines 44-49) by returning an array of all `false` values, but there are no unit tests covering this behavior. Consider adding test cases for non-float types (e.g., integers, strings) to ensure `isnan` correctly returns false for these types, matching Spark's semantics. -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: [email protected] For queries about this service, please contact Infrastructure at: [email protected]
