This is an automated email from the ASF dual-hosted git repository.

comphead pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-datafusion.git


The following commit(s) were added to refs/heads/main by this push:
     new e1f7b24516 Move `nullif` and `isnan` to datafusion-functions (#9216)
e1f7b24516 is described below

commit e1f7b245168c5762135abc4e594bd81c508d7186
Author: Andrew Lamb <[email protected]>
AuthorDate: Wed Feb 14 11:24:06 2024 -0500

    Move `nullif` and `isnan` to datafusion-functions (#9216)
    
    * Move nullif and isnan to datafusion-functions
    
    move nullif
    
    * complete
    
    * update tests
    
    * fix
---
 .github/workflows/rust.yml                         |  3 +
 datafusion/core/Cargo.toml                         |  1 +
 datafusion/expr/src/built_in_function.rs           | 29 ++-----
 datafusion/expr/src/expr_fn.rs                     |  8 --
 datafusion/expr/src/lib.rs                         |  2 -
 datafusion/functions/Cargo.toml                    |  8 +-
 .../src/nullif.rs => functions/src/core/mod.rs}    | 30 +++----
 .../expressions => functions/src/core}/nullif.rs   | 78 +++++++++++++++++-
 datafusion/functions/src/lib.rs                    | 15 +++-
 datafusion/functions/src/macros.rs                 | 39 +++++++++
 .../src/nullif.rs => functions/src/math/mod.rs}    | 30 +++----
 datafusion/functions/src/math/nans.rs              | 92 ++++++++++++++++++++++
 datafusion/physical-expr/src/expressions/mod.rs    |  2 -
 datafusion/physical-expr/src/functions.rs          |  9 +--
 datafusion/proto/proto/datafusion.proto            |  4 +-
 datafusion/proto/src/generated/pbjson.rs           |  6 --
 datafusion/proto/src/generated/prost.rs            |  8 +-
 datafusion/proto/src/logical_plan/from_proto.rs    | 23 ++----
 datafusion/proto/src/logical_plan/to_proto.rs      |  2 -
 datafusion/sql/tests/sql_integration.rs            | 65 ++++++++++++++-
 datafusion/sqllogictest/test_files/errors.slt      |  2 +-
 datafusion/sqllogictest/test_files/scalar.slt      |  3 +-
 22 files changed, 335 insertions(+), 124 deletions(-)

diff --git a/.github/workflows/rust.yml b/.github/workflows/rust.yml
index 726eb07ff6..027134a3c5 100644
--- a/.github/workflows/rust.yml
+++ b/.github/workflows/rust.yml
@@ -82,6 +82,9 @@ jobs:
       - name: Check function packages (encoding_expressions)
         run: cargo check --no-default-features --features=encoding_expressions 
-p datafusion
 
+      - name: Check function packages (math_expressions)
+        run: cargo check --no-default-features --features=math_expressions -p 
datafusion
+
       - name: Check function packages (array_expressions)
         run: cargo check --no-default-features --features=array_expressions -p 
datafusion
 
diff --git a/datafusion/core/Cargo.toml b/datafusion/core/Cargo.toml
index 83904264ae..38d3012a5a 100644
--- a/datafusion/core/Cargo.toml
+++ b/datafusion/core/Cargo.toml
@@ -47,6 +47,7 @@ default = ["array_expressions", "crypto_expressions", 
"encoding_expressions", "r
 encoding_expressions = ["datafusion-functions/encoding_expressions"]
 # Used for testing ONLY: causes all values to hash to the same value (test for 
collisions)
 force_hash_collisions = []
+math_expressions = ["datafusion-functions/math_expressions"]
 parquet = ["datafusion-common/parquet", "dep:parquet"]
 pyarrow = ["datafusion-common/pyarrow", "parquet"]
 regex_expressions = ["datafusion-physical-expr/regex_expressions", 
"datafusion-optimizer/regex_expressions"]
diff --git a/datafusion/expr/src/built_in_function.rs 
b/datafusion/expr/src/built_in_function.rs
index e4a9471f07..09077a557a 100644
--- a/datafusion/expr/src/built_in_function.rs
+++ b/datafusion/expr/src/built_in_function.rs
@@ -23,7 +23,6 @@ use std::fmt;
 use std::str::FromStr;
 use std::sync::{Arc, OnceLock};
 
-use crate::nullif::SUPPORTED_NULLIF_TYPES;
 use crate::signature::TIMEZONE_WILDCARD;
 use crate::type_coercion::binary::get_wider_type;
 use crate::type_coercion::functions::data_types;
@@ -83,8 +82,6 @@ pub enum BuiltinScalarFunction {
     Gcd,
     /// lcm, Least common multiple
     Lcm,
-    /// isnan
-    Isnan,
     /// iszero
     Iszero,
     /// ln, Natural logarithm
@@ -233,8 +230,6 @@ pub enum BuiltinScalarFunction {
     Ltrim,
     /// md5
     MD5,
-    /// nullif
-    NullIf,
     /// octet_length
     OctetLength,
     /// random
@@ -386,7 +381,6 @@ impl BuiltinScalarFunction {
             BuiltinScalarFunction::Factorial => Volatility::Immutable,
             BuiltinScalarFunction::Floor => Volatility::Immutable,
             BuiltinScalarFunction::Gcd => Volatility::Immutable,
-            BuiltinScalarFunction::Isnan => Volatility::Immutable,
             BuiltinScalarFunction::Iszero => Volatility::Immutable,
             BuiltinScalarFunction::Lcm => Volatility::Immutable,
             BuiltinScalarFunction::Ln => Volatility::Immutable,
@@ -458,7 +452,6 @@ impl BuiltinScalarFunction {
             BuiltinScalarFunction::Lower => Volatility::Immutable,
             BuiltinScalarFunction::Ltrim => Volatility::Immutable,
             BuiltinScalarFunction::MD5 => Volatility::Immutable,
-            BuiltinScalarFunction::NullIf => Volatility::Immutable,
             BuiltinScalarFunction::OctetLength => Volatility::Immutable,
             BuiltinScalarFunction::Radians => Volatility::Immutable,
             BuiltinScalarFunction::RegexpLike => Volatility::Immutable,
@@ -729,11 +722,6 @@ impl BuiltinScalarFunction {
                 utf8_to_str_type(&input_expr_types[0], "ltrim")
             }
             BuiltinScalarFunction::MD5 => 
utf8_to_str_type(&input_expr_types[0], "md5"),
-            BuiltinScalarFunction::NullIf => {
-                // NULLIF has two args and they might get coerced, get a 
preview of this
-                let coerced_types = data_types(input_expr_types, 
&self.signature());
-                coerced_types.map(|typs| typs[0].clone())
-            }
             BuiltinScalarFunction::OctetLength => {
                 utf8_to_int_type(&input_expr_types[0], "octet_length")
             }
@@ -875,7 +863,7 @@ impl BuiltinScalarFunction {
                 _ => Ok(Float64),
             },
 
-            BuiltinScalarFunction::Isnan | BuiltinScalarFunction::Iszero => 
Ok(Boolean),
+            BuiltinScalarFunction::Iszero => Ok(Boolean),
 
             BuiltinScalarFunction::ArrowTypeof => Ok(Utf8),
 
@@ -1300,9 +1288,6 @@ impl BuiltinScalarFunction {
                 self.volatility(),
             ),
 
-            BuiltinScalarFunction::NullIf => {
-                Signature::uniform(2, SUPPORTED_NULLIF_TYPES.to_vec(), 
self.volatility())
-            }
             BuiltinScalarFunction::Pi => Signature::exact(vec![], 
self.volatility()),
             BuiltinScalarFunction::Random => Signature::exact(vec![], 
self.volatility()),
             BuiltinScalarFunction::Uuid => Signature::exact(vec![], 
self.volatility()),
@@ -1407,12 +1392,10 @@ impl BuiltinScalarFunction {
                 vec![Int32, Int64, UInt32, UInt64, Utf8],
                 self.volatility(),
             ),
-            BuiltinScalarFunction::Isnan | BuiltinScalarFunction::Iszero => {
-                Signature::one_of(
-                    vec![Exact(vec![Float32]), Exact(vec![Float64])],
-                    self.volatility(),
-                )
-            }
+            BuiltinScalarFunction::Iszero => Signature::one_of(
+                vec![Exact(vec![Float32]), Exact(vec![Float64])],
+                self.volatility(),
+            ),
         }
     }
 
@@ -1478,7 +1461,6 @@ impl BuiltinScalarFunction {
             BuiltinScalarFunction::Factorial => &["factorial"],
             BuiltinScalarFunction::Floor => &["floor"],
             BuiltinScalarFunction::Gcd => &["gcd"],
-            BuiltinScalarFunction::Isnan => &["isnan"],
             BuiltinScalarFunction::Iszero => &["iszero"],
             BuiltinScalarFunction::Lcm => &["lcm"],
             BuiltinScalarFunction::Ln => &["ln"],
@@ -1501,7 +1483,6 @@ impl BuiltinScalarFunction {
 
             // conditional functions
             BuiltinScalarFunction::Coalesce => &["coalesce"],
-            BuiltinScalarFunction::NullIf => &["nullif"],
 
             // string functions
             BuiltinScalarFunction::Ascii => &["ascii"],
diff --git a/datafusion/expr/src/expr_fn.rs b/datafusion/expr/src/expr_fn.rs
index ad0eae8985..4aa270e6dd 100644
--- a/datafusion/expr/src/expr_fn.rs
+++ b/datafusion/expr/src/expr_fn.rs
@@ -565,7 +565,6 @@ scalar_expr!(Lcm, lcm, arg_1 arg_2, "least common 
multiple");
 scalar_expr!(Log2, log2, num, "base 2 logarithm");
 scalar_expr!(Log10, log10, num, "base 10 logarithm");
 scalar_expr!(Ln, ln, num, "natural logarithm");
-scalar_expr!(NullIf, nullif, arg_1 arg_2, "returns NULL if value1 equals 
value2; otherwise it returns value1. This can be used to perform the inverse 
operation of the COALESCE expression.");
 scalar_expr!(Power, power, base exponent, "`base` raised to the power of 
`exponent`");
 scalar_expr!(Atan2, atan2, y x, "inverse tangent of a division given in the 
argument");
 scalar_expr!(
@@ -932,12 +931,6 @@ scalar_expr!(Now, now, ,"returns current timestamp in 
nanoseconds, using the sam
 scalar_expr!(CurrentTime, current_time, , "returns current UTC time as a 
[`DataType::Time64`] value");
 scalar_expr!(MakeDate, make_date, year month day, "make a date from year, 
month and day component parts");
 scalar_expr!(Nanvl, nanvl, x y, "returns x if x is not NaN otherwise returns 
y");
-scalar_expr!(
-    Isnan,
-    isnan,
-    num,
-    "returns true if a given number is +NaN or -NaN otherwise returns false"
-);
 scalar_expr!(
     Iszero,
     iszero,
@@ -1369,7 +1362,6 @@ mod test {
         test_unary_scalar_expr!(Ln, ln);
         test_scalar_expr!(Atan2, atan2, y, x);
         test_scalar_expr!(Nanvl, nanvl, x, y);
-        test_scalar_expr!(Isnan, isnan, input);
         test_scalar_expr!(Iszero, iszero, input);
 
         test_scalar_expr!(Ascii, ascii, input);
diff --git a/datafusion/expr/src/lib.rs b/datafusion/expr/src/lib.rs
index c295354563..8c73ae5ae7 100644
--- a/datafusion/expr/src/lib.rs
+++ b/datafusion/expr/src/lib.rs
@@ -30,7 +30,6 @@ mod built_in_function;
 mod built_in_window_function;
 mod columnar_value;
 mod literal;
-mod nullif;
 mod operator;
 mod partition_evaluator;
 mod signature;
@@ -74,7 +73,6 @@ pub use function::{
 pub use groups_accumulator::{EmitTo, GroupsAccumulator};
 pub use literal::{lit, lit_timestamp_nano, Literal, TimestampLiteral};
 pub use logical_plan::*;
-pub use nullif::SUPPORTED_NULLIF_TYPES;
 pub use operator::Operator;
 pub use partition_evaluator::PartitionEvaluator;
 pub use signature::{
diff --git a/datafusion/functions/Cargo.toml b/datafusion/functions/Cargo.toml
index 6d4a716e2e..7109261cc7 100644
--- a/datafusion/functions/Cargo.toml
+++ b/datafusion/functions/Cargo.toml
@@ -29,10 +29,14 @@ authors = { workspace = true }
 rust-version = { workspace = true }
 
 [features]
+# enable core functions
+core_expressions = []
 # Enable encoding by default so the doctests work. In general don't 
automatically enable all packages.
-default = ["encoding_expressions"]
-# enable the encode/decode functions
+default = ["core_expressions", "encoding_expressions", "math_expressions"]
+# enable encode/decode functions
 encoding_expressions = ["base64", "hex"]
+# enable math functions
+math_expressions = []
 
 
 [lib]
diff --git a/datafusion/expr/src/nullif.rs 
b/datafusion/functions/src/core/mod.rs
similarity index 58%
copy from datafusion/expr/src/nullif.rs
copy to datafusion/functions/src/core/mod.rs
index f17bd793b8..9aab4bd450 100644
--- a/datafusion/expr/src/nullif.rs
+++ b/datafusion/functions/src/core/mod.rs
@@ -15,23 +15,15 @@
 // specific language governing permissions and limitations
 // under the License.
 
-use arrow::datatypes::DataType;
+//! "core" DataFusion functions
+
+mod nullif;
+
+// create UDFs
+make_udf_function!(nullif::NullIfFunc, NULLIF, nullif);
+
+// Export the functions out of this package, both as expr_fn as well as a list 
of functions
+export_functions!(
+    (nullif, arg_1 arg_2, "returns NULL if value1 equals value2; otherwise it 
returns value1. This can be used to perform the inverse operation of the 
COALESCE expression.")
+);
 
-/// Currently supported types by the nullif function.
-/// The order of these types correspond to the order on which coercion applies
-/// This should thus be from least informative to most informative
-pub static SUPPORTED_NULLIF_TYPES: &[DataType] = &[
-    DataType::Boolean,
-    DataType::UInt8,
-    DataType::UInt16,
-    DataType::UInt32,
-    DataType::UInt64,
-    DataType::Int8,
-    DataType::Int16,
-    DataType::Int32,
-    DataType::Int64,
-    DataType::Float32,
-    DataType::Float64,
-    DataType::Utf8,
-    DataType::LargeUtf8,
-];
diff --git a/datafusion/physical-expr/src/expressions/nullif.rs 
b/datafusion/functions/src/core/nullif.rs
similarity index 78%
rename from datafusion/physical-expr/src/expressions/nullif.rs
rename to datafusion/functions/src/core/nullif.rs
index dcd883f929..1007f349f7 100644
--- a/datafusion/physical-expr/src/expressions/nullif.rs
+++ b/datafusion/functions/src/core/nullif.rs
@@ -15,17 +15,89 @@
 // specific language governing permissions and limitations
 // under the License.
 
+//! Encoding expressions
+
+use arrow::{
+    datatypes::DataType,
+};
+use datafusion_common::{internal_err, Result, DataFusionError};
+use datafusion_expr::{ColumnarValue};
+
+use datafusion_expr::{ScalarUDFImpl, Signature, Volatility};
+use std::any::Any;
 use arrow::array::Array;
 use arrow::compute::kernels::cmp::eq;
 use arrow::compute::kernels::nullif::nullif;
-use datafusion_common::{internal_err, DataFusionError, Result, ScalarValue};
-use datafusion_expr::ColumnarValue;
+use datafusion_common::{ ScalarValue};
+
+#[derive(Debug)]
+pub(super) struct NullIfFunc {
+    signature: Signature,
+}
+
+/// Currently supported types by the nullif function.
+/// The order of these types correspond to the order on which coercion applies
+/// This should thus be from least informative to most informative
+static SUPPORTED_NULLIF_TYPES: &[DataType] = &[
+    DataType::Boolean,
+    DataType::UInt8,
+    DataType::UInt16,
+    DataType::UInt32,
+    DataType::UInt64,
+    DataType::Int8,
+    DataType::Int16,
+    DataType::Int32,
+    DataType::Int64,
+    DataType::Float32,
+    DataType::Float64,
+    DataType::Utf8,
+    DataType::LargeUtf8,
+];
+
+
+impl NullIfFunc {
+    pub fn new() -> Self {
+        Self {
+            signature:
+            Signature::uniform(2, SUPPORTED_NULLIF_TYPES.to_vec(),
+                Volatility::Immutable,
+            )
+        }
+    }
+}
+
+impl ScalarUDFImpl for NullIfFunc {
+    fn as_any(&self) -> &dyn Any {
+        self
+    }
+    fn name(&self) -> &str {
+        "nullif"
+    }
+
+    fn signature(&self) -> &Signature {
+        &self.signature
+    }
+
+    fn return_type(&self, arg_types: &[DataType]) -> Result<DataType> {
+        // NULLIF has two args and they might get coerced, get a preview of 
this
+        let coerced_types = 
datafusion_expr::type_coercion::functions::data_types(arg_types, 
&self.signature);
+        coerced_types.map(|typs| typs[0].clone())
+            .map_err(|e| e.context("Failed to coerce arguments for NULLIF")
+        )
+    }
+
+    fn invoke(&self, args: &[ColumnarValue]) -> Result<ColumnarValue> {
+        nullif_func(args)
+    }
+}
+
+
 
 /// Implements NULLIF(expr1, expr2)
 /// Args: 0 - left expr is any array
 ///       1 - if the left is equal to this expr2, then the result is NULL, 
otherwise left value is passed.
 ///
-pub fn nullif_func(args: &[ColumnarValue]) -> Result<ColumnarValue> {
+fn nullif_func(args: &[ColumnarValue]) -> Result<ColumnarValue> {
     if args.len() != 2 {
         return internal_err!(
             "{:?} args were supplied but NULLIF takes exactly two args",
diff --git a/datafusion/functions/src/lib.rs b/datafusion/functions/src/lib.rs
index 91a5c510f0..981174c141 100644
--- a/datafusion/functions/src/lib.rs
+++ b/datafusion/functions/src/lib.rs
@@ -84,21 +84,34 @@ use log::debug;
 #[macro_use]
 pub mod macros;
 
+make_package!(core, "core_expressions", "Core datafusion expressions");
+
 make_package!(
     encoding,
     "encoding_expressions",
     "Hex and binary `encode` and `decode` functions."
 );
 
+make_package!(math, "math_expressions", "Mathematical functions.");
+
 /// Fluent-style API for creating `Expr`s
 pub mod expr_fn {
+    #[cfg(feature = "core_expressions")]
+    pub use super::core::expr_fn::*;
     #[cfg(feature = "encoding_expressions")]
     pub use super::encoding::expr_fn::*;
+    #[cfg(feature = "math_expressions")]
+    pub use super::math::expr_fn::*;
 }
 
 /// Registers all enabled packages with a [`FunctionRegistry`]
 pub fn register_all(registry: &mut dyn FunctionRegistry) -> Result<()> {
-    encoding::functions().into_iter().try_for_each(|udf| {
+    let mut all_functions = core::functions()
+        .into_iter()
+        .chain(encoding::functions())
+        .chain(math::functions());
+
+    all_functions.try_for_each(|udf| {
         let existing_udf = registry.register_udf(udf)?;
         if let Some(existing_udf) = existing_udf {
             debug!("Overwrite existing UDF: {}", existing_udf.name());
diff --git a/datafusion/functions/src/macros.rs 
b/datafusion/functions/src/macros.rs
index 1931ee2794..5debcbda30 100644
--- a/datafusion/functions/src/macros.rs
+++ b/datafusion/functions/src/macros.rs
@@ -121,3 +121,42 @@ macro_rules! make_package {
         }
     };
 }
+
+/// Invokes a function on each element of an array and returns the result as a 
new array
+///
+/// $ARG: ArrayRef
+/// $NAME: name of the function (for error messages)
+/// $ARGS_TYPE: the type of array to cast the argument to
+/// $RETURN_TYPE: the type of array to return
+/// $FUNC: the function to apply to each element of $ARG
+///
+macro_rules! make_function_scalar_inputs_return_type {
+    ($ARG: expr, $NAME:expr, $ARG_TYPE:ident, $RETURN_TYPE:ident, $FUNC: 
block) => {{
+        let arg = downcast_arg!($ARG, $NAME, $ARG_TYPE);
+
+        arg.iter()
+            .map(|a| match a {
+                Some(a) => Some($FUNC(a)),
+                _ => None,
+            })
+            .collect::<$RETURN_TYPE>()
+    }};
+}
+
+/// Downcast an argument to a specific array type, returning an internal error
+/// if the cast fails
+///
+/// $ARG: ArrayRef
+/// $NAME: name of the argument (for error messages)
+/// $ARRAY_TYPE: the type of array to cast the argument to
+macro_rules! downcast_arg {
+    ($ARG:expr, $NAME:expr, $ARRAY_TYPE:ident) => {{
+        $ARG.as_any().downcast_ref::<$ARRAY_TYPE>().ok_or_else(|| {
+            DataFusionError::Internal(format!(
+                "could not cast {} to {}",
+                $NAME,
+                std::any::type_name::<$ARRAY_TYPE>()
+            ))
+        })?
+    }};
+}
diff --git a/datafusion/expr/src/nullif.rs 
b/datafusion/functions/src/math/mod.rs
similarity index 58%
rename from datafusion/expr/src/nullif.rs
rename to datafusion/functions/src/math/mod.rs
index f17bd793b8..67d2d957ea 100644
--- a/datafusion/expr/src/nullif.rs
+++ b/datafusion/functions/src/math/mod.rs
@@ -15,23 +15,15 @@
 // specific language governing permissions and limitations
 // under the License.
 
-use arrow::datatypes::DataType;
+//! "core" DataFusion functions
+
+mod nans;
+
+// create  UDFs
+make_udf_function!(nans::IsNanFunc, ISNAN, isnan);
+
+// Export the functions out of this package, both as expr_fn as well as a list 
of functions
+export_functions!(
+    (isnan, num, "returns true if a given number is +NaN or -NaN otherwise 
returns false")
+);
 
-/// Currently supported types by the nullif function.
-/// The order of these types correspond to the order on which coercion applies
-/// This should thus be from least informative to most informative
-pub static SUPPORTED_NULLIF_TYPES: &[DataType] = &[
-    DataType::Boolean,
-    DataType::UInt8,
-    DataType::UInt16,
-    DataType::UInt32,
-    DataType::UInt64,
-    DataType::Int8,
-    DataType::Int16,
-    DataType::Int32,
-    DataType::Int64,
-    DataType::Float32,
-    DataType::Float64,
-    DataType::Utf8,
-    DataType::LargeUtf8,
-];
diff --git a/datafusion/functions/src/math/nans.rs 
b/datafusion/functions/src/math/nans.rs
new file mode 100644
index 0000000000..228039e5f6
--- /dev/null
+++ b/datafusion/functions/src/math/nans.rs
@@ -0,0 +1,92 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+//! Encoding expressions
+
+use arrow::{
+    datatypes::DataType,
+};
+use datafusion_common::{internal_err, Result, DataFusionError};
+use datafusion_expr::ColumnarValue;
+
+use datafusion_expr::TypeSignature::*;
+use datafusion_expr::{ScalarUDFImpl, Signature, Volatility};
+use std::any::Any;
+use std::sync::Arc;
+use arrow::array::{ArrayRef, BooleanArray, Float32Array, Float64Array};
+
+#[derive(Debug)]
+pub(super) struct IsNanFunc {
+    signature: Signature,
+}
+
+impl IsNanFunc {
+    pub fn new() -> Self {
+        use DataType::*;
+        Self {
+            signature:
+            Signature::one_of(
+                vec![Exact(vec![Float32]), Exact(vec![Float64])],
+                Volatility::Immutable,
+            )
+        }
+    }
+}
+
+impl ScalarUDFImpl for IsNanFunc {
+    fn as_any(&self) -> &dyn Any {
+        self
+    }
+    fn name(&self) -> &str {
+        "isnan"
+    }
+
+    fn signature(&self) -> &Signature {
+        &self.signature
+    }
+
+    fn return_type(&self, _arg_types: &[DataType]) -> Result<DataType> {
+        Ok(DataType::Boolean)
+    }
+
+    fn invoke(&self, args: &[ColumnarValue]) -> Result<ColumnarValue> {
+        let args = ColumnarValue::values_to_arrays(args)?;
+
+        let arr: ArrayRef = match args[0].data_type() {
+            DataType::Float64 => {
+                Arc::new(make_function_scalar_inputs_return_type!(
+                                &args[0],
+                                "x",
+                                Float64Array,
+                                BooleanArray,
+                                { f64::is_nan }
+                            ))
+            },
+            DataType::Float32 => {
+                Arc::new(make_function_scalar_inputs_return_type!(
+                            &args[0],
+                            "x",
+                            Float32Array,
+                            BooleanArray,
+                            { f32::is_nan }
+                        ))
+            },
+            other => return internal_err!("Unsupported data type {other:?} for 
function isnan"),
+        };
+        Ok(ColumnarValue::Array(arr))
+    }
+}
diff --git a/datafusion/physical-expr/src/expressions/mod.rs 
b/datafusion/physical-expr/src/expressions/mod.rs
index 007a03985f..09e908586c 100644
--- a/datafusion/physical-expr/src/expressions/mod.rs
+++ b/datafusion/physical-expr/src/expressions/mod.rs
@@ -32,7 +32,6 @@ mod literal;
 mod negative;
 mod no_op;
 mod not;
-mod nullif;
 mod try_cast;
 
 /// Module with some convenient methods used in expression building
@@ -92,7 +91,6 @@ pub use literal::{lit, Literal};
 pub use negative::{negative, NegativeExpr};
 pub use no_op::NoOp;
 pub use not::{not, NotExpr};
-pub use nullif::nullif_func;
 pub use try_cast::{try_cast, TryCastExpr};
 
 /// returns the name of the state
diff --git a/datafusion/physical-expr/src/functions.rs 
b/datafusion/physical-expr/src/functions.rs
index ca07351698..8446a65d72 100644
--- a/datafusion/physical-expr/src/functions.rs
+++ b/datafusion/physical-expr/src/functions.rs
@@ -33,9 +33,8 @@
 use crate::execution_props::ExecutionProps;
 use crate::sort_properties::SortProperties;
 use crate::{
-    array_expressions, conditional_expressions, datetime_expressions,
-    expressions::nullif_func, math_expressions, string_expressions, 
struct_expressions,
-    PhysicalExpr, ScalarFunctionExpr,
+    array_expressions, conditional_expressions, datetime_expressions, 
math_expressions,
+    string_expressions, struct_expressions, PhysicalExpr, ScalarFunctionExpr,
 };
 use arrow::{
     array::ArrayRef,
@@ -282,9 +281,6 @@ pub fn create_physical_fun(
         BuiltinScalarFunction::Gcd => {
             Arc::new(|args| 
make_scalar_function_inner(math_expressions::gcd)(args))
         }
-        BuiltinScalarFunction::Isnan => {
-            Arc::new(|args| 
make_scalar_function_inner(math_expressions::isnan)(args))
-        }
         BuiltinScalarFunction::Iszero => {
             Arc::new(|args| 
make_scalar_function_inner(math_expressions::iszero)(args))
         }
@@ -593,7 +589,6 @@ pub fn create_physical_fun(
         BuiltinScalarFunction::Digest => {
             Arc::new(invoke_if_crypto_expressions_feature_flag!(digest, 
"digest"))
         }
-        BuiltinScalarFunction::NullIf => Arc::new(nullif_func),
         BuiltinScalarFunction::OctetLength => Arc::new(|args| match &args[0] {
             ColumnarValue::Array(v) => 
Ok(ColumnarValue::Array(length(v.as_ref())?)),
             ColumnarValue::Scalar(v) => match v {
diff --git a/datafusion/proto/proto/datafusion.proto 
b/datafusion/proto/proto/datafusion.proto
index 3c8bd4ef30..e779e29cb8 100644
--- a/datafusion/proto/proto/datafusion.proto
+++ b/datafusion/proto/proto/datafusion.proto
@@ -580,7 +580,7 @@ enum ScalarFunction {
   Lower = 33;
   Ltrim = 34;
   MD5 = 35;
-  NullIf = 36;
+  //  36 was NullIf
   OctetLength = 37;
   Random = 38;
   RegexpReplace = 39;
@@ -655,7 +655,7 @@ enum ScalarFunction {
   ArrayReplaceAll = 110;
   Nanvl = 111;
   Flatten = 112;
-  Isnan = 113;
+  // 113 was IsNan
   Iszero = 114;
   ArrayEmpty = 115;
   ArrayPopBack = 116;
diff --git a/datafusion/proto/src/generated/pbjson.rs 
b/datafusion/proto/src/generated/pbjson.rs
index 89e170d3ec..f5f15aa3e4 100644
--- a/datafusion/proto/src/generated/pbjson.rs
+++ b/datafusion/proto/src/generated/pbjson.rs
@@ -22339,7 +22339,6 @@ impl serde::Serialize for ScalarFunction {
             Self::Lower => "Lower",
             Self::Ltrim => "Ltrim",
             Self::Md5 => "MD5",
-            Self::NullIf => "NullIf",
             Self::OctetLength => "OctetLength",
             Self::Random => "Random",
             Self::RegexpReplace => "RegexpReplace",
@@ -22413,7 +22412,6 @@ impl serde::Serialize for ScalarFunction {
             Self::ArrayReplaceAll => "ArrayReplaceAll",
             Self::Nanvl => "Nanvl",
             Self::Flatten => "Flatten",
-            Self::Isnan => "Isnan",
             Self::Iszero => "Iszero",
             Self::ArrayEmpty => "ArrayEmpty",
             Self::ArrayPopBack => "ArrayPopBack",
@@ -22484,7 +22482,6 @@ impl<'de> serde::Deserialize<'de> for ScalarFunction {
             "Lower",
             "Ltrim",
             "MD5",
-            "NullIf",
             "OctetLength",
             "Random",
             "RegexpReplace",
@@ -22558,7 +22555,6 @@ impl<'de> serde::Deserialize<'de> for ScalarFunction {
             "ArrayReplaceAll",
             "Nanvl",
             "Flatten",
-            "Isnan",
             "Iszero",
             "ArrayEmpty",
             "ArrayPopBack",
@@ -22658,7 +22654,6 @@ impl<'de> serde::Deserialize<'de> for ScalarFunction {
                     "Lower" => Ok(ScalarFunction::Lower),
                     "Ltrim" => Ok(ScalarFunction::Ltrim),
                     "MD5" => Ok(ScalarFunction::Md5),
-                    "NullIf" => Ok(ScalarFunction::NullIf),
                     "OctetLength" => Ok(ScalarFunction::OctetLength),
                     "Random" => Ok(ScalarFunction::Random),
                     "RegexpReplace" => Ok(ScalarFunction::RegexpReplace),
@@ -22732,7 +22727,6 @@ impl<'de> serde::Deserialize<'de> for ScalarFunction {
                     "ArrayReplaceAll" => Ok(ScalarFunction::ArrayReplaceAll),
                     "Nanvl" => Ok(ScalarFunction::Nanvl),
                     "Flatten" => Ok(ScalarFunction::Flatten),
-                    "Isnan" => Ok(ScalarFunction::Isnan),
                     "Iszero" => Ok(ScalarFunction::Iszero),
                     "ArrayEmpty" => Ok(ScalarFunction::ArrayEmpty),
                     "ArrayPopBack" => Ok(ScalarFunction::ArrayPopBack),
diff --git a/datafusion/proto/src/generated/prost.rs 
b/datafusion/proto/src/generated/prost.rs
index 679df2b8d9..69d035239c 100644
--- a/datafusion/proto/src/generated/prost.rs
+++ b/datafusion/proto/src/generated/prost.rs
@@ -2667,7 +2667,7 @@ pub enum ScalarFunction {
     Lower = 33,
     Ltrim = 34,
     Md5 = 35,
-    NullIf = 36,
+    ///   36 was NullIf
     OctetLength = 37,
     Random = 38,
     RegexpReplace = 39,
@@ -2742,7 +2742,7 @@ pub enum ScalarFunction {
     ArrayReplaceAll = 110,
     Nanvl = 111,
     Flatten = 112,
-    Isnan = 113,
+    /// 113 was IsNan
     Iszero = 114,
     ArrayEmpty = 115,
     ArrayPopBack = 116,
@@ -2810,7 +2810,6 @@ impl ScalarFunction {
             ScalarFunction::Lower => "Lower",
             ScalarFunction::Ltrim => "Ltrim",
             ScalarFunction::Md5 => "MD5",
-            ScalarFunction::NullIf => "NullIf",
             ScalarFunction::OctetLength => "OctetLength",
             ScalarFunction::Random => "Random",
             ScalarFunction::RegexpReplace => "RegexpReplace",
@@ -2884,7 +2883,6 @@ impl ScalarFunction {
             ScalarFunction::ArrayReplaceAll => "ArrayReplaceAll",
             ScalarFunction::Nanvl => "Nanvl",
             ScalarFunction::Flatten => "Flatten",
-            ScalarFunction::Isnan => "Isnan",
             ScalarFunction::Iszero => "Iszero",
             ScalarFunction::ArrayEmpty => "ArrayEmpty",
             ScalarFunction::ArrayPopBack => "ArrayPopBack",
@@ -2949,7 +2947,6 @@ impl ScalarFunction {
             "Lower" => Some(Self::Lower),
             "Ltrim" => Some(Self::Ltrim),
             "MD5" => Some(Self::Md5),
-            "NullIf" => Some(Self::NullIf),
             "OctetLength" => Some(Self::OctetLength),
             "Random" => Some(Self::Random),
             "RegexpReplace" => Some(Self::RegexpReplace),
@@ -3023,7 +3020,6 @@ impl ScalarFunction {
             "ArrayReplaceAll" => Some(Self::ArrayReplaceAll),
             "Nanvl" => Some(Self::Nanvl),
             "Flatten" => Some(Self::Flatten),
-            "Isnan" => Some(Self::Isnan),
             "Iszero" => Some(Self::Iszero),
             "ArrayEmpty" => Some(Self::ArrayEmpty),
             "ArrayPopBack" => Some(Self::ArrayPopBack),
diff --git a/datafusion/proto/src/logical_plan/from_proto.rs 
b/datafusion/proto/src/logical_plan/from_proto.rs
index 3a2494c015..f1ee84a822 100644
--- a/datafusion/proto/src/logical_plan/from_proto.rs
+++ b/datafusion/proto/src/logical_plan/from_proto.rs
@@ -58,15 +58,15 @@ use datafusion_expr::{
     current_time, date_bin, date_part, date_trunc, degrees, digest, ends_with, 
exp,
     expr::{self, InList, Sort, WindowFunction},
     factorial, find_in_set, flatten, floor, from_unixtime, gcd, gen_range, 
initcap,
-    instr, isnan, iszero, lcm, left, levenshtein, ln, log, log10, log2,
+    instr, iszero, lcm, left, levenshtein, ln, log, log10, log2,
     logical_plan::{PlanType, StringifiedPlan},
-    lower, lpad, ltrim, md5, nanvl, now, nullif, octet_length, overlay, pi, 
power,
-    radians, random, regexp_like, regexp_match, regexp_replace, repeat, 
replace, reverse,
-    right, round, rpad, rtrim, sha224, sha256, sha384, sha512, signum, sin, 
sinh,
-    split_part, sqrt, starts_with, string_to_array, strpos, struct_fun, substr,
-    substr_index, substring, tan, tanh, to_hex, translate, trim, trunc, upper, 
uuid,
-    AggregateFunction, Between, BinaryExpr, BuiltInWindowFunction, 
BuiltinScalarFunction,
-    Case, Cast, Expr, GetFieldAccess, GetIndexedField, GroupingSet,
+    lower, lpad, ltrim, md5, nanvl, now, octet_length, overlay, pi, power, 
radians,
+    random, regexp_like, regexp_match, regexp_replace, repeat, replace, 
reverse, right,
+    round, rpad, rtrim, sha224, sha256, sha384, sha512, signum, sin, sinh, 
split_part,
+    sqrt, starts_with, string_to_array, strpos, struct_fun, substr, 
substr_index,
+    substring, tan, tanh, to_hex, translate, trim, trunc, upper, uuid, 
AggregateFunction,
+    Between, BinaryExpr, BuiltInWindowFunction, BuiltinScalarFunction, Case, 
Cast, Expr,
+    GetFieldAccess, GetIndexedField, GroupingSet,
     GroupingSet::GroupingSets,
     JoinConstraint, JoinType, Like, Operator, TryCast, WindowFrame, 
WindowFrameBound,
     WindowFrameUnits,
@@ -512,7 +512,6 @@ impl From<&protobuf::ScalarFunction> for 
BuiltinScalarFunction {
             ScalarFunction::Range => Self::Range,
             ScalarFunction::Cardinality => Self::Cardinality,
             ScalarFunction::Array => Self::MakeArray,
-            ScalarFunction::NullIf => Self::NullIf,
             ScalarFunction::DatePart => Self::DatePart,
             ScalarFunction::DateTrunc => Self::DateTrunc,
             ScalarFunction::DateBin => Self::DateBin,
@@ -569,7 +568,6 @@ impl From<&protobuf::ScalarFunction> for 
BuiltinScalarFunction {
             ScalarFunction::FromUnixtime => Self::FromUnixtime,
             ScalarFunction::Atan2 => Self::Atan2,
             ScalarFunction::Nanvl => Self::Nanvl,
-            ScalarFunction::Isnan => Self::Isnan,
             ScalarFunction::Iszero => Self::Iszero,
             ScalarFunction::ArrowTypeof => Self::ArrowTypeof,
             ScalarFunction::OverLay => Self::OverLay,
@@ -1561,10 +1559,6 @@ pub fn parse_expr(
                 ScalarFunction::Sha384 => Ok(sha384(parse_expr(&args[0], 
registry)?)),
                 ScalarFunction::Sha512 => Ok(sha512(parse_expr(&args[0], 
registry)?)),
                 ScalarFunction::Md5 => Ok(md5(parse_expr(&args[0], 
registry)?)),
-                ScalarFunction::NullIf => Ok(nullif(
-                    parse_expr(&args[0], registry)?,
-                    parse_expr(&args[1], registry)?,
-                )),
                 ScalarFunction::Digest => Ok(digest(
                     parse_expr(&args[0], registry)?,
                     parse_expr(&args[1], registry)?,
@@ -1800,7 +1794,6 @@ pub fn parse_expr(
                     parse_expr(&args[0], registry)?,
                     parse_expr(&args[1], registry)?,
                 )),
-                ScalarFunction::Isnan => Ok(isnan(parse_expr(&args[0], 
registry)?)),
                 ScalarFunction::Iszero => Ok(iszero(parse_expr(&args[0], 
registry)?)),
                 ScalarFunction::ArrowTypeof => {
                     Ok(arrow_typeof(parse_expr(&args[0], registry)?))
diff --git a/datafusion/proto/src/logical_plan/to_proto.rs 
b/datafusion/proto/src/logical_plan/to_proto.rs
index 9510846f70..a6348e909c 100644
--- a/datafusion/proto/src/logical_plan/to_proto.rs
+++ b/datafusion/proto/src/logical_plan/to_proto.rs
@@ -1492,7 +1492,6 @@ impl TryFrom<&BuiltinScalarFunction> for 
protobuf::ScalarFunction {
             BuiltinScalarFunction::Range => Self::Range,
             BuiltinScalarFunction::Cardinality => Self::Cardinality,
             BuiltinScalarFunction::MakeArray => Self::Array,
-            BuiltinScalarFunction::NullIf => Self::NullIf,
             BuiltinScalarFunction::DatePart => Self::DatePart,
             BuiltinScalarFunction::DateTrunc => Self::DateTrunc,
             BuiltinScalarFunction::DateBin => Self::DateBin,
@@ -1547,7 +1546,6 @@ impl TryFrom<&BuiltinScalarFunction> for 
protobuf::ScalarFunction {
             BuiltinScalarFunction::FromUnixtime => Self::FromUnixtime,
             BuiltinScalarFunction::Atan2 => Self::Atan2,
             BuiltinScalarFunction::Nanvl => Self::Nanvl,
-            BuiltinScalarFunction::Isnan => Self::Isnan,
             BuiltinScalarFunction::Iszero => Self::Iszero,
             BuiltinScalarFunction::ArrowTypeof => Self::ArrowTypeof,
             BuiltinScalarFunction::OverLay => Self::OverLay,
diff --git a/datafusion/sql/tests/sql_integration.rs 
b/datafusion/sql/tests/sql_integration.rs
index 3c72bf334e..2124a5224a 100644
--- a/datafusion/sql/tests/sql_integration.rs
+++ b/datafusion/sql/tests/sql_integration.rs
@@ -15,6 +15,7 @@
 // specific language governing permissions and limitations
 // under the License.
 
+use std::any::Any;
 #[cfg(test)]
 use std::collections::HashMap;
 use std::{sync::Arc, vec};
@@ -29,7 +30,8 @@ use datafusion_common::{
 use datafusion_common::{plan_err, ParamValues};
 use datafusion_expr::{
     logical_plan::{LogicalPlan, Prepare},
-    AggregateUDF, ScalarUDF, TableSource, WindowUDF,
+    AggregateUDF, ColumnarValue, ScalarUDF, ScalarUDFImpl, Signature, 
TableSource,
+    Volatility, WindowUDF,
 };
 use datafusion_sql::{
     parser::DFParser,
@@ -2671,13 +2673,62 @@ fn logical_plan_with_dialect_and_options(
     dialect: &dyn Dialect,
     options: ParserOptions,
 ) -> Result<LogicalPlan> {
-    let context = MockContextProvider::default();
+    let context = MockContextProvider::default().with_udf(make_udf(
+        "nullif",
+        vec![DataType::Int32, DataType::Int32],
+        DataType::Int32,
+    ));
+
     let planner = SqlToRel::new_with_options(&context, options);
     let result = DFParser::parse_sql_with_dialect(sql, dialect);
     let mut ast = result?;
     planner.statement_to_plan(ast.pop_front().unwrap())
 }
 
+fn make_udf(name: &'static str, args: Vec<DataType>, return_type: DataType) -> 
ScalarUDF {
+    ScalarUDF::new_from_impl(DummyUDF::new(name, args, return_type))
+}
+
+/// Mocked UDF
+#[derive(Debug)]
+struct DummyUDF {
+    name: &'static str,
+    signature: Signature,
+    return_type: DataType,
+}
+
+impl DummyUDF {
+    fn new(name: &'static str, args: Vec<DataType>, return_type: DataType) -> 
Self {
+        Self {
+            name,
+            signature: Signature::exact(args, Volatility::Immutable),
+            return_type,
+        }
+    }
+}
+
+impl ScalarUDFImpl for DummyUDF {
+    fn as_any(&self) -> &dyn Any {
+        self
+    }
+
+    fn name(&self) -> &str {
+        self.name
+    }
+
+    fn signature(&self) -> &Signature {
+        &self.signature
+    }
+
+    fn return_type(&self, _arg_types: &[DataType]) -> Result<DataType> {
+        Ok(self.return_type.clone())
+    }
+
+    fn invoke(&self, _args: &[ColumnarValue]) -> Result<ColumnarValue> {
+        unimplemented!("DummyUDF::invoke")
+    }
+}
+
 /// Create logical plan, write with formatter, compare to expected output
 fn quick_test(sql: &str, expected: &str) {
     let plan = logical_plan(sql).unwrap();
@@ -2724,6 +2775,7 @@ fn prepare_stmt_replace_params_quick_test(
 #[derive(Default)]
 struct MockContextProvider {
     options: ConfigOptions,
+    udfs: HashMap<String, Arc<ScalarUDF>>,
     udafs: HashMap<String, Arc<AggregateUDF>>,
 }
 
@@ -2731,6 +2783,11 @@ impl MockContextProvider {
     fn options_mut(&mut self) -> &mut ConfigOptions {
         &mut self.options
     }
+
+    fn with_udf(mut self, udf: ScalarUDF) -> Self {
+        self.udfs.insert(udf.name().to_string(), Arc::new(udf));
+        self
+    }
 }
 
 impl ContextProvider for MockContextProvider {
@@ -2823,8 +2880,8 @@ impl ContextProvider for MockContextProvider {
         }
     }
 
-    fn get_function_meta(&self, _name: &str) -> Option<Arc<ScalarUDF>> {
-        None
+    fn get_function_meta(&self, name: &str) -> Option<Arc<ScalarUDF>> {
+        self.udfs.get(name).map(Arc::clone)
     }
 
     fn get_aggregate_meta(&self, name: &str) -> Option<Arc<AggregateUDF>> {
diff --git a/datafusion/sqllogictest/test_files/errors.slt 
b/datafusion/sqllogictest/test_files/errors.slt
index e3b2610e51..3a23f3615d 100644
--- a/datafusion/sqllogictest/test_files/errors.slt
+++ b/datafusion/sqllogictest/test_files/errors.slt
@@ -84,7 +84,7 @@ statement error Error during planning: No function matches 
the given name and ar
 SELECT concat();
 
 # error message for wrong function signature (Uniform: t args all from some 
common types)
-statement error Error during planning: No function matches the given name and 
argument types 'nullif\(Int64\)'. You might need to add explicit type 
casts.\n\tCandidate 
functions:\n\tnullif\(Boolean/UInt8/UInt16/UInt32/UInt64/Int8/Int16/Int32/Int64/Float32/Float64/Utf8/LargeUtf8,
 
Boolean/UInt8/UInt16/UInt32/UInt64/Int8/Int16/Int32/Int64/Float32/Float64/Utf8/LargeUtf8\)
+statement error DataFusion error: Failed to coerce arguments for NULLIF
 SELECT nullif(1);
 
 # error message for wrong function signature (Exact: exact number of args of 
an exact type)
diff --git a/datafusion/sqllogictest/test_files/scalar.slt 
b/datafusion/sqllogictest/test_files/scalar.slt
index 129eb6508b..eb2f05dc74 100644
--- a/datafusion/sqllogictest/test_files/scalar.slt
+++ b/datafusion/sqllogictest/test_files/scalar.slt
@@ -1840,9 +1840,10 @@ statement error Error during planning: No function 
matches the given name and ar
 SELECT concat();
 
 # error message for wrong function signature (Uniform: t args all from some 
common types)
-statement error Error during planning: No function matches the given name and 
argument types 'nullif\(Int64\)'. You might need to add explicit type 
casts.\n\tCandidate 
functions:\n\tnullif\(Boolean/UInt8/UInt16/UInt32/UInt64/Int8/Int16/Int32/Int64/Float32/Float64/Utf8/LargeUtf8,
 
Boolean/UInt8/UInt16/UInt32/UInt64/Int8/Int16/Int32/Int64/Float32/Float64/Utf8/LargeUtf8\)
+statement error DataFusion error: Failed to coerce arguments for NULLIF
 SELECT nullif(1);
 
+
 # error message for wrong function signature (Exact: exact number of args of 
an exact type)
 statement error Error during planning: No function matches the given name and 
argument types 'pi\(Float64\)'. You might need to add explicit type 
casts.\n\tCandidate functions:\n\tpi\(\)
 SELECT pi(3.14);


Reply via email to