This is an automated email from the ASF dual-hosted git repository.

jayzhan pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/datafusion.git


The following commit(s) were added to refs/heads/main by this push:
     new 1b3608da7c fix: coalesce schema issues (#12308)
1b3608da7c is described below

commit 1b3608da7ca59d8d987804834d004e8b3e349d18
Author: Daniel Mesejo <[email protected]>
AuthorDate: Fri Sep 27 04:42:51 2024 +0200

    fix: coalesce schema issues (#12308)
    
    closes #12307
---
 datafusion/core/src/dataframe/mod.rs               |  37 ++++++++
 datafusion/expr/src/expr_schema.rs                 |  23 ++---
 datafusion/expr/src/logical_plan/builder.rs        |   4 +-
 datafusion/functions/src/core/coalesce.rs          |  30 +++++-
 datafusion/functions/src/datetime/to_local_time.rs |  57 ++++++------
 datafusion/functions/src/encoding/inner.rs         | 103 ++++++++++++---------
 datafusion/functions/src/unicode/strpos.rs         |  36 ++++---
 datafusion/functions/src/unicode/substr.rs         |  77 ++++++++++++---
 datafusion/sqllogictest/test_files/encoding.slt    |   8 +-
 datafusion/sqllogictest/test_files/functions.slt   |  61 ++++++++++++
 datafusion/sqllogictest/test_files/scalar.slt      |   2 +-
 datafusion/sqllogictest/test_files/select.slt      |  19 +++-
 .../test_files/string/string_literal.slt           |   4 +-
 datafusion/sqllogictest/test_files/timestamps.slt  |   2 +-
 14 files changed, 335 insertions(+), 128 deletions(-)

diff --git a/datafusion/core/src/dataframe/mod.rs 
b/datafusion/core/src/dataframe/mod.rs
index b52dcaa2f7..72b763ce0f 100644
--- a/datafusion/core/src/dataframe/mod.rs
+++ b/datafusion/core/src/dataframe/mod.rs
@@ -2029,6 +2029,43 @@ mod tests {
         Ok(())
     }
 
+    #[tokio::test]
+    async fn test_coalesce_schema() -> Result<()> {
+        let ctx = SessionContext::new();
+
+        let query = r#"SELECT COALESCE(null, 5)"#;
+
+        let result = ctx.sql(query).await?;
+        assert_logical_expr_schema_eq_physical_expr_schema(result).await?;
+        Ok(())
+    }
+
+    #[tokio::test]
+    async fn test_coalesce_from_values_schema() -> Result<()> {
+        let ctx = SessionContext::new();
+
+        let query = r#"SELECT COALESCE(column1, column2) FROM VALUES (null, 
1.2)"#;
+
+        let result = ctx.sql(query).await?;
+        assert_logical_expr_schema_eq_physical_expr_schema(result).await?;
+        Ok(())
+    }
+
+    #[tokio::test]
+    async fn test_coalesce_from_values_schema_multiple_rows() -> Result<()> {
+        let ctx = SessionContext::new();
+
+        let query = r#"SELECT COALESCE(column1, column2)
+        FROM VALUES
+        (null, 1.2),
+        (1.1, null),
+        (2, 5);"#;
+
+        let result = ctx.sql(query).await?;
+        assert_logical_expr_schema_eq_physical_expr_schema(result).await?;
+        Ok(())
+    }
+
     #[tokio::test]
     async fn test_array_agg_schema() -> Result<()> {
         let ctx = SessionContext::new();
diff --git a/datafusion/expr/src/expr_schema.rs 
b/datafusion/expr/src/expr_schema.rs
index f40ac409dd..ad617c53d6 100644
--- a/datafusion/expr/src/expr_schema.rs
+++ b/datafusion/expr/src/expr_schema.rs
@@ -151,21 +151,22 @@ impl ExprSchemable for Expr {
                     .collect::<Result<Vec<_>>>()?;
 
                 // verify that function is invoked with correct number and 
type of arguments as defined in `TypeSignature`
-                data_types_with_scalar_udf(&arg_data_types, 
func).map_err(|err| {
-                    plan_datafusion_err!(
-                        "{} {}",
-                        err,
-                        utils::generate_signature_error_msg(
-                            func.name(),
-                            func.signature().clone(),
-                            &arg_data_types,
+                let new_data_types = 
data_types_with_scalar_udf(&arg_data_types, func)
+                    .map_err(|err| {
+                        plan_datafusion_err!(
+                            "{} {}",
+                            err,
+                            utils::generate_signature_error_msg(
+                                func.name(),
+                                func.signature().clone(),
+                                &arg_data_types,
+                            )
                         )
-                    )
-                })?;
+                    })?;
 
                 // perform additional function arguments validation (due to 
limited
                 // expressiveness of `TypeSignature`), then infer return type
-                Ok(func.return_type_from_exprs(args, schema, &arg_data_types)?)
+                Ok(func.return_type_from_exprs(args, schema, &new_data_types)?)
             }
             Expr::WindowFunction(window_function) => self
                 .data_type_and_nullable_with_window_function(schema, 
window_function)
diff --git a/datafusion/expr/src/logical_plan/builder.rs 
b/datafusion/expr/src/logical_plan/builder.rs
index c38ce2d02f..dba7c2e983 100644
--- a/datafusion/expr/src/logical_plan/builder.rs
+++ b/datafusion/expr/src/logical_plan/builder.rs
@@ -216,7 +216,9 @@ impl LogicalPlanBuilder {
                     common_type = Some(data_type);
                 }
             }
-            field_types.push(common_type.unwrap_or(DataType::Utf8));
+            // assuming common_type was not set, and no error, therefore the 
type should be NULL
+            // since the code loop skips NULL
+            field_types.push(common_type.unwrap_or(DataType::Null));
         }
         // wrap cast if data type is not same as common type.
         for row in &mut values {
diff --git a/datafusion/functions/src/core/coalesce.rs 
b/datafusion/functions/src/core/coalesce.rs
index 19db58c181..2fa6d7c197 100644
--- a/datafusion/functions/src/core/coalesce.rs
+++ b/datafusion/functions/src/core/coalesce.rs
@@ -21,11 +21,11 @@ use arrow::array::{new_null_array, BooleanArray};
 use arrow::compute::kernels::zip::zip;
 use arrow::compute::{and, is_not_null, is_null};
 use arrow::datatypes::DataType;
-
 use datafusion_common::{exec_err, ExprSchema, Result};
 use datafusion_expr::type_coercion::binary::type_union_resolution;
 use datafusion_expr::{ColumnarValue, Expr, ExprSchemable};
 use datafusion_expr::{ScalarUDFImpl, Signature, Volatility};
+use itertools::Itertools;
 
 #[derive(Debug)]
 pub struct CoalesceFunc {
@@ -60,12 +60,16 @@ impl ScalarUDFImpl for CoalesceFunc {
     }
 
     fn return_type(&self, arg_types: &[DataType]) -> Result<DataType> {
-        Ok(arg_types[0].clone())
+        Ok(arg_types
+            .iter()
+            .find_or_first(|d| !d.is_null())
+            .unwrap()
+            .clone())
     }
 
-    // If all the element in coalesce is non-null, the result is non-null
+    // If any the arguments in coalesce is non-null, the result is non-null
     fn is_nullable(&self, args: &[Expr], schema: &dyn ExprSchema) -> bool {
-        args.iter().any(|e| e.nullable(schema).ok().unwrap_or(true))
+        args.iter().all(|e| e.nullable(schema).ok().unwrap_or(true))
     }
 
     /// coalesce evaluates to the first value which is not NULL
@@ -154,4 +158,22 @@ mod test {
             .unwrap();
         assert_eq!(return_type, DataType::Date32);
     }
+
+    #[test]
+    fn test_coalesce_return_types_with_nulls_first() {
+        let coalesce = core::coalesce::CoalesceFunc::new();
+        let return_type = coalesce
+            .return_type(&[DataType::Null, DataType::Date32])
+            .unwrap();
+        assert_eq!(return_type, DataType::Date32);
+    }
+
+    #[test]
+    fn test_coalesce_return_types_with_nulls_last() {
+        let coalesce = core::coalesce::CoalesceFunc::new();
+        let return_type = coalesce
+            .return_type(&[DataType::Int64, DataType::Null])
+            .unwrap();
+        assert_eq!(return_type, DataType::Int64);
+    }
 }
diff --git a/datafusion/functions/src/datetime/to_local_time.rs 
b/datafusion/functions/src/datetime/to_local_time.rs
index 634e28e6f3..0e33da1454 100644
--- a/datafusion/functions/src/datetime/to_local_time.rs
+++ b/datafusion/functions/src/datetime/to_local_time.rs
@@ -22,22 +22,16 @@ use std::sync::Arc;
 use arrow::array::timezone::Tz;
 use arrow::array::{Array, ArrayRef, PrimitiveBuilder};
 use arrow::datatypes::DataType::Timestamp;
+use arrow::datatypes::TimeUnit::{Microsecond, Millisecond, Nanosecond, Second};
 use arrow::datatypes::{
     ArrowTimestampType, DataType, TimestampMicrosecondType, 
TimestampMillisecondType,
     TimestampNanosecondType, TimestampSecondType,
 };
-use arrow::datatypes::{
-    TimeUnit,
-    TimeUnit::{Microsecond, Millisecond, Nanosecond, Second},
-};
 
 use chrono::{DateTime, MappedLocalTime, Offset, TimeDelta, TimeZone, Utc};
 use datafusion_common::cast::as_primitive_array;
-use datafusion_common::{exec_err, DataFusionError, Result, ScalarValue};
-use datafusion_expr::TypeSignature::Exact;
-use datafusion_expr::{
-    ColumnarValue, ScalarUDFImpl, Signature, Volatility, TIMEZONE_WILDCARD,
-};
+use datafusion_common::{exec_err, plan_err, DataFusionError, Result, 
ScalarValue};
+use datafusion_expr::{ColumnarValue, ScalarUDFImpl, Signature, Volatility};
 
 /// A UDF function that converts a timezone-aware timestamp to local time 
(with no offset or
 /// timezone information). In other words, this function strips off the 
timezone from the timestamp,
@@ -55,20 +49,8 @@ impl Default for ToLocalTimeFunc {
 
 impl ToLocalTimeFunc {
     pub fn new() -> Self {
-        let base_sig = |array_type: TimeUnit| {
-            [
-                Exact(vec![Timestamp(array_type, None)]),
-                Exact(vec![Timestamp(array_type, 
Some(TIMEZONE_WILDCARD.into()))]),
-            ]
-        };
-
-        let full_sig = [Nanosecond, Microsecond, Millisecond, Second]
-            .into_iter()
-            .flat_map(base_sig)
-            .collect::<Vec<_>>();
-
         Self {
-            signature: Signature::one_of(full_sig, Volatility::Immutable),
+            signature: Signature::user_defined(Volatility::Immutable),
         }
     }
 
@@ -328,13 +310,10 @@ impl ScalarUDFImpl for ToLocalTimeFunc {
         }
 
         match &arg_types[0] {
-            Timestamp(Nanosecond, _) => Ok(Timestamp(Nanosecond, None)),
-            Timestamp(Microsecond, _) => Ok(Timestamp(Microsecond, None)),
-            Timestamp(Millisecond, _) => Ok(Timestamp(Millisecond, None)),
-            Timestamp(Second, _) => Ok(Timestamp(Second, None)),
+            Timestamp(timeunit, _) => Ok(Timestamp(*timeunit, None)),
             _ => exec_err!(
                 "The to_local_time function can only accept timestamp as the 
arg, got {:?}", arg_types[0]
-            ),
+            )
         }
     }
 
@@ -348,6 +327,30 @@ impl ScalarUDFImpl for ToLocalTimeFunc {
 
         self.to_local_time(args)
     }
+
+    fn coerce_types(&self, arg_types: &[DataType]) -> Result<Vec<DataType>> {
+        if arg_types.len() != 1 {
+            return plan_err!(
+                "to_local_time function requires 1 argument, got {:?}",
+                arg_types.len()
+            );
+        }
+
+        let first_arg = arg_types[0].clone();
+        match &first_arg {
+            Timestamp(Nanosecond, timezone) => {
+                Ok(vec![Timestamp(Nanosecond, timezone.clone())])
+            }
+            Timestamp(Microsecond, timezone) => {
+                Ok(vec![Timestamp(Microsecond, timezone.clone())])
+            }
+            Timestamp(Millisecond, timezone) => {
+                Ok(vec![Timestamp(Millisecond, timezone.clone())])
+            }
+            Timestamp(Second, timezone) => Ok(vec![Timestamp(Second, 
timezone.clone())]),
+            _ => plan_err!("The to_local_time function can only accept 
Timestamp as the arg got {first_arg}"),
+        }
+    }
 }
 
 #[cfg(test)]
diff --git a/datafusion/functions/src/encoding/inner.rs 
b/datafusion/functions/src/encoding/inner.rs
index d9ce299a26..5b80c908cf 100644
--- a/datafusion/functions/src/encoding/inner.rs
+++ b/datafusion/functions/src/encoding/inner.rs
@@ -32,7 +32,6 @@ use datafusion_expr::ColumnarValue;
 use std::sync::Arc;
 use std::{fmt, str::FromStr};
 
-use datafusion_expr::TypeSignature::*;
 use datafusion_expr::{ScalarUDFImpl, Signature, Volatility};
 use std::any::Any;
 
@@ -49,17 +48,8 @@ impl Default for EncodeFunc {
 
 impl EncodeFunc {
     pub fn new() -> Self {
-        use DataType::*;
         Self {
-            signature: Signature::one_of(
-                vec![
-                    Exact(vec![Utf8, Utf8]),
-                    Exact(vec![LargeUtf8, Utf8]),
-                    Exact(vec![Binary, Utf8]),
-                    Exact(vec![LargeBinary, Utf8]),
-                ],
-                Volatility::Immutable,
-            ),
+            signature: Signature::user_defined(Volatility::Immutable),
         }
     }
 }
@@ -77,23 +67,39 @@ impl ScalarUDFImpl for EncodeFunc {
     }
 
     fn return_type(&self, arg_types: &[DataType]) -> Result<DataType> {
-        use DataType::*;
-
-        Ok(match arg_types[0] {
-            Utf8 => Utf8,
-            LargeUtf8 => LargeUtf8,
-            Binary => Utf8,
-            LargeBinary => LargeUtf8,
-            Null => Null,
-            _ => {
-                return plan_err!("The encode function can only accept utf8 or 
binary.");
-            }
-        })
+        Ok(arg_types[0].to_owned())
     }
 
     fn invoke(&self, args: &[ColumnarValue]) -> Result<ColumnarValue> {
         encode(args)
     }
+
+    fn coerce_types(&self, arg_types: &[DataType]) -> Result<Vec<DataType>> {
+        if arg_types.len() != 2 {
+            return plan_err!(
+                "{} expects to get 2 arguments, but got {}",
+                self.name(),
+                arg_types.len()
+            );
+        }
+
+        if arg_types[1] != DataType::Utf8 {
+            return Err(DataFusionError::Plan("2nd argument should be 
Utf8".into()));
+        }
+
+        match arg_types[0] {
+            DataType::Utf8 | DataType::Binary | DataType::Null => {
+                Ok(vec![DataType::Utf8; 2])
+            }
+            DataType::LargeUtf8 | DataType::LargeBinary => {
+                Ok(vec![DataType::LargeUtf8, DataType::Utf8])
+            }
+            _ => plan_err!(
+                "1st argument should be Utf8 or Binary or Null, got {:?}",
+                arg_types[0]
+            ),
+        }
+    }
 }
 
 #[derive(Debug)]
@@ -109,17 +115,8 @@ impl Default for DecodeFunc {
 
 impl DecodeFunc {
     pub fn new() -> Self {
-        use DataType::*;
         Self {
-            signature: Signature::one_of(
-                vec![
-                    Exact(vec![Utf8, Utf8]),
-                    Exact(vec![LargeUtf8, Utf8]),
-                    Exact(vec![Binary, Utf8]),
-                    Exact(vec![LargeBinary, Utf8]),
-                ],
-                Volatility::Immutable,
-            ),
+            signature: Signature::user_defined(Volatility::Immutable),
         }
     }
 }
@@ -137,23 +134,39 @@ impl ScalarUDFImpl for DecodeFunc {
     }
 
     fn return_type(&self, arg_types: &[DataType]) -> Result<DataType> {
-        use DataType::*;
-
-        Ok(match arg_types[0] {
-            Utf8 => Binary,
-            LargeUtf8 => LargeBinary,
-            Binary => Binary,
-            LargeBinary => LargeBinary,
-            Null => Null,
-            _ => {
-                return plan_err!("The decode function can only accept utf8 or 
binary.");
-            }
-        })
+        Ok(arg_types[0].to_owned())
     }
 
     fn invoke(&self, args: &[ColumnarValue]) -> Result<ColumnarValue> {
         decode(args)
     }
+
+    fn coerce_types(&self, arg_types: &[DataType]) -> Result<Vec<DataType>> {
+        if arg_types.len() != 2 {
+            return plan_err!(
+                "{} expects to get 2 arguments, but got {}",
+                self.name(),
+                arg_types.len()
+            );
+        }
+
+        if arg_types[1] != DataType::Utf8 {
+            return plan_err!("2nd argument should be Utf8");
+        }
+
+        match arg_types[0] {
+            DataType::Utf8 | DataType::Binary | DataType::Null => {
+                Ok(vec![DataType::Binary, DataType::Utf8])
+            }
+            DataType::LargeUtf8 | DataType::LargeBinary => {
+                Ok(vec![DataType::LargeBinary, DataType::Utf8])
+            }
+            _ => plan_err!(
+                "1st argument should be Utf8 or Binary or Null, got {:?}",
+                arg_types[0]
+            ),
+        }
+    }
 }
 
 #[derive(Debug, Copy, Clone)]
diff --git a/datafusion/functions/src/unicode/strpos.rs 
b/datafusion/functions/src/unicode/strpos.rs
index 6da67c8a27..3879f779eb 100644
--- a/datafusion/functions/src/unicode/strpos.rs
+++ b/datafusion/functions/src/unicode/strpos.rs
@@ -23,8 +23,7 @@ use arrow::datatypes::{ArrowNativeType, DataType, Int32Type, 
Int64Type};
 
 use crate::string::common::StringArrayType;
 use crate::utils::{make_scalar_function, utf8_to_int_type};
-use datafusion_common::{exec_err, Result};
-use datafusion_expr::TypeSignature::Exact;
+use datafusion_common::{exec_err, plan_err, Result};
 use datafusion_expr::{ColumnarValue, ScalarUDFImpl, Signature, Volatility};
 
 #[derive(Debug)]
@@ -41,20 +40,8 @@ impl Default for StrposFunc {
 
 impl StrposFunc {
     pub fn new() -> Self {
-        use DataType::*;
         Self {
-            signature: Signature::one_of(
-                vec![
-                    Exact(vec![Utf8, Utf8]),
-                    Exact(vec![Utf8, LargeUtf8]),
-                    Exact(vec![LargeUtf8, Utf8]),
-                    Exact(vec![LargeUtf8, LargeUtf8]),
-                    Exact(vec![Utf8View, Utf8View]),
-                    Exact(vec![Utf8View, Utf8]),
-                    Exact(vec![Utf8View, LargeUtf8]),
-                ],
-                Volatility::Immutable,
-            ),
+            signature: Signature::user_defined(Volatility::Immutable),
             aliases: vec![String::from("instr"), String::from("position")],
         }
     }
@@ -84,6 +71,25 @@ impl ScalarUDFImpl for StrposFunc {
     fn aliases(&self) -> &[String] {
         &self.aliases
     }
+
+    fn coerce_types(&self, arg_types: &[DataType]) -> Result<Vec<DataType>> {
+        match arg_types {
+            [first, second ] => {
+                match (first, second) {
+                    (DataType::LargeUtf8 | DataType::Utf8View | 
DataType::Utf8, DataType::LargeUtf8 | DataType::Utf8View | DataType::Utf8) => 
Ok(arg_types.to_vec()),
+                    (DataType::Null, DataType::Null)   => 
Ok(vec![DataType::Utf8, DataType::Utf8]),
+                    (DataType::Null, _)   => Ok(vec![DataType::Utf8, 
second.to_owned()]),
+                    (_, DataType::Null) => Ok(vec![first.to_owned(), 
DataType::Utf8]),
+                    (DataType::Dictionary(_, value_type), DataType::LargeUtf8 
| DataType::Utf8View | DataType::Utf8)  => match **value_type {
+                        DataType::LargeUtf8 | DataType::Utf8View | 
DataType::Utf8 | DataType::Null | DataType::Binary => 
Ok(vec![*value_type.clone(), second.to_owned()]),
+                        _ => plan_err!("The STRPOS/INSTR/POSITION function can 
only accept strings, but got {:?}.", **value_type),
+                    },
+                    _ => plan_err!("The STRPOS/INSTR/POSITION function can 
only accept strings, but got {:?}.", arg_types)
+                }
+            },
+            _ => plan_err!("The STRPOS/INSTR/POSITION function can only accept 
strings, but got {:?}", arg_types)
+        }
+    }
 }
 
 fn strpos(args: &[ArrayRef]) -> Result<ArrayRef> {
diff --git a/datafusion/functions/src/unicode/substr.rs 
b/datafusion/functions/src/unicode/substr.rs
index 934fa4bfe1..205de0b30b 100644
--- a/datafusion/functions/src/unicode/substr.rs
+++ b/datafusion/functions/src/unicode/substr.rs
@@ -27,8 +27,7 @@ use arrow::array::{
 use arrow::datatypes::DataType;
 use arrow_buffer::{NullBufferBuilder, ScalarBuffer};
 use datafusion_common::cast::as_int64_array;
-use datafusion_common::{exec_err, Result};
-use datafusion_expr::TypeSignature::Exact;
+use datafusion_common::{exec_err, plan_err, Result};
 use datafusion_expr::{ColumnarValue, ScalarUDFImpl, Signature, Volatility};
 
 #[derive(Debug)]
@@ -45,19 +44,8 @@ impl Default for SubstrFunc {
 
 impl SubstrFunc {
     pub fn new() -> Self {
-        use DataType::*;
         Self {
-            signature: Signature::one_of(
-                vec![
-                    Exact(vec![Utf8, Int64]),
-                    Exact(vec![LargeUtf8, Int64]),
-                    Exact(vec![Utf8, Int64, Int64]),
-                    Exact(vec![LargeUtf8, Int64, Int64]),
-                    Exact(vec![Utf8View, Int64]),
-                    Exact(vec![Utf8View, Int64, Int64]),
-                ],
-                Volatility::Immutable,
-            ),
+            signature: Signature::user_defined(Volatility::Immutable),
             aliases: vec![String::from("substring")],
         }
     }
@@ -91,6 +79,65 @@ impl ScalarUDFImpl for SubstrFunc {
     fn aliases(&self) -> &[String] {
         &self.aliases
     }
+
+    fn coerce_types(&self, arg_types: &[DataType]) -> Result<Vec<DataType>> {
+        let first_data_type = match &arg_types[0] {
+            DataType::Null => Ok(DataType::Utf8),
+            DataType::LargeUtf8 | DataType::Utf8View | DataType::Utf8 => 
Ok(arg_types[0].clone()),
+            DataType::Dictionary(key_type, value_type) => {
+                if key_type.is_integer() {
+                    match value_type.as_ref() {
+                        DataType::Null => Ok(DataType::Utf8),
+                        DataType::LargeUtf8 | DataType::Utf8View | 
DataType::Utf8 => Ok(*value_type.clone()),
+                        _ => plan_err!(
+                                "The first argument of the {} function can 
only be a string, but got {:?}.",
+                                self.name(),
+                                arg_types[0]
+                        ),
+                    }
+                } else {
+                    plan_err!(
+                        "The first argument of the {} function can only be a 
string, but got {:?}.",
+                        self.name(),
+                        arg_types[0]
+                    )
+                }
+            }
+            _ => plan_err!(
+                "The first argument of the {} function can only be a string, 
but got {:?}.",
+                self.name(),
+                arg_types[0]
+            )
+        }?;
+
+        if ![DataType::Int64, DataType::Int32, 
DataType::Null].contains(&arg_types[1]) {
+            return plan_err!(
+                "The second argument of the {} function can only be an 
integer, but got {:?}.",
+                self.name(),
+                arg_types[1]
+            );
+        }
+
+        if arg_types.len() == 3
+            && ![DataType::Int64, DataType::Int32, 
DataType::Null].contains(&arg_types[2])
+        {
+            return plan_err!(
+                "The third argument of the {} function can only be an integer, 
but got {:?}.",
+                self.name(),
+                arg_types[2]
+            );
+        }
+
+        if arg_types.len() == 2 {
+            Ok(vec![first_data_type.to_owned(), DataType::Int64])
+        } else {
+            Ok(vec![
+                first_data_type.to_owned(),
+                DataType::Int64,
+                DataType::Int64,
+            ])
+        }
+    }
 }
 
 /// Extracts the substring of string starting at the start'th character, and 
extending for count characters if that is specified. (Same as substring(string 
from start for count).)
@@ -394,7 +441,7 @@ where
                             )
                             } else {
                                 if start == i64::MIN {
-                                    return exec_err!("negative overflow when 
calculating skip value")
+                                    return exec_err!("negative overflow when 
calculating skip value");
                                 }
                                 let (start, end) = get_true_start_end(
                                     string,
diff --git a/datafusion/sqllogictest/test_files/encoding.slt 
b/datafusion/sqllogictest/test_files/encoding.slt
index 7a6ac5ca71..68bdf78115 100644
--- a/datafusion/sqllogictest/test_files/encoding.slt
+++ b/datafusion/sqllogictest/test_files/encoding.slt
@@ -28,14 +28,14 @@ CREATE TABLE test(
 ;
 
 # errors
-query error DataFusion error: Error during planning: The encode function can 
only accept utf8 or binary\.
-select encode(12, 'hex')
+query error 1st argument should be Utf8 or Binary or Null, got Int64
+select encode(12, 'hex');
 
 query error DataFusion error: Error during planning: There is no built\-in 
encoding named 'non_encoding', currently supported encodings are: base64, hex
 select encode(bin_field, 'non_encoding') from test;
 
-query error DataFusion error: Error during planning: The decode function can 
only accept utf8 or binary\.
-select decode(12, 'hex')
+query error 1st argument should be Utf8 or Binary or Null, got Int64
+select decode(12, 'hex');
 
 query error DataFusion error: Error during planning: There is no built\-in 
encoding named 'non_encoding', currently supported encodings are: base64, hex
 select decode(hex_field, 'non_encoding') from test;
diff --git a/datafusion/sqllogictest/test_files/functions.slt 
b/datafusion/sqllogictest/test_files/functions.slt
index 228d7ca2f0..e887b1934e 100644
--- a/datafusion/sqllogictest/test_files/functions.slt
+++ b/datafusion/sqllogictest/test_files/functions.slt
@@ -442,6 +442,67 @@ SELECT strpos('joséésoj', NULL)
 ----
 NULL
 
+query T
+SELECT substr('alphabet', -3)
+----
+alphabet
+
+query T
+SELECT substr('alphabet', 0)
+----
+alphabet
+
+query T
+SELECT substr('alphabet', 1)
+----
+alphabet
+
+query T
+SELECT substr('alphabet', 2)
+----
+lphabet
+
+query T
+SELECT substr('alphabet', 3)
+----
+phabet
+
+query T
+SELECT substr('alphabet', 30)
+----
+(empty)
+
+query T
+SELECT substr('alphabet', CAST(NULL AS int))
+----
+NULL
+
+query T
+SELECT substr('alphabet', 3, 2)
+----
+ph
+
+query T
+SELECT substr('alphabet', 3, 20)
+----
+phabet
+
+query T
+SELECT substr('alphabet', CAST(NULL AS int), 20)
+----
+NULL
+
+query T
+SELECT substr('alphabet', 3, CAST(NULL AS int))
+----
+NULL
+
+statement error The first argument of the substr function can only be a 
string, but got Int64
+SELECT substr(1, 3)
+
+statement error The first argument of the substr function can only be a 
string, but got Int64
+SELECT substr(1, 3, 4)
+
 query T
 SELECT translate('12345', '143', 'ax')
 ----
diff --git a/datafusion/sqllogictest/test_files/scalar.slt 
b/datafusion/sqllogictest/test_files/scalar.slt
index 6eed72e914..3b9c9a1604 100644
--- a/datafusion/sqllogictest/test_files/scalar.slt
+++ b/datafusion/sqllogictest/test_files/scalar.slt
@@ -1907,7 +1907,7 @@ select position('' in '')
 1
 
 
-query error DataFusion error: Execution error: The STRPOS/INSTR/POSITION 
function can only accept strings, but got Int64.
+query error POSITION function can only accept strings
 select position(1 in 1)
 
 
diff --git a/datafusion/sqllogictest/test_files/select.slt 
b/datafusion/sqllogictest/test_files/select.slt
index 3286ae32d4..5df5f313af 100644
--- a/datafusion/sqllogictest/test_files/select.slt
+++ b/datafusion/sqllogictest/test_files/select.slt
@@ -374,7 +374,7 @@ NULL a
 NULL b
 3 c
 
-query TT
+query ?T
 VALUES (NULL,'a'),(NULL,'b'),(NULL,'c')
 ----
 NULL a
@@ -395,7 +395,7 @@ VALUES (1,NULL),(2,NULL),(3,'c')
 2 NULL
 3 c
 
-query IIIIIIIIIIIIITTR
+query IIIIIIIIIIIII?TR
 VALUES (1,2,3,4,5,6,7,8,9,10,11,12,13,NULL,'F',3.5)
 ----
 1 2 3 4 5 6 7 8 9 10 11 12 13 NULL F 3.5
@@ -483,6 +483,21 @@ CREATE TABLE foo2(c1 double, c2 double) AS VALUES
 (2, 5),
 (3, 6);
 
+query T
+SELECT arrow_typeof(COALESCE(column1, column2)) FROM VALUES (null, 1.2);
+----
+Float64
+
+# multiple rows and columns with null need type coercion
+query TTT
+select arrow_typeof(column1), arrow_typeof(column2), arrow_typeof(column3) 
from (SELECT column1, column2, column3 FROM VALUES
+(null, 2, 'a'),
+(1.1, null, 'b'),
+(2, 5, null)) LIMIT 1;
+----
+Float64 Int64 Utf8
+
+
 # foo distinct
 query T
 select distinct '1' from foo;
diff --git a/datafusion/sqllogictest/test_files/string/string_literal.slt 
b/datafusion/sqllogictest/test_files/string/string_literal.slt
index 7cb921a660..24e03fdb71 100644
--- a/datafusion/sqllogictest/test_files/string/string_literal.slt
+++ b/datafusion/sqllogictest/test_files/string/string_literal.slt
@@ -123,10 +123,10 @@ SELECT substr('Hello🌏世界', 5, 3)
 ----
 o🌏世
 
-statement error The SUBSTR function can only accept strings, but got Int64.
+statement error The first argument of the substr function can only be a 
string, but got Int64
 SELECT substr(1, 3)
 
-statement error The SUBSTR function can only accept strings, but got Int64.
+statement error The first argument of the substr function can only be a 
string, but got Int64
 SELECT substr(1, 3, 4)
 
 statement error Execution error: negative substring length not allowed
diff --git a/datafusion/sqllogictest/test_files/timestamps.slt 
b/datafusion/sqllogictest/test_files/timestamps.slt
index 4b11e338da..7a7a8a8703 100644
--- a/datafusion/sqllogictest/test_files/timestamps.slt
+++ b/datafusion/sqllogictest/test_files/timestamps.slt
@@ -2858,7 +2858,7 @@ statement error
 select to_local_time('2024-04-01T00:00:20Z'::timestamp, 'some string');
 
 # invalid argument data type
-statement error DataFusion error: Execution error: The to_local_time function 
can only accept timestamp as the arg, got Utf8
+statement error The to_local_time function can only accept Timestamp as the 
arg got Utf8
 select to_local_time('2024-04-01T00:00:20Z');
 
 # invalid timezone


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to