This is an automated email from the ASF dual-hosted git repository.
jayzhan pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/datafusion.git
The following commit(s) were added to refs/heads/main by this push:
new 1b3608da7c fix: coalesce schema issues (#12308)
1b3608da7c is described below
commit 1b3608da7ca59d8d987804834d004e8b3e349d18
Author: Daniel Mesejo <[email protected]>
AuthorDate: Fri Sep 27 04:42:51 2024 +0200
fix: coalesce schema issues (#12308)
closes #12307
---
datafusion/core/src/dataframe/mod.rs | 37 ++++++++
datafusion/expr/src/expr_schema.rs | 23 ++---
datafusion/expr/src/logical_plan/builder.rs | 4 +-
datafusion/functions/src/core/coalesce.rs | 30 +++++-
datafusion/functions/src/datetime/to_local_time.rs | 57 ++++++------
datafusion/functions/src/encoding/inner.rs | 103 ++++++++++++---------
datafusion/functions/src/unicode/strpos.rs | 36 ++++---
datafusion/functions/src/unicode/substr.rs | 77 ++++++++++++---
datafusion/sqllogictest/test_files/encoding.slt | 8 +-
datafusion/sqllogictest/test_files/functions.slt | 61 ++++++++++++
datafusion/sqllogictest/test_files/scalar.slt | 2 +-
datafusion/sqllogictest/test_files/select.slt | 19 +++-
.../test_files/string/string_literal.slt | 4 +-
datafusion/sqllogictest/test_files/timestamps.slt | 2 +-
14 files changed, 335 insertions(+), 128 deletions(-)
diff --git a/datafusion/core/src/dataframe/mod.rs
b/datafusion/core/src/dataframe/mod.rs
index b52dcaa2f7..72b763ce0f 100644
--- a/datafusion/core/src/dataframe/mod.rs
+++ b/datafusion/core/src/dataframe/mod.rs
@@ -2029,6 +2029,43 @@ mod tests {
Ok(())
}
+ #[tokio::test]
+ async fn test_coalesce_schema() -> Result<()> {
+ let ctx = SessionContext::new();
+
+ let query = r#"SELECT COALESCE(null, 5)"#;
+
+ let result = ctx.sql(query).await?;
+ assert_logical_expr_schema_eq_physical_expr_schema(result).await?;
+ Ok(())
+ }
+
+ #[tokio::test]
+ async fn test_coalesce_from_values_schema() -> Result<()> {
+ let ctx = SessionContext::new();
+
+ let query = r#"SELECT COALESCE(column1, column2) FROM VALUES (null,
1.2)"#;
+
+ let result = ctx.sql(query).await?;
+ assert_logical_expr_schema_eq_physical_expr_schema(result).await?;
+ Ok(())
+ }
+
+ #[tokio::test]
+ async fn test_coalesce_from_values_schema_multiple_rows() -> Result<()> {
+ let ctx = SessionContext::new();
+
+ let query = r#"SELECT COALESCE(column1, column2)
+ FROM VALUES
+ (null, 1.2),
+ (1.1, null),
+ (2, 5);"#;
+
+ let result = ctx.sql(query).await?;
+ assert_logical_expr_schema_eq_physical_expr_schema(result).await?;
+ Ok(())
+ }
+
#[tokio::test]
async fn test_array_agg_schema() -> Result<()> {
let ctx = SessionContext::new();
diff --git a/datafusion/expr/src/expr_schema.rs
b/datafusion/expr/src/expr_schema.rs
index f40ac409dd..ad617c53d6 100644
--- a/datafusion/expr/src/expr_schema.rs
+++ b/datafusion/expr/src/expr_schema.rs
@@ -151,21 +151,22 @@ impl ExprSchemable for Expr {
.collect::<Result<Vec<_>>>()?;
// verify that function is invoked with correct number and
type of arguments as defined in `TypeSignature`
- data_types_with_scalar_udf(&arg_data_types,
func).map_err(|err| {
- plan_datafusion_err!(
- "{} {}",
- err,
- utils::generate_signature_error_msg(
- func.name(),
- func.signature().clone(),
- &arg_data_types,
+ let new_data_types =
data_types_with_scalar_udf(&arg_data_types, func)
+ .map_err(|err| {
+ plan_datafusion_err!(
+ "{} {}",
+ err,
+ utils::generate_signature_error_msg(
+ func.name(),
+ func.signature().clone(),
+ &arg_data_types,
+ )
)
- )
- })?;
+ })?;
// perform additional function arguments validation (due to
limited
// expressiveness of `TypeSignature`), then infer return type
- Ok(func.return_type_from_exprs(args, schema, &arg_data_types)?)
+ Ok(func.return_type_from_exprs(args, schema, &new_data_types)?)
}
Expr::WindowFunction(window_function) => self
.data_type_and_nullable_with_window_function(schema,
window_function)
diff --git a/datafusion/expr/src/logical_plan/builder.rs
b/datafusion/expr/src/logical_plan/builder.rs
index c38ce2d02f..dba7c2e983 100644
--- a/datafusion/expr/src/logical_plan/builder.rs
+++ b/datafusion/expr/src/logical_plan/builder.rs
@@ -216,7 +216,9 @@ impl LogicalPlanBuilder {
common_type = Some(data_type);
}
}
- field_types.push(common_type.unwrap_or(DataType::Utf8));
+ // assuming common_type was not set, and no error, therefore the
type should be NULL
+ // since the code loop skips NULL
+ field_types.push(common_type.unwrap_or(DataType::Null));
}
// wrap cast if data type is not same as common type.
for row in &mut values {
diff --git a/datafusion/functions/src/core/coalesce.rs
b/datafusion/functions/src/core/coalesce.rs
index 19db58c181..2fa6d7c197 100644
--- a/datafusion/functions/src/core/coalesce.rs
+++ b/datafusion/functions/src/core/coalesce.rs
@@ -21,11 +21,11 @@ use arrow::array::{new_null_array, BooleanArray};
use arrow::compute::kernels::zip::zip;
use arrow::compute::{and, is_not_null, is_null};
use arrow::datatypes::DataType;
-
use datafusion_common::{exec_err, ExprSchema, Result};
use datafusion_expr::type_coercion::binary::type_union_resolution;
use datafusion_expr::{ColumnarValue, Expr, ExprSchemable};
use datafusion_expr::{ScalarUDFImpl, Signature, Volatility};
+use itertools::Itertools;
#[derive(Debug)]
pub struct CoalesceFunc {
@@ -60,12 +60,16 @@ impl ScalarUDFImpl for CoalesceFunc {
}
fn return_type(&self, arg_types: &[DataType]) -> Result<DataType> {
- Ok(arg_types[0].clone())
+ Ok(arg_types
+ .iter()
+ .find_or_first(|d| !d.is_null())
+ .unwrap()
+ .clone())
}
- // If all the element in coalesce is non-null, the result is non-null
+ // If any the arguments in coalesce is non-null, the result is non-null
fn is_nullable(&self, args: &[Expr], schema: &dyn ExprSchema) -> bool {
- args.iter().any(|e| e.nullable(schema).ok().unwrap_or(true))
+ args.iter().all(|e| e.nullable(schema).ok().unwrap_or(true))
}
/// coalesce evaluates to the first value which is not NULL
@@ -154,4 +158,22 @@ mod test {
.unwrap();
assert_eq!(return_type, DataType::Date32);
}
+
+ #[test]
+ fn test_coalesce_return_types_with_nulls_first() {
+ let coalesce = core::coalesce::CoalesceFunc::new();
+ let return_type = coalesce
+ .return_type(&[DataType::Null, DataType::Date32])
+ .unwrap();
+ assert_eq!(return_type, DataType::Date32);
+ }
+
+ #[test]
+ fn test_coalesce_return_types_with_nulls_last() {
+ let coalesce = core::coalesce::CoalesceFunc::new();
+ let return_type = coalesce
+ .return_type(&[DataType::Int64, DataType::Null])
+ .unwrap();
+ assert_eq!(return_type, DataType::Int64);
+ }
}
diff --git a/datafusion/functions/src/datetime/to_local_time.rs
b/datafusion/functions/src/datetime/to_local_time.rs
index 634e28e6f3..0e33da1454 100644
--- a/datafusion/functions/src/datetime/to_local_time.rs
+++ b/datafusion/functions/src/datetime/to_local_time.rs
@@ -22,22 +22,16 @@ use std::sync::Arc;
use arrow::array::timezone::Tz;
use arrow::array::{Array, ArrayRef, PrimitiveBuilder};
use arrow::datatypes::DataType::Timestamp;
+use arrow::datatypes::TimeUnit::{Microsecond, Millisecond, Nanosecond, Second};
use arrow::datatypes::{
ArrowTimestampType, DataType, TimestampMicrosecondType,
TimestampMillisecondType,
TimestampNanosecondType, TimestampSecondType,
};
-use arrow::datatypes::{
- TimeUnit,
- TimeUnit::{Microsecond, Millisecond, Nanosecond, Second},
-};
use chrono::{DateTime, MappedLocalTime, Offset, TimeDelta, TimeZone, Utc};
use datafusion_common::cast::as_primitive_array;
-use datafusion_common::{exec_err, DataFusionError, Result, ScalarValue};
-use datafusion_expr::TypeSignature::Exact;
-use datafusion_expr::{
- ColumnarValue, ScalarUDFImpl, Signature, Volatility, TIMEZONE_WILDCARD,
-};
+use datafusion_common::{exec_err, plan_err, DataFusionError, Result,
ScalarValue};
+use datafusion_expr::{ColumnarValue, ScalarUDFImpl, Signature, Volatility};
/// A UDF function that converts a timezone-aware timestamp to local time
(with no offset or
/// timezone information). In other words, this function strips off the
timezone from the timestamp,
@@ -55,20 +49,8 @@ impl Default for ToLocalTimeFunc {
impl ToLocalTimeFunc {
pub fn new() -> Self {
- let base_sig = |array_type: TimeUnit| {
- [
- Exact(vec![Timestamp(array_type, None)]),
- Exact(vec![Timestamp(array_type,
Some(TIMEZONE_WILDCARD.into()))]),
- ]
- };
-
- let full_sig = [Nanosecond, Microsecond, Millisecond, Second]
- .into_iter()
- .flat_map(base_sig)
- .collect::<Vec<_>>();
-
Self {
- signature: Signature::one_of(full_sig, Volatility::Immutable),
+ signature: Signature::user_defined(Volatility::Immutable),
}
}
@@ -328,13 +310,10 @@ impl ScalarUDFImpl for ToLocalTimeFunc {
}
match &arg_types[0] {
- Timestamp(Nanosecond, _) => Ok(Timestamp(Nanosecond, None)),
- Timestamp(Microsecond, _) => Ok(Timestamp(Microsecond, None)),
- Timestamp(Millisecond, _) => Ok(Timestamp(Millisecond, None)),
- Timestamp(Second, _) => Ok(Timestamp(Second, None)),
+ Timestamp(timeunit, _) => Ok(Timestamp(*timeunit, None)),
_ => exec_err!(
"The to_local_time function can only accept timestamp as the
arg, got {:?}", arg_types[0]
- ),
+ )
}
}
@@ -348,6 +327,30 @@ impl ScalarUDFImpl for ToLocalTimeFunc {
self.to_local_time(args)
}
+
+ fn coerce_types(&self, arg_types: &[DataType]) -> Result<Vec<DataType>> {
+ if arg_types.len() != 1 {
+ return plan_err!(
+ "to_local_time function requires 1 argument, got {:?}",
+ arg_types.len()
+ );
+ }
+
+ let first_arg = arg_types[0].clone();
+ match &first_arg {
+ Timestamp(Nanosecond, timezone) => {
+ Ok(vec![Timestamp(Nanosecond, timezone.clone())])
+ }
+ Timestamp(Microsecond, timezone) => {
+ Ok(vec![Timestamp(Microsecond, timezone.clone())])
+ }
+ Timestamp(Millisecond, timezone) => {
+ Ok(vec![Timestamp(Millisecond, timezone.clone())])
+ }
+ Timestamp(Second, timezone) => Ok(vec![Timestamp(Second,
timezone.clone())]),
+ _ => plan_err!("The to_local_time function can only accept
Timestamp as the arg got {first_arg}"),
+ }
+ }
}
#[cfg(test)]
diff --git a/datafusion/functions/src/encoding/inner.rs
b/datafusion/functions/src/encoding/inner.rs
index d9ce299a26..5b80c908cf 100644
--- a/datafusion/functions/src/encoding/inner.rs
+++ b/datafusion/functions/src/encoding/inner.rs
@@ -32,7 +32,6 @@ use datafusion_expr::ColumnarValue;
use std::sync::Arc;
use std::{fmt, str::FromStr};
-use datafusion_expr::TypeSignature::*;
use datafusion_expr::{ScalarUDFImpl, Signature, Volatility};
use std::any::Any;
@@ -49,17 +48,8 @@ impl Default for EncodeFunc {
impl EncodeFunc {
pub fn new() -> Self {
- use DataType::*;
Self {
- signature: Signature::one_of(
- vec![
- Exact(vec![Utf8, Utf8]),
- Exact(vec![LargeUtf8, Utf8]),
- Exact(vec![Binary, Utf8]),
- Exact(vec![LargeBinary, Utf8]),
- ],
- Volatility::Immutable,
- ),
+ signature: Signature::user_defined(Volatility::Immutable),
}
}
}
@@ -77,23 +67,39 @@ impl ScalarUDFImpl for EncodeFunc {
}
fn return_type(&self, arg_types: &[DataType]) -> Result<DataType> {
- use DataType::*;
-
- Ok(match arg_types[0] {
- Utf8 => Utf8,
- LargeUtf8 => LargeUtf8,
- Binary => Utf8,
- LargeBinary => LargeUtf8,
- Null => Null,
- _ => {
- return plan_err!("The encode function can only accept utf8 or
binary.");
- }
- })
+ Ok(arg_types[0].to_owned())
}
fn invoke(&self, args: &[ColumnarValue]) -> Result<ColumnarValue> {
encode(args)
}
+
+ fn coerce_types(&self, arg_types: &[DataType]) -> Result<Vec<DataType>> {
+ if arg_types.len() != 2 {
+ return plan_err!(
+ "{} expects to get 2 arguments, but got {}",
+ self.name(),
+ arg_types.len()
+ );
+ }
+
+ if arg_types[1] != DataType::Utf8 {
+ return Err(DataFusionError::Plan("2nd argument should be
Utf8".into()));
+ }
+
+ match arg_types[0] {
+ DataType::Utf8 | DataType::Binary | DataType::Null => {
+ Ok(vec![DataType::Utf8; 2])
+ }
+ DataType::LargeUtf8 | DataType::LargeBinary => {
+ Ok(vec![DataType::LargeUtf8, DataType::Utf8])
+ }
+ _ => plan_err!(
+ "1st argument should be Utf8 or Binary or Null, got {:?}",
+ arg_types[0]
+ ),
+ }
+ }
}
#[derive(Debug)]
@@ -109,17 +115,8 @@ impl Default for DecodeFunc {
impl DecodeFunc {
pub fn new() -> Self {
- use DataType::*;
Self {
- signature: Signature::one_of(
- vec![
- Exact(vec![Utf8, Utf8]),
- Exact(vec![LargeUtf8, Utf8]),
- Exact(vec![Binary, Utf8]),
- Exact(vec![LargeBinary, Utf8]),
- ],
- Volatility::Immutable,
- ),
+ signature: Signature::user_defined(Volatility::Immutable),
}
}
}
@@ -137,23 +134,39 @@ impl ScalarUDFImpl for DecodeFunc {
}
fn return_type(&self, arg_types: &[DataType]) -> Result<DataType> {
- use DataType::*;
-
- Ok(match arg_types[0] {
- Utf8 => Binary,
- LargeUtf8 => LargeBinary,
- Binary => Binary,
- LargeBinary => LargeBinary,
- Null => Null,
- _ => {
- return plan_err!("The decode function can only accept utf8 or
binary.");
- }
- })
+ Ok(arg_types[0].to_owned())
}
fn invoke(&self, args: &[ColumnarValue]) -> Result<ColumnarValue> {
decode(args)
}
+
+ fn coerce_types(&self, arg_types: &[DataType]) -> Result<Vec<DataType>> {
+ if arg_types.len() != 2 {
+ return plan_err!(
+ "{} expects to get 2 arguments, but got {}",
+ self.name(),
+ arg_types.len()
+ );
+ }
+
+ if arg_types[1] != DataType::Utf8 {
+ return plan_err!("2nd argument should be Utf8");
+ }
+
+ match arg_types[0] {
+ DataType::Utf8 | DataType::Binary | DataType::Null => {
+ Ok(vec![DataType::Binary, DataType::Utf8])
+ }
+ DataType::LargeUtf8 | DataType::LargeBinary => {
+ Ok(vec![DataType::LargeBinary, DataType::Utf8])
+ }
+ _ => plan_err!(
+ "1st argument should be Utf8 or Binary or Null, got {:?}",
+ arg_types[0]
+ ),
+ }
+ }
}
#[derive(Debug, Copy, Clone)]
diff --git a/datafusion/functions/src/unicode/strpos.rs
b/datafusion/functions/src/unicode/strpos.rs
index 6da67c8a27..3879f779eb 100644
--- a/datafusion/functions/src/unicode/strpos.rs
+++ b/datafusion/functions/src/unicode/strpos.rs
@@ -23,8 +23,7 @@ use arrow::datatypes::{ArrowNativeType, DataType, Int32Type,
Int64Type};
use crate::string::common::StringArrayType;
use crate::utils::{make_scalar_function, utf8_to_int_type};
-use datafusion_common::{exec_err, Result};
-use datafusion_expr::TypeSignature::Exact;
+use datafusion_common::{exec_err, plan_err, Result};
use datafusion_expr::{ColumnarValue, ScalarUDFImpl, Signature, Volatility};
#[derive(Debug)]
@@ -41,20 +40,8 @@ impl Default for StrposFunc {
impl StrposFunc {
pub fn new() -> Self {
- use DataType::*;
Self {
- signature: Signature::one_of(
- vec![
- Exact(vec![Utf8, Utf8]),
- Exact(vec![Utf8, LargeUtf8]),
- Exact(vec![LargeUtf8, Utf8]),
- Exact(vec![LargeUtf8, LargeUtf8]),
- Exact(vec![Utf8View, Utf8View]),
- Exact(vec![Utf8View, Utf8]),
- Exact(vec![Utf8View, LargeUtf8]),
- ],
- Volatility::Immutable,
- ),
+ signature: Signature::user_defined(Volatility::Immutable),
aliases: vec![String::from("instr"), String::from("position")],
}
}
@@ -84,6 +71,25 @@ impl ScalarUDFImpl for StrposFunc {
fn aliases(&self) -> &[String] {
&self.aliases
}
+
+ fn coerce_types(&self, arg_types: &[DataType]) -> Result<Vec<DataType>> {
+ match arg_types {
+ [first, second ] => {
+ match (first, second) {
+ (DataType::LargeUtf8 | DataType::Utf8View |
DataType::Utf8, DataType::LargeUtf8 | DataType::Utf8View | DataType::Utf8) =>
Ok(arg_types.to_vec()),
+ (DataType::Null, DataType::Null) =>
Ok(vec![DataType::Utf8, DataType::Utf8]),
+ (DataType::Null, _) => Ok(vec![DataType::Utf8,
second.to_owned()]),
+ (_, DataType::Null) => Ok(vec![first.to_owned(),
DataType::Utf8]),
+ (DataType::Dictionary(_, value_type), DataType::LargeUtf8
| DataType::Utf8View | DataType::Utf8) => match **value_type {
+ DataType::LargeUtf8 | DataType::Utf8View |
DataType::Utf8 | DataType::Null | DataType::Binary =>
Ok(vec![*value_type.clone(), second.to_owned()]),
+ _ => plan_err!("The STRPOS/INSTR/POSITION function can
only accept strings, but got {:?}.", **value_type),
+ },
+ _ => plan_err!("The STRPOS/INSTR/POSITION function can
only accept strings, but got {:?}.", arg_types)
+ }
+ },
+ _ => plan_err!("The STRPOS/INSTR/POSITION function can only accept
strings, but got {:?}", arg_types)
+ }
+ }
}
fn strpos(args: &[ArrayRef]) -> Result<ArrayRef> {
diff --git a/datafusion/functions/src/unicode/substr.rs
b/datafusion/functions/src/unicode/substr.rs
index 934fa4bfe1..205de0b30b 100644
--- a/datafusion/functions/src/unicode/substr.rs
+++ b/datafusion/functions/src/unicode/substr.rs
@@ -27,8 +27,7 @@ use arrow::array::{
use arrow::datatypes::DataType;
use arrow_buffer::{NullBufferBuilder, ScalarBuffer};
use datafusion_common::cast::as_int64_array;
-use datafusion_common::{exec_err, Result};
-use datafusion_expr::TypeSignature::Exact;
+use datafusion_common::{exec_err, plan_err, Result};
use datafusion_expr::{ColumnarValue, ScalarUDFImpl, Signature, Volatility};
#[derive(Debug)]
@@ -45,19 +44,8 @@ impl Default for SubstrFunc {
impl SubstrFunc {
pub fn new() -> Self {
- use DataType::*;
Self {
- signature: Signature::one_of(
- vec![
- Exact(vec![Utf8, Int64]),
- Exact(vec![LargeUtf8, Int64]),
- Exact(vec![Utf8, Int64, Int64]),
- Exact(vec![LargeUtf8, Int64, Int64]),
- Exact(vec![Utf8View, Int64]),
- Exact(vec![Utf8View, Int64, Int64]),
- ],
- Volatility::Immutable,
- ),
+ signature: Signature::user_defined(Volatility::Immutable),
aliases: vec![String::from("substring")],
}
}
@@ -91,6 +79,65 @@ impl ScalarUDFImpl for SubstrFunc {
fn aliases(&self) -> &[String] {
&self.aliases
}
+
+ fn coerce_types(&self, arg_types: &[DataType]) -> Result<Vec<DataType>> {
+ let first_data_type = match &arg_types[0] {
+ DataType::Null => Ok(DataType::Utf8),
+ DataType::LargeUtf8 | DataType::Utf8View | DataType::Utf8 =>
Ok(arg_types[0].clone()),
+ DataType::Dictionary(key_type, value_type) => {
+ if key_type.is_integer() {
+ match value_type.as_ref() {
+ DataType::Null => Ok(DataType::Utf8),
+ DataType::LargeUtf8 | DataType::Utf8View |
DataType::Utf8 => Ok(*value_type.clone()),
+ _ => plan_err!(
+ "The first argument of the {} function can
only be a string, but got {:?}.",
+ self.name(),
+ arg_types[0]
+ ),
+ }
+ } else {
+ plan_err!(
+ "The first argument of the {} function can only be a
string, but got {:?}.",
+ self.name(),
+ arg_types[0]
+ )
+ }
+ }
+ _ => plan_err!(
+ "The first argument of the {} function can only be a string,
but got {:?}.",
+ self.name(),
+ arg_types[0]
+ )
+ }?;
+
+ if ![DataType::Int64, DataType::Int32,
DataType::Null].contains(&arg_types[1]) {
+ return plan_err!(
+ "The second argument of the {} function can only be an
integer, but got {:?}.",
+ self.name(),
+ arg_types[1]
+ );
+ }
+
+ if arg_types.len() == 3
+ && ![DataType::Int64, DataType::Int32,
DataType::Null].contains(&arg_types[2])
+ {
+ return plan_err!(
+ "The third argument of the {} function can only be an integer,
but got {:?}.",
+ self.name(),
+ arg_types[2]
+ );
+ }
+
+ if arg_types.len() == 2 {
+ Ok(vec![first_data_type.to_owned(), DataType::Int64])
+ } else {
+ Ok(vec![
+ first_data_type.to_owned(),
+ DataType::Int64,
+ DataType::Int64,
+ ])
+ }
+ }
}
/// Extracts the substring of string starting at the start'th character, and
extending for count characters if that is specified. (Same as substring(string
from start for count).)
@@ -394,7 +441,7 @@ where
)
} else {
if start == i64::MIN {
- return exec_err!("negative overflow when
calculating skip value")
+ return exec_err!("negative overflow when
calculating skip value");
}
let (start, end) = get_true_start_end(
string,
diff --git a/datafusion/sqllogictest/test_files/encoding.slt
b/datafusion/sqllogictest/test_files/encoding.slt
index 7a6ac5ca71..68bdf78115 100644
--- a/datafusion/sqllogictest/test_files/encoding.slt
+++ b/datafusion/sqllogictest/test_files/encoding.slt
@@ -28,14 +28,14 @@ CREATE TABLE test(
;
# errors
-query error DataFusion error: Error during planning: The encode function can
only accept utf8 or binary\.
-select encode(12, 'hex')
+query error 1st argument should be Utf8 or Binary or Null, got Int64
+select encode(12, 'hex');
query error DataFusion error: Error during planning: There is no built\-in
encoding named 'non_encoding', currently supported encodings are: base64, hex
select encode(bin_field, 'non_encoding') from test;
-query error DataFusion error: Error during planning: The decode function can
only accept utf8 or binary\.
-select decode(12, 'hex')
+query error 1st argument should be Utf8 or Binary or Null, got Int64
+select decode(12, 'hex');
query error DataFusion error: Error during planning: There is no built\-in
encoding named 'non_encoding', currently supported encodings are: base64, hex
select decode(hex_field, 'non_encoding') from test;
diff --git a/datafusion/sqllogictest/test_files/functions.slt
b/datafusion/sqllogictest/test_files/functions.slt
index 228d7ca2f0..e887b1934e 100644
--- a/datafusion/sqllogictest/test_files/functions.slt
+++ b/datafusion/sqllogictest/test_files/functions.slt
@@ -442,6 +442,67 @@ SELECT strpos('joséésoj', NULL)
----
NULL
+query T
+SELECT substr('alphabet', -3)
+----
+alphabet
+
+query T
+SELECT substr('alphabet', 0)
+----
+alphabet
+
+query T
+SELECT substr('alphabet', 1)
+----
+alphabet
+
+query T
+SELECT substr('alphabet', 2)
+----
+lphabet
+
+query T
+SELECT substr('alphabet', 3)
+----
+phabet
+
+query T
+SELECT substr('alphabet', 30)
+----
+(empty)
+
+query T
+SELECT substr('alphabet', CAST(NULL AS int))
+----
+NULL
+
+query T
+SELECT substr('alphabet', 3, 2)
+----
+ph
+
+query T
+SELECT substr('alphabet', 3, 20)
+----
+phabet
+
+query T
+SELECT substr('alphabet', CAST(NULL AS int), 20)
+----
+NULL
+
+query T
+SELECT substr('alphabet', 3, CAST(NULL AS int))
+----
+NULL
+
+statement error The first argument of the substr function can only be a
string, but got Int64
+SELECT substr(1, 3)
+
+statement error The first argument of the substr function can only be a
string, but got Int64
+SELECT substr(1, 3, 4)
+
query T
SELECT translate('12345', '143', 'ax')
----
diff --git a/datafusion/sqllogictest/test_files/scalar.slt
b/datafusion/sqllogictest/test_files/scalar.slt
index 6eed72e914..3b9c9a1604 100644
--- a/datafusion/sqllogictest/test_files/scalar.slt
+++ b/datafusion/sqllogictest/test_files/scalar.slt
@@ -1907,7 +1907,7 @@ select position('' in '')
1
-query error DataFusion error: Execution error: The STRPOS/INSTR/POSITION
function can only accept strings, but got Int64.
+query error POSITION function can only accept strings
select position(1 in 1)
diff --git a/datafusion/sqllogictest/test_files/select.slt
b/datafusion/sqllogictest/test_files/select.slt
index 3286ae32d4..5df5f313af 100644
--- a/datafusion/sqllogictest/test_files/select.slt
+++ b/datafusion/sqllogictest/test_files/select.slt
@@ -374,7 +374,7 @@ NULL a
NULL b
3 c
-query TT
+query ?T
VALUES (NULL,'a'),(NULL,'b'),(NULL,'c')
----
NULL a
@@ -395,7 +395,7 @@ VALUES (1,NULL),(2,NULL),(3,'c')
2 NULL
3 c
-query IIIIIIIIIIIIITTR
+query IIIIIIIIIIIII?TR
VALUES (1,2,3,4,5,6,7,8,9,10,11,12,13,NULL,'F',3.5)
----
1 2 3 4 5 6 7 8 9 10 11 12 13 NULL F 3.5
@@ -483,6 +483,21 @@ CREATE TABLE foo2(c1 double, c2 double) AS VALUES
(2, 5),
(3, 6);
+query T
+SELECT arrow_typeof(COALESCE(column1, column2)) FROM VALUES (null, 1.2);
+----
+Float64
+
+# multiple rows and columns with null need type coercion
+query TTT
+select arrow_typeof(column1), arrow_typeof(column2), arrow_typeof(column3)
from (SELECT column1, column2, column3 FROM VALUES
+(null, 2, 'a'),
+(1.1, null, 'b'),
+(2, 5, null)) LIMIT 1;
+----
+Float64 Int64 Utf8
+
+
# foo distinct
query T
select distinct '1' from foo;
diff --git a/datafusion/sqllogictest/test_files/string/string_literal.slt
b/datafusion/sqllogictest/test_files/string/string_literal.slt
index 7cb921a660..24e03fdb71 100644
--- a/datafusion/sqllogictest/test_files/string/string_literal.slt
+++ b/datafusion/sqllogictest/test_files/string/string_literal.slt
@@ -123,10 +123,10 @@ SELECT substr('Hello🌏世界', 5, 3)
----
o🌏世
-statement error The SUBSTR function can only accept strings, but got Int64.
+statement error The first argument of the substr function can only be a
string, but got Int64
SELECT substr(1, 3)
-statement error The SUBSTR function can only accept strings, but got Int64.
+statement error The first argument of the substr function can only be a
string, but got Int64
SELECT substr(1, 3, 4)
statement error Execution error: negative substring length not allowed
diff --git a/datafusion/sqllogictest/test_files/timestamps.slt
b/datafusion/sqllogictest/test_files/timestamps.slt
index 4b11e338da..7a7a8a8703 100644
--- a/datafusion/sqllogictest/test_files/timestamps.slt
+++ b/datafusion/sqllogictest/test_files/timestamps.slt
@@ -2858,7 +2858,7 @@ statement error
select to_local_time('2024-04-01T00:00:20Z'::timestamp, 'some string');
# invalid argument data type
-statement error DataFusion error: Execution error: The to_local_time function
can only accept timestamp as the arg, got Utf8
+statement error The to_local_time function can only accept Timestamp as the
arg got Utf8
select to_local_time('2024-04-01T00:00:20Z');
# invalid timezone
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]