This is an automated email from the ASF dual-hosted git repository.
jayzhan pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/datafusion.git
The following commit(s) were added to refs/heads/main by this push:
new 6686e034dd Use LogicalType for TypeSignature `Numeric` and `String`,
`Coercible` (#13240)
6686e034dd is described below
commit 6686e034dd1008fc7303c482cf664402eca25d67
Author: Jay Zhan <[email protected]>
AuthorDate: Thu Nov 7 07:56:34 2024 +0800
Use LogicalType for TypeSignature `Numeric` and `String`, `Coercible`
(#13240)
* use logical type for signature
Signed-off-by: jayzhan211 <[email protected]>
* fmt & clippy
Signed-off-by: jayzhan211 <[email protected]>
* numeric
Signed-off-by: jayzhan211 <[email protected]>
* fix numeric
Signed-off-by: jayzhan211 <[email protected]>
* deprecate coercible
Signed-off-by: jayzhan211 <[email protected]>
* introduce numeric and numeric string
Signed-off-by: jayzhan211 <[email protected]>
* fix doc
Signed-off-by: jayzhan211 <[email protected]>
* cleanup
Signed-off-by: jayzhan211 <[email protected]>
* add back coercible
Signed-off-by: jayzhan211 <[email protected]>
* rename
Signed-off-by: jayzhan211 <[email protected]>
* fmt
Signed-off-by: jayzhan211 <[email protected]>
* rm numeric string signature
Signed-off-by: jayzhan211 <[email protected]>
* typo
Signed-off-by: jayzhan211 <[email protected]>
* improve doc and err msg
Signed-off-by: jayzhan211 <[email protected]>
---------
Signed-off-by: jayzhan211 <[email protected]>
---
datafusion/common/src/types/logical.rs | 6 +
datafusion/common/src/types/native.rs | 45 ++++++-
datafusion/expr-common/src/signature.rs | 16 ++-
datafusion/expr/src/type_coercion/functions.rs | 149 +++++++++++++--------
datafusion/functions-aggregate/src/first_last.rs | 24 +---
datafusion/functions-aggregate/src/stddev.rs | 5 +-
datafusion/functions-aggregate/src/variance.rs | 5 +-
datafusion/functions/src/math/abs.rs | 2 +-
datafusion/functions/src/string/bit_length.rs | 2 +-
datafusion/functions/src/string/concat.rs | 2 +-
datafusion/functions/src/string/concat_ws.rs | 2 +-
datafusion/functions/src/string/lower.rs | 2 +-
datafusion/functions/src/string/octet_length.rs | 2 +-
datafusion/functions/src/string/repeat.rs | 16 +--
datafusion/functions/src/string/upper.rs | 2 +-
.../functions/src/unicode/character_length.rs | 2 +-
datafusion/functions/src/unicode/lpad.rs | 2 +-
datafusion/sqllogictest/test_files/expr.slt | 10 +-
datafusion/sqllogictest/test_files/math.slt | 9 +-
datafusion/sqllogictest/test_files/scalar.slt | 6 +-
datafusion/sqllogictest/test_files/timestamps.slt | 7 +
21 files changed, 199 insertions(+), 117 deletions(-)
diff --git a/datafusion/common/src/types/logical.rs
b/datafusion/common/src/types/logical.rs
index bde393992a..a65392cae3 100644
--- a/datafusion/common/src/types/logical.rs
+++ b/datafusion/common/src/types/logical.rs
@@ -98,6 +98,12 @@ impl fmt::Debug for dyn LogicalType {
}
}
+impl std::fmt::Display for dyn LogicalType {
+ fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+ write!(f, "{self:?}")
+ }
+}
+
impl PartialEq for dyn LogicalType {
fn eq(&self, other: &Self) -> bool {
self.signature().eq(&other.signature())
diff --git a/datafusion/common/src/types/native.rs
b/datafusion/common/src/types/native.rs
index bfb546783e..7e326dc15b 100644
--- a/datafusion/common/src/types/native.rs
+++ b/datafusion/common/src/types/native.rs
@@ -24,7 +24,7 @@ use arrow::compute::can_cast_types;
use arrow_schema::{
DataType, Field, FieldRef, Fields, IntervalUnit, TimeUnit, UnionFields,
};
-use std::sync::Arc;
+use std::{fmt::Display, sync::Arc};
/// Representation of a type that DataFusion can handle natively. It is a
subset
/// of the physical variants in Arrow's native [`DataType`].
@@ -183,6 +183,12 @@ pub enum NativeType {
Map(LogicalFieldRef),
}
+impl Display for NativeType {
+ fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+ write!(f, "NativeType::{self:?}")
+ }
+}
+
impl LogicalType for NativeType {
fn native(&self) -> &NativeType {
self
@@ -348,6 +354,12 @@ impl LogicalType for NativeType {
// mapping solutions to provide backwards compatibility while transitioning
from
// the purely physical system to a logical / physical system.
+impl From<&DataType> for NativeType {
+ fn from(value: &DataType) -> Self {
+ value.clone().into()
+ }
+}
+
impl From<DataType> for NativeType {
fn from(value: DataType) -> Self {
use NativeType::*;
@@ -392,8 +404,33 @@ impl From<DataType> for NativeType {
}
}
-impl From<&DataType> for NativeType {
- fn from(value: &DataType) -> Self {
- value.clone().into()
+impl NativeType {
+ #[inline]
+ pub fn is_numeric(&self) -> bool {
+ use NativeType::*;
+ matches!(
+ self,
+ UInt8
+ | UInt16
+ | UInt32
+ | UInt64
+ | Int8
+ | Int16
+ | Int32
+ | Int64
+ | Float16
+ | Float32
+ | Float64
+ | Decimal(_, _)
+ )
+ }
+
+ #[inline]
+ pub fn is_integer(&self) -> bool {
+ use NativeType::*;
+ matches!(
+ self,
+ UInt8 | UInt16 | UInt32 | UInt64 | Int8 | Int16 | Int32 | Int64
+ )
}
}
diff --git a/datafusion/expr-common/src/signature.rs
b/datafusion/expr-common/src/signature.rs
index 24cb54f634..6e78f31e6a 100644
--- a/datafusion/expr-common/src/signature.rs
+++ b/datafusion/expr-common/src/signature.rs
@@ -19,6 +19,7 @@
//! and return types of functions in DataFusion.
use arrow::datatypes::DataType;
+use datafusion_common::types::LogicalTypeRef;
/// Constant that is used as a placeholder for any valid timezone.
/// This is used where a function can accept a timestamp type with any
@@ -106,10 +107,10 @@ pub enum TypeSignature {
/// Exact number of arguments of an exact type
Exact(Vec<DataType>),
/// The number of arguments that can be coerced to in order
- /// For example, `Coercible(vec![DataType::Float64])` accepts
+ /// For example, `Coercible(vec![logical_float64()])` accepts
/// arguments like `vec![DataType::Int32]` or `vec![DataType::Float32]`
/// since i32 and f32 can be casted to f64
- Coercible(Vec<DataType>),
+ Coercible(Vec<LogicalTypeRef>),
/// Fixed number of arguments of arbitrary types
/// If a function takes 0 argument, its `TypeSignature` should be `Any(0)`
Any(usize),
@@ -123,7 +124,9 @@ pub enum TypeSignature {
/// Specifies Signatures for array functions
ArraySignature(ArrayFunctionSignature),
/// Fixed number of arguments of numeric types.
- /// See
<https://docs.rs/arrow/latest/arrow/datatypes/enum.DataType.html#method.is_numeric>
to know which type is considered numeric
+ /// See [`NativeType::is_numeric`] to know which type is considered numeric
+ ///
+ /// [`NativeType::is_numeric`]: datafusion_common
Numeric(usize),
/// Fixed number of arguments of all the same string types.
/// The precedence of type from high to low is Utf8View, LargeUtf8 and
Utf8.
@@ -201,7 +204,10 @@ impl TypeSignature {
TypeSignature::Numeric(num) => {
vec![format!("Numeric({num})")]
}
- TypeSignature::Exact(types) | TypeSignature::Coercible(types) => {
+ TypeSignature::Coercible(types) => {
+ vec![Self::join_types(types, ", ")]
+ }
+ TypeSignature::Exact(types) => {
vec![Self::join_types(types, ", ")]
}
TypeSignature::Any(arg_count) => {
@@ -322,7 +328,7 @@ impl Signature {
}
}
/// Target coerce types in order
- pub fn coercible(target_types: Vec<DataType>, volatility: Volatility) ->
Self {
+ pub fn coercible(target_types: Vec<LogicalTypeRef>, volatility:
Volatility) -> Self {
Self {
type_signature: TypeSignature::Coercible(target_types),
volatility,
diff --git a/datafusion/expr/src/type_coercion/functions.rs
b/datafusion/expr/src/type_coercion/functions.rs
index 85f8e20ba4..5a4d89a0b2 100644
--- a/datafusion/expr/src/type_coercion/functions.rs
+++ b/datafusion/expr/src/type_coercion/functions.rs
@@ -23,6 +23,7 @@ use arrow::{
};
use datafusion_common::{
exec_err, internal_datafusion_err, internal_err, plan_err,
+ types::{LogicalType, NativeType},
utils::{coerced_fixed_size_list_to_list, list_ndims},
Result,
};
@@ -395,40 +396,56 @@ fn get_valid_types(
}
}
+ fn function_length_check(length: usize, expected_length: usize) ->
Result<()> {
+ if length < 1 {
+ return plan_err!(
+ "The signature expected at least one argument but received
{expected_length}"
+ );
+ }
+
+ if length != expected_length {
+ return plan_err!(
+ "The signature expected {length} arguments but received
{expected_length}"
+ );
+ }
+
+ Ok(())
+ }
+
let valid_types = match signature {
TypeSignature::Variadic(valid_types) => valid_types
.iter()
.map(|valid_type| current_types.iter().map(|_|
valid_type.clone()).collect())
.collect(),
TypeSignature::String(number) => {
- if *number < 1 {
- return plan_err!(
- "The signature expected at least one argument but received
{}",
- current_types.len()
- );
- }
- if *number != current_types.len() {
- return plan_err!(
- "The signature expected {} arguments but received {}",
- number,
- current_types.len()
- );
+ function_length_check(current_types.len(), *number)?;
+
+ let mut new_types = Vec::with_capacity(current_types.len());
+ for data_type in current_types.iter() {
+ let logical_data_type: NativeType = data_type.into();
+ if logical_data_type == NativeType::String {
+ new_types.push(data_type.to_owned());
+ } else if logical_data_type == NativeType::Null {
+ // TODO: Switch to Utf8View if all the string functions
supports Utf8View
+ new_types.push(DataType::Utf8);
+ } else {
+ return plan_err!(
+ "The signature expected NativeType::String but
received {logical_data_type}"
+ );
+ }
}
- fn coercion_rule(
+ // Find the common string type for the given types
+ fn find_common_type(
lhs_type: &DataType,
rhs_type: &DataType,
) -> Result<DataType> {
match (lhs_type, rhs_type) {
- (DataType::Null, DataType::Null) => Ok(DataType::Utf8),
- (DataType::Null, data_type) | (data_type, DataType::Null)
=> {
- coercion_rule(data_type, &DataType::Utf8)
- }
(DataType::Dictionary(_, lhs), DataType::Dictionary(_,
rhs)) => {
- coercion_rule(lhs, rhs)
+ find_common_type(lhs, rhs)
}
(DataType::Dictionary(_, v), other)
- | (other, DataType::Dictionary(_, v)) => coercion_rule(v,
other),
+ | (other, DataType::Dictionary(_, v)) =>
find_common_type(v, other),
_ => {
if let Some(coerced_type) = string_coercion(lhs_type,
rhs_type) {
Ok(coerced_type)
@@ -444,15 +461,13 @@ fn get_valid_types(
}
// Length checked above, safe to unwrap
- let mut coerced_type = current_types.first().unwrap().to_owned();
- for t in current_types.iter().skip(1) {
- coerced_type = coercion_rule(&coerced_type, t)?;
+ let mut coerced_type = new_types.first().unwrap().to_owned();
+ for t in new_types.iter().skip(1) {
+ coerced_type = find_common_type(&coerced_type, t)?;
}
fn base_type_or_default_type(data_type: &DataType) -> DataType {
- if data_type.is_null() {
- DataType::Utf8
- } else if let DataType::Dictionary(_, v) = data_type {
+ if let DataType::Dictionary(_, v) = data_type {
base_type_or_default_type(v)
} else {
data_type.to_owned()
@@ -462,22 +477,22 @@ fn get_valid_types(
vec![vec![base_type_or_default_type(&coerced_type); *number]]
}
TypeSignature::Numeric(number) => {
- if *number < 1 {
- return plan_err!(
- "The signature expected at least one argument but received
{}",
- current_types.len()
- );
- }
- if *number != current_types.len() {
- return plan_err!(
- "The signature expected {} arguments but received {}",
- number,
- current_types.len()
- );
- }
+ function_length_check(current_types.len(), *number)?;
- let mut valid_type = current_types.first().unwrap().clone();
+ // Find common numeric type amongs given types except string
+ let mut valid_type = current_types.first().unwrap().to_owned();
for t in current_types.iter().skip(1) {
+ let logical_data_type: NativeType = t.into();
+ if logical_data_type == NativeType::Null {
+ continue;
+ }
+
+ if !logical_data_type.is_numeric() {
+ return plan_err!(
+ "The signature expected NativeType::Numeric but
received {logical_data_type}"
+ );
+ }
+
if let Some(coerced_type) =
binary_numeric_coercion(&valid_type, t) {
valid_type = coerced_type;
} else {
@@ -489,31 +504,55 @@ fn get_valid_types(
}
}
+ let logical_data_type: NativeType = valid_type.clone().into();
+ // Fallback to default type if we don't know which type to coerced
to
+ // f64 is chosen since most of the math functions utilize
Signature::numeric,
+ // and their default type is double precision
+ if logical_data_type == NativeType::Null {
+ valid_type = DataType::Float64;
+ }
+
vec![vec![valid_type; *number]]
}
TypeSignature::Coercible(target_types) => {
- if target_types.is_empty() {
- return plan_err!(
- "The signature expected at least one argument but received
{}",
- current_types.len()
- );
- }
- if target_types.len() != current_types.len() {
- return plan_err!(
- "The signature expected {} arguments but received {}",
- target_types.len(),
- current_types.len()
- );
+ function_length_check(current_types.len(), target_types.len())?;
+
+ // Aim to keep this logic as SIMPLE as possible!
+ // Make sure the corresponding test is covered
+ // If this function becomes COMPLEX, create another new signature!
+ fn can_coerce_to(
+ logical_type: &NativeType,
+ target_type: &NativeType,
+ ) -> bool {
+ if logical_type == target_type {
+ return true;
+ }
+
+ if logical_type == &NativeType::Null {
+ return true;
+ }
+
+ if target_type.is_integer() && logical_type.is_integer() {
+ return true;
+ }
+
+ false
}
- for (data_type, target_type) in
current_types.iter().zip(target_types.iter())
+ let mut new_types = Vec::with_capacity(current_types.len());
+ for (current_type, target_type) in
+ current_types.iter().zip(target_types.iter())
{
- if !can_cast_types(data_type, target_type) {
- return plan_err!("{data_type} is not coercible to
{target_type}");
+ let logical_type: NativeType = current_type.into();
+ let target_logical_type = target_type.native();
+ if can_coerce_to(&logical_type, target_logical_type) {
+ let target_type =
+ target_logical_type.default_cast_for(current_type)?;
+ new_types.push(target_type);
}
}
- vec![target_types.to_owned()]
+ vec![new_types]
}
TypeSignature::Uniform(number, valid_types) => valid_types
.iter()
diff --git a/datafusion/functions-aggregate/src/first_last.rs
b/datafusion/functions-aggregate/src/first_last.rs
index 3ca1422668..493c2cde2b 100644
--- a/datafusion/functions-aggregate/src/first_last.rs
+++ b/datafusion/functions-aggregate/src/first_last.rs
@@ -33,8 +33,8 @@ use
datafusion_expr::aggregate_doc_sections::DOC_SECTION_GENERAL;
use datafusion_expr::function::{AccumulatorArgs, StateFieldsArgs};
use datafusion_expr::utils::{format_state_name, AggregateOrderSensitivity};
use datafusion_expr::{
- Accumulator, AggregateUDFImpl, ArrayFunctionSignature, Documentation, Expr,
- ExprFunctionExt, Signature, SortExpr, TypeSignature, Volatility,
+ Accumulator, AggregateUDFImpl, Documentation, Expr, ExprFunctionExt,
Signature,
+ SortExpr, Volatility,
};
use datafusion_functions_aggregate_common::utils::get_sort_options;
use datafusion_physical_expr_common::sort_expr::LexOrdering;
@@ -79,15 +79,7 @@ impl Default for FirstValue {
impl FirstValue {
pub fn new() -> Self {
Self {
- signature: Signature::one_of(
- vec![
- // TODO: we can introduce more strict signature that only
numeric of array types are allowed
-
TypeSignature::ArraySignature(ArrayFunctionSignature::Array),
- TypeSignature::Numeric(1),
- TypeSignature::Uniform(1, vec![DataType::Utf8]),
- ],
- Volatility::Immutable,
- ),
+ signature: Signature::any(1, Volatility::Immutable),
requirement_satisfied: false,
}
}
@@ -406,15 +398,7 @@ impl Default for LastValue {
impl LastValue {
pub fn new() -> Self {
Self {
- signature: Signature::one_of(
- vec![
- // TODO: we can introduce more strict signature that only
numeric of array types are allowed
-
TypeSignature::ArraySignature(ArrayFunctionSignature::Array),
- TypeSignature::Numeric(1),
- TypeSignature::Uniform(1, vec![DataType::Utf8]),
- ],
- Volatility::Immutable,
- ),
+ signature: Signature::any(1, Volatility::Immutable),
requirement_satisfied: false,
}
}
diff --git a/datafusion/functions-aggregate/src/stddev.rs
b/datafusion/functions-aggregate/src/stddev.rs
index b785d8e985..d1f43c6661 100644
--- a/datafusion/functions-aggregate/src/stddev.rs
+++ b/datafusion/functions-aggregate/src/stddev.rs
@@ -71,10 +71,7 @@ impl Stddev {
/// Create a new STDDEV aggregate function
pub fn new() -> Self {
Self {
- signature: Signature::coercible(
- vec![DataType::Float64],
- Volatility::Immutable,
- ),
+ signature: Signature::numeric(1, Volatility::Immutable),
alias: vec!["stddev_samp".to_string()],
}
}
diff --git a/datafusion/functions-aggregate/src/variance.rs
b/datafusion/functions-aggregate/src/variance.rs
index 0090886109..8daa85a5cc 100644
--- a/datafusion/functions-aggregate/src/variance.rs
+++ b/datafusion/functions-aggregate/src/variance.rs
@@ -82,10 +82,7 @@ impl VarianceSample {
pub fn new() -> Self {
Self {
aliases: vec![String::from("var_sample"),
String::from("var_samp")],
- signature: Signature::coercible(
- vec![DataType::Float64],
- Volatility::Immutable,
- ),
+ signature: Signature::numeric(1, Volatility::Immutable),
}
}
}
diff --git a/datafusion/functions/src/math/abs.rs
b/datafusion/functions/src/math/abs.rs
index 5511a57d85..798939162a 100644
--- a/datafusion/functions/src/math/abs.rs
+++ b/datafusion/functions/src/math/abs.rs
@@ -117,7 +117,7 @@ impl Default for AbsFunc {
impl AbsFunc {
pub fn new() -> Self {
Self {
- signature: Signature::any(1, Volatility::Immutable),
+ signature: Signature::numeric(1, Volatility::Immutable),
}
}
}
diff --git a/datafusion/functions/src/string/bit_length.rs
b/datafusion/functions/src/string/bit_length.rs
index 25b56341fc..d02c2b6a65 100644
--- a/datafusion/functions/src/string/bit_length.rs
+++ b/datafusion/functions/src/string/bit_length.rs
@@ -79,7 +79,7 @@ impl ScalarUDFImpl for BitLengthFunc {
ScalarValue::LargeUtf8(v) => Ok(ColumnarValue::Scalar(
ScalarValue::Int64(v.as_ref().map(|x| (x.len() * 8) as
i64)),
)),
- _ => unreachable!(),
+ _ => unreachable!("bit length"),
},
}
}
diff --git a/datafusion/functions/src/string/concat.rs
b/datafusion/functions/src/string/concat.rs
index e3834b2918..e429a938b2 100644
--- a/datafusion/functions/src/string/concat.rs
+++ b/datafusion/functions/src/string/concat.rs
@@ -186,7 +186,7 @@ impl ScalarUDFImpl for ConcatFunc {
}
};
}
- _ => unreachable!(),
+ _ => unreachable!("concat"),
}
}
diff --git a/datafusion/functions/src/string/concat_ws.rs
b/datafusion/functions/src/string/concat_ws.rs
index 811939c169..611c48a963 100644
--- a/datafusion/functions/src/string/concat_ws.rs
+++ b/datafusion/functions/src/string/concat_ws.rs
@@ -164,7 +164,7 @@ impl ScalarUDFImpl for ConcatWsFunc {
ColumnarValueRef::NonNullableArray(string_array)
}
}
- _ => unreachable!(),
+ _ => unreachable!("concat ws"),
};
let mut columns = Vec::with_capacity(args.len() - 1);
diff --git a/datafusion/functions/src/string/lower.rs
b/datafusion/functions/src/string/lower.rs
index ef56120c58..02770e5e22 100644
--- a/datafusion/functions/src/string/lower.rs
+++ b/datafusion/functions/src/string/lower.rs
@@ -108,7 +108,7 @@ mod tests {
#[allow(deprecated)] // TODO migrate UDF invoke to invoke_batch
let result = match func.invoke(&args)? {
ColumnarValue::Array(result) => result,
- _ => unreachable!(),
+ _ => unreachable!("lower"),
};
assert_eq!(&expected, &result);
Ok(())
diff --git a/datafusion/functions/src/string/octet_length.rs
b/datafusion/functions/src/string/octet_length.rs
index 2ac2bf70da..89f71d4571 100644
--- a/datafusion/functions/src/string/octet_length.rs
+++ b/datafusion/functions/src/string/octet_length.rs
@@ -82,7 +82,7 @@ impl ScalarUDFImpl for OctetLengthFunc {
ScalarValue::Utf8View(v) => Ok(ColumnarValue::Scalar(
ScalarValue::Int32(v.as_ref().map(|x| x.len() as i32)),
)),
- _ => unreachable!(),
+ _ => unreachable!("OctetLengthFunc"),
},
}
}
diff --git a/datafusion/functions/src/string/repeat.rs
b/datafusion/functions/src/string/repeat.rs
index aa69f9c660..249ce15d6d 100644
--- a/datafusion/functions/src/string/repeat.rs
+++ b/datafusion/functions/src/string/repeat.rs
@@ -25,11 +25,12 @@ use arrow::array::{
OffsetSizeTrait, StringViewArray,
};
use arrow::datatypes::DataType;
-use arrow::datatypes::DataType::{Int64, LargeUtf8, Utf8, Utf8View};
+use arrow::datatypes::DataType::{LargeUtf8, Utf8, Utf8View};
use datafusion_common::cast::as_int64_array;
+use datafusion_common::types::{logical_int64, logical_string};
use datafusion_common::{exec_err, Result};
use datafusion_expr::scalar_doc_sections::DOC_SECTION_STRING;
-use datafusion_expr::{ColumnarValue, Documentation, TypeSignature, Volatility};
+use datafusion_expr::{ColumnarValue, Documentation, Volatility};
use datafusion_expr::{ScalarUDFImpl, Signature};
#[derive(Debug)]
@@ -46,15 +47,8 @@ impl Default for RepeatFunc {
impl RepeatFunc {
pub fn new() -> Self {
Self {
- signature: Signature::one_of(
- vec![
- // Planner attempts coercion to the target type starting
with the most preferred candidate.
- // For example, given input `(Utf8View, Int64)`, it first
tries coercing to `(Utf8View, Int64)`.
- // If that fails, it proceeds to `(Utf8, Int64)`.
- TypeSignature::Exact(vec![Utf8View, Int64]),
- TypeSignature::Exact(vec![Utf8, Int64]),
- TypeSignature::Exact(vec![LargeUtf8, Int64]),
- ],
+ signature: Signature::coercible(
+ vec![logical_string(), logical_int64()],
Volatility::Immutable,
),
}
diff --git a/datafusion/functions/src/string/upper.rs
b/datafusion/functions/src/string/upper.rs
index 68a9d60a16..1293e51fa9 100644
--- a/datafusion/functions/src/string/upper.rs
+++ b/datafusion/functions/src/string/upper.rs
@@ -108,7 +108,7 @@ mod tests {
#[allow(deprecated)] // TODO migrate UDF invoke to invoke_batch
let result = match func.invoke(&args)? {
ColumnarValue::Array(result) => result,
- _ => unreachable!(),
+ _ => unreachable!("upper"),
};
assert_eq!(&expected, &result);
Ok(())
diff --git a/datafusion/functions/src/unicode/character_length.rs
b/datafusion/functions/src/unicode/character_length.rs
index 7858a59664..eca8d3fd49 100644
--- a/datafusion/functions/src/unicode/character_length.rs
+++ b/datafusion/functions/src/unicode/character_length.rs
@@ -128,7 +128,7 @@ fn character_length(args: &[ArrayRef]) -> Result<ArrayRef> {
let string_array = args[0].as_string_view();
character_length_general::<Int32Type, _>(string_array)
}
- _ => unreachable!(),
+ _ => unreachable!("CharacterLengthFunc"),
}
}
diff --git a/datafusion/functions/src/unicode/lpad.rs
b/datafusion/functions/src/unicode/lpad.rs
index 767eda203c..a639bcedcd 100644
--- a/datafusion/functions/src/unicode/lpad.rs
+++ b/datafusion/functions/src/unicode/lpad.rs
@@ -162,7 +162,7 @@ pub fn lpad<T: OffsetSizeTrait>(args: &[ArrayRef]) ->
Result<ArrayRef> {
length_array,
&args[2],
),
- (_, _) => unreachable!(),
+ (_, _) => unreachable!("lpad"),
}
}
diff --git a/datafusion/sqllogictest/test_files/expr.slt
b/datafusion/sqllogictest/test_files/expr.slt
index 182afff7a6..c653113fd4 100644
--- a/datafusion/sqllogictest/test_files/expr.slt
+++ b/datafusion/sqllogictest/test_files/expr.slt
@@ -22,7 +22,7 @@ SELECT true, false, false = false, true = false
true false true false
# test_mathematical_expressions_with_null
-query RRRRRRRRRRRRRRRRRR?RRRRRIIIRRRRRRBB
+query RRRRRRRRRRRRRRRRRRRRRRRRIIIRRRRRRBB
SELECT
sqrt(NULL),
cbrt(NULL),
@@ -550,6 +550,14 @@ SELECT repeat(NULL, 4)
----
NULL
+query T
+select repeat('-1.2', arrow_cast(3, 'Int32'));
+----
+-1.2-1.2-1.2
+
+query error DataFusion error: Error during planning: Error during planning:
Coercion from \[Utf8, Float64\] to the signature
+select repeat('-1.2', 3.2);
+
query T
SELECT replace('abcdefabcdef', 'cd', 'XX')
----
diff --git a/datafusion/sqllogictest/test_files/math.slt
b/datafusion/sqllogictest/test_files/math.slt
index 1bc972a3e3..e86d78a623 100644
--- a/datafusion/sqllogictest/test_files/math.slt
+++ b/datafusion/sqllogictest/test_files/math.slt
@@ -126,9 +126,16 @@ statement error
SELECT abs(1, 2);
# abs: unsupported argument type
-query error DataFusion error: This feature is not implemented: Unsupported
data type Utf8 for function abs
+query error This feature is not implemented: Unsupported data type Utf8 for
function abs
SELECT abs('foo');
+# abs: numeric string
+# TODO: In Postgres, '-1.2' is unknown type and interpreted to float8 so they
don't fail on this query
+query error DataFusion error: This feature is not implemented: Unsupported
data type Utf8 for function abs
+select abs('-1.2');
+
+query error DataFusion error: This feature is not implemented: Unsupported
data type Utf8 for function abs
+select abs(arrow_cast('-1.2', 'Utf8'));
statement ok
CREATE TABLE test_nullable_integer(
diff --git a/datafusion/sqllogictest/test_files/scalar.slt
b/datafusion/sqllogictest/test_files/scalar.slt
index 145172f31f..fe7d1a90c5 100644
--- a/datafusion/sqllogictest/test_files/scalar.slt
+++ b/datafusion/sqllogictest/test_files/scalar.slt
@@ -67,13 +67,13 @@ CREATE TABLE small_floats(
## abs
# abs scalar function
-query III rowsort
+query III
select abs(64), abs(0), abs(-64);
----
64 0 64
# abs scalar nulls
-query ? rowsort
+query R
select abs(null);
----
NULL
@@ -1940,7 +1940,7 @@ select position('' in '')
----
1
-query error DataFusion error: Error during planning: Error during planning:
Int64 and Int64 are not coercible to a common string
+query error DataFusion error: Error during planning: Error during planning:
The signature expected NativeType::String but received NativeType::Int64
select position(1 in 1)
query I
diff --git a/datafusion/sqllogictest/test_files/timestamps.slt
b/datafusion/sqllogictest/test_files/timestamps.slt
index 70f7dedeac..a80036df2c 100644
--- a/datafusion/sqllogictest/test_files/timestamps.slt
+++ b/datafusion/sqllogictest/test_files/timestamps.slt
@@ -3351,3 +3351,10 @@ drop view t_utc;
statement ok
drop view t_europe;
+
+# TODO: In Postgres, '-1' is unknown type and interpreted to float8 so they
don't fail on this query
+query error DataFusion error: Arrow error: Parser error: Error parsing
timestamp from '\-1': timestamp must contain at least 10 characters
+select to_timestamp('-1');
+
+query error DataFusion error: Arrow error: Parser error: Error parsing
timestamp from '\-1': timestamp must contain at least 10 characters
+select to_timestamp(arrow_cast('-1', 'Utf8'));
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]