EeshanBembi commented on code in PR #18137:
URL: https://github.com/apache/datafusion/pull/18137#discussion_r2658921965
##########
datafusion/functions/src/string/concat.rs:
##########
@@ -88,37 +145,91 @@ impl ScalarUDFImpl for ConcatFunc {
&self.signature
}
+ fn coerce_types(&self, arg_types: &[DataType]) -> Result<Vec<DataType>> {
+ use DataType::*;
+
+ if arg_types.is_empty() {
+ return plan_err!("concat requires at least one argument");
+ }
+
+ let has_arrays = arg_types
+ .iter()
+ .any(|dt| matches!(dt, List(_) | LargeList(_) | FixedSizeList(_,
_)));
+ let has_non_arrays = arg_types
+ .iter()
+ .any(|dt| !matches!(dt, List(_) | LargeList(_) | FixedSizeList(_,
_) | Null));
+
+ if has_arrays && has_non_arrays {
+ return plan_err!(
+ "Cannot mix array and non-array arguments in concat function. \
+ Use concat(array1, array2, ...) for arrays or concat(str1,
str2, ...) for strings, but not both."
+ );
+ }
+
+ if has_arrays {
+ return Ok(arg_types.to_vec());
+ }
+
+ let target_type = self.get_string_type_precedence(arg_types);
+
+ // Only coerce types that need coercion, keep string types as-is
+ let coerced_types = arg_types
+ .iter()
+ .map(|data_type| match data_type {
+ Utf8View | Utf8 | LargeUtf8 => data_type.clone(),
+ _ => target_type.clone(),
+ })
+ .collect();
+ Ok(coerced_types)
+ }
+
fn return_type(&self, arg_types: &[DataType]) -> Result<DataType> {
use DataType::*;
- let mut dt = &Utf8;
- arg_types.iter().for_each(|data_type| {
- if data_type == &Utf8View {
- dt = data_type;
- }
- if data_type == &LargeUtf8 && dt != &Utf8View {
- dt = data_type;
+
+ // Check if any argument is an array type
+ for data_type in arg_types {
+ if let List(field) | LargeList(field) | FixedSizeList(field, _) =
data_type {
+ return Ok(List(Arc::new(arrow::datatypes::Field::new(
+ "item",
+ field.data_type().clone(),
+ true,
+ ))));
}
- });
+ }
- Ok(dt.to_owned())
+ // For non-array arguments, return string type based on precedence
+ let dt = self.get_string_type_precedence(arg_types);
+ Ok(dt)
}
/// Concatenates the text representations of all the arguments. NULL
arguments are ignored.
/// concat('abcde', 2, NULL, 22) = 'abcde222'
fn invoke_with_args(&self, args: ScalarFunctionArgs) ->
Result<ColumnarValue> {
+ use DataType::*;
let ScalarFunctionArgs { args, .. } = args;
- let mut return_datatype = DataType::Utf8;
- args.iter().for_each(|col| {
- if col.data_type() == DataType::Utf8View {
- return_datatype = col.data_type();
- }
- if col.data_type() == DataType::LargeUtf8
- && return_datatype != DataType::Utf8View
- {
- return_datatype = col.data_type();
+ if args.is_empty() {
+ return plan_err!("concat requires at least one argument");
+ }
+
+ for arg in &args {
+ let is_array = match arg {
+ ColumnarValue::Array(array) => matches!(
+ array.data_type(),
+ List(_) | LargeList(_) | FixedSizeList(_, _)
+ ),
+ ColumnarValue::Scalar(scalar) => matches!(
+ scalar.data_type(),
Review Comment:
Applied the same simplification to the invoke logic: it now checks only
`args[0]` instead of iterating through all arguments.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]