alamb commented on code in PR #6662:
URL: https://github.com/apache/arrow-datafusion/pull/6662#discussion_r1237411053


##########
datafusion/expr/src/built_in_function.rs:
##########
@@ -440,46 +440,44 @@ impl BuiltinScalarFunction {
         // the return type of the built in function.
         // Some built-in functions' return type depends on the incoming type.
         match self {
-            BuiltinScalarFunction::ArrayAppend => match &input_expr_types[0] {
-                List(field) => Ok(List(Arc::new(Field::new(
-                    "item",
-                    field.data_type().clone(),
-                    true,
-                )))),
-                _ => Err(DataFusionError::Internal(format!(
-                    "The {self} function can only accept list as the first 
argument"
-                ))),
-            },
-            BuiltinScalarFunction::ArrayConcat => match &input_expr_types[0] {
-                List(field) => Ok(List(Arc::new(Field::new(
-                    "item",
-                    field.data_type().clone(),
-                    true,
-                )))),
-                _ => Err(DataFusionError::Internal(format!(
-                    "The {self} function can only accept fixed size list as 
the args."
-                ))),
-            },
-            BuiltinScalarFunction::ArrayDims => Ok(UInt8),
-            BuiltinScalarFunction::ArrayFill => Ok(List(Arc::new(Field::new(
+            BuiltinScalarFunction::ArrayAppend => Ok(List(Arc::new(Field::new(
                 "item",
-                input_expr_types[0].clone(),
+                input_expr_types[1].clone(),
                 true,
             )))),
+            BuiltinScalarFunction::ArrayConcat => {
+                let mut expr_type = Null;
+                for input_expr_type in input_expr_types {
+                    match input_expr_type {
+                        List(field) => {
+                            if !field.data_type().equals_datatype(&Null) {
+                                expr_type = field.data_type().clone();
+                                break;
+                            }
+                        }
+                        _ => {
+                            return Err(DataFusionError::Internal(format!(
+                                "The {self} function can only accept list as 
the args."
+                            )))
+                        }
+                    }
+                }
+
+                Ok(List(Arc::new(Field::new("item", expr_type, true))))
+            }
+            BuiltinScalarFunction::ArrayDims => Ok(UInt8),
+            BuiltinScalarFunction::ArrayFill => Ok(Null),

Review Comment:
   I don't understand why ArrayFill always returns `Null` as its data type
   
   My reading of https://www.postgresql.org/docs/9.1/functions-array.html 
suggests that it should return something like `List(args[0].type)`
   
   



##########
datafusion/optimizer/src/analyzer/type_coercion.rs:
##########
@@ -635,6 +635,20 @@ fn cast_expr(expr: &Expr, to_type: &DataType, schema: 
&DFSchema) -> Result<Expr>
     expr.clone().cast_to(to_type, schema)
 }
 
+/// Cast array `expr` to the specified type, if possible
+fn cast_array_expr(
+    expr: &Expr,
+    from_type: &DataType,
+    to_type: &DataType,
+    schema: &DFSchema,
+) -> Result<Expr> {
+    if from_type.equals_datatype(&DataType::Null) {

Review Comment:
   I would have thought we should be casting all the arguments that are `null` 
to the specific type of the rest of the arguments...



##########
datafusion/core/tests/sqllogictests/test_files/array.slt:
##########
@@ -99,9 +116,9 @@ select array_prepend(1, make_array(2, 3, 4)), 
array_prepend(1.0, make_array(2.0,
 [1, 2, 3, 4] [1.0, 2.0, 3.0, 4.0] [h, e, l, l, o]
 
 # array_fill scalar function #1
-query error DataFusion error: SQL error: ParserError\("Expected an SQL 
statement, found: caused"\)
+query error DataFusion error: SQL error: TokenizerError\("Unterminated string 
literal at Line: 2, Column 856"\)
 caused by
-Error during planning: Cannot automatically convert List\(Field \{ name: 
"item", data_type: List\(Field \{ name: "item", data_type: List\(Field \{ name: 
"item", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, 
metadata: \{\} \}\), nullable: true, dict_id: 0, dict_is_ordered: false, 
metadata: \{\} \}\), nullable: true, dict_id: 0, dict_is_ordered: false, 
metadata: \{\} \}\) to List\(Field \{ name: "item", data_type: Int64, nullable: 
true, dict_id: 0, dict_is_ordered: false, metadata: \{\} \}\)
+Internal error: Optimizer rule 'simplify_expressions' failed, due to generate 
a different schema, original schema: DFSchema \{ fields: \[DFField \{ 
qualifier: None, field: Field \{ name: 
"array_fill\(Int64\(1\),make_array\(\)\)", data_type: List\(Field \{ name: 
"item", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, 
metadata: \{\} \}\), nullable: true, dict_id: 0, dict_is_ordered: false, 
metadata: \{\} \} \}\], metadata: \{\} \}, new schema: DFSchema \{ fields: 
\[DFField \{ qualifier: None, field: Field \{ name: 
"array_fill\(Int64\(1\),make_array\(\)\)", data_type: List\(Field \{ name: 
"item", data_type: Null, nullable: true, dict_id: 0, dict_is_ordered: false, 
metadata: \{\} \}\), nullable: false, dict_id: 0, dict_is_ordered: false, 
metadata: \{\} \} \}\], metadata: \{\} \}\. This was likely caused by a bug in 
DataFusion's code and we would welcome that you file an bug report in our issue 
tracker

Review Comment:
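   Something seems wrong with this test: the expected output is now a 
`TokenizerError` (unterminated string literal) followed by an internal 
`simplify_expressions` schema-mismatch error, which the message itself says is 
likely a bug in DataFusion.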



##########
datafusion/physical-expr/src/array_expressions.rs:
##########
@@ -22,12 +22,23 @@ use arrow::buffer::Buffer;
 use arrow::compute;
 use arrow::datatypes::{DataType, Field};
 use core::any::type_name;
-use datafusion_common::cast::as_list_array;
+use datafusion_common::cast::{as_generic_string_array, as_int64_array, 
as_list_array};
 use datafusion_common::ScalarValue;
 use datafusion_common::{DataFusionError, Result};
 use datafusion_expr::ColumnarValue;
 use std::sync::Arc;
 
+macro_rules! downcast_arg {

Review Comment:
   I think this is the same as `downcast_value`: 
https://docs.rs/datafusion-common/26.0.0/datafusion_common/macro.downcast_value.html



##########
datafusion/physical-expr/src/array_expressions.rs:
##########
@@ -56,20 +67,29 @@ macro_rules! new_builder {
 
 macro_rules! array {
     ($ARGS:expr, $ARRAY_TYPE:ident, $BUILDER_TYPE:ident) => {{
-        // downcast all arguments to their common format
-        let args =
-            downcast_vec!($ARGS, 
$ARRAY_TYPE).collect::<Result<Vec<&$ARRAY_TYPE>>>()?;
-
-        let builder = new_builder!($BUILDER_TYPE, args[0].len());
+        let builder = new_builder!($BUILDER_TYPE, $ARGS[0].len());
         let mut builder =
-            ListBuilder::<$BUILDER_TYPE>::with_capacity(builder, args.len());
+            ListBuilder::<$BUILDER_TYPE>::with_capacity(builder, $ARGS.len());
+
         // for each entry in the array
-        for index in 0..args[0].len() {
-            for arg in &args {
-                if arg.is_null(index) {
-                    builder.values().append_null();
-                } else {
-                    builder.values().append_value(arg.value(index));
+        for index in 0..$ARGS[0].len() {

Review Comment:
   I am not sure about this approach of taking either a `ListArray` or a 
`NullArray`
   
   In the other functions, the way NULL is treated is that the input types are 
always the same (in this case `ListArray`) and the values would be `null` (i.e. 
`array.is_valid(i)` would return false for rows that are null).
   
   Complicating matters is what happens when you type a literal `null` in SQL, like:
   
   ```sql
   select array_concat([1,2], null)
   ```
   
   That comes to DataFusion as a `null` literal (with DataType::Null). The 
coercion / casting logic normally will coerce this to the appropriate type. 
   
   For example, here is how I think arithmetic works with null:
   
   ```sql
   select 1 + NULL
   ```
   
   This arrives as:
   ```sql
   ScalarValue::Int32(Some(1)) + ScalarValue::Null
   ```
   
   And then the coercion logic will add a cast to Int32:
   
   ```sql
   ScalarValue::Int32(Some(1)) + CAST(ScalarValue::Null, DataType::Int32)
   ```
   
   And then the constant folder will collapse this into:
   
   
   ```sql
   ScalarValue::Int32(Some(1)) + ScalarValue::Int32(None)
   ```
   
   So by the time the arithmetic kernel sees it, it only has to deal with 
arguments of type `Int32`.
   



##########
datafusion/expr/src/built_in_function.rs:
##########
@@ -440,46 +440,44 @@ impl BuiltinScalarFunction {
         // the return type of the built in function.
         // Some built-in functions' return type depends on the incoming type.
         match self {
-            BuiltinScalarFunction::ArrayAppend => match &input_expr_types[0] {
-                List(field) => Ok(List(Arc::new(Field::new(
-                    "item",
-                    field.data_type().clone(),
-                    true,
-                )))),
-                _ => Err(DataFusionError::Internal(format!(
-                    "The {self} function can only accept list as the first 
argument"
-                ))),
-            },
-            BuiltinScalarFunction::ArrayConcat => match &input_expr_types[0] {
-                List(field) => Ok(List(Arc::new(Field::new(
-                    "item",
-                    field.data_type().clone(),
-                    true,
-                )))),
-                _ => Err(DataFusionError::Internal(format!(
-                    "The {self} function can only accept fixed size list as 
the args."
-                ))),
-            },
-            BuiltinScalarFunction::ArrayDims => Ok(UInt8),
-            BuiltinScalarFunction::ArrayFill => Ok(List(Arc::new(Field::new(
+            BuiltinScalarFunction::ArrayAppend => Ok(List(Arc::new(Field::new(
                 "item",
-                input_expr_types[0].clone(),
+                input_expr_types[1].clone(),
                 true,
             )))),
+            BuiltinScalarFunction::ArrayConcat => {
+                let mut expr_type = Null;
+                for input_expr_type in input_expr_types {
+                    match input_expr_type {
+                        List(field) => {
+                            if !field.data_type().equals_datatype(&Null) {
+                                expr_type = field.data_type().clone();
+                                break;
+                            }
+                        }
+                        _ => {
+                            return Err(DataFusionError::Internal(format!(

Review Comment:
   I think Internal errors are only intended for bugs in DataFusion -- this 
error seems like it could come from bad user input too
   
   ```suggestion
                               return Err(DataFusionError::Plan(format!(
   ```



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to