alamb commented on code in PR #7338:
URL: https://github.com/apache/arrow-datafusion/pull/7338#discussion_r1301979259
##########
datafusion/sql/src/expr/value.rs:
##########
@@ -151,16 +151,51 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> {
let data_types: HashSet<DataType> =
values.iter().map(|e| e.get_datatype()).collect();
- if data_types.is_empty() {
- Ok(lit(ScalarValue::new_list(None, DataType::Utf8)))
- } else if data_types.len() > 1 {
- Err(DataFusionError::NotImplemented(format!(
- "Arrays with different types are not supported:
{data_types:?}",
- )))
- } else {
- let data_type = values[0].get_datatype();
+ match data_types.len() {
+ 0 => Ok(lit(ScalarValue::new_list(None, DataType::Utf8))),
+ 1 => {
+ let data_type = values[0].get_datatype();
+
+ Ok(lit(ScalarValue::new_list(Some(values), data_type)))
+ }
+ // Check whether one of them is null
+ 2 => {
+ let has_null_type = data_types.contains(&DataType::Null);
+ if has_null_type {
+ // convert to Vec for indexing
+ let data_types =
data_types.into_iter().collect::<Vec<_>>();
+ let non_null_type = if data_types[0] == DataType::Null {
Review Comment:
How do we know the Null is the first or second type? Maybe you could use
something like
```
let non_null_type = data_types.find(|d| d == DataType::Null)
```
or something
##########
datafusion/sql/src/expr/value.rs:
##########
@@ -151,16 +151,51 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> {
let data_types: HashSet<DataType> =
Review Comment:
Yeah, I agree the use of a `HashSet` here is going to be tough because it
will lose the argument order (and thus you can't pick the first non null type
deterministically
What about keeping track of the types in both the map and a Vec, like
```
let data_types: Vec<DataType>= vec![];
let seen_types: HashSet<DataType> = HashSet::new();
```
Then I think you can use `data_types` below as you'll know it is non
duplicated
##########
datafusion/optimizer/src/analyzer/type_coercion.rs:
##########
@@ -543,6 +543,77 @@ fn coerce_arguments_for_signature(
.collect::<Result<Vec<_>>>()
}
+// TODO: Add this function to arrow-rs
+fn get_list_base_type(data_type: &DataType) -> Result<DataType> {
+ match data_type {
+ DataType::List(field) => match field.data_type() {
+ DataType::List(_) => get_list_base_type(field.data_type()),
+ base_type => Ok(base_type.clone()),
+ },
+
+ _ => Ok(data_type.clone()),
+ }
+}
+
+fn get_list_from_base_type(
+ data_type: &DataType,
+ base_type: &DataType,
+) -> Result<DataType> {
+ match data_type {
+ DataType::List(field) => match field.data_type() {
+ DataType::List(inner_field) =>
Ok(DataType::List(Arc::new(Field::new(
+ field.name(),
+ get_list_from_base_type(inner_field.data_type(), base_type)?,
+ field.is_nullable(),
+ )))),
+ _ => Ok(DataType::List(Arc::new(Field::new(
+ field.name(),
+ base_type.clone(),
+ field.is_nullable(),
+ )))),
+ },
+
+ _ => Ok(base_type.clone()),
+ }
+}
+
+fn coerce_nulls_for_array_append(
Review Comment:
I dont understand what this function is trying to do and I don't understand
why the rules for array_append would be different than the rules for array
scalars.
could you add some comments or maybe try to unify the implementations
somehow?
##########
datafusion/sql/src/expr/value.rs:
##########
@@ -151,16 +151,51 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> {
let data_types: HashSet<DataType> =
values.iter().map(|e| e.get_datatype()).collect();
- if data_types.is_empty() {
- Ok(lit(ScalarValue::new_list(None, DataType::Utf8)))
- } else if data_types.len() > 1 {
- Err(DataFusionError::NotImplemented(format!(
- "Arrays with different types are not supported:
{data_types:?}",
- )))
- } else {
- let data_type = values[0].get_datatype();
+ match data_types.len() {
+ 0 => Ok(lit(ScalarValue::new_list(None, DataType::Utf8))),
+ 1 => {
+ let data_type = values[0].get_datatype();
+
+ Ok(lit(ScalarValue::new_list(Some(values), data_type)))
+ }
+ // Check whether one of them is null
+ 2 => {
Review Comment:
Why does this stop at 2? Why not handle 3 or more different data types?
##########
datafusion/sqllogictest/test_files/array.slt:
##########
@@ -907,6 +927,16 @@ select list_push_back(make_array(1, 2, 3), 4),
list_push_back(make_array(1.0, 2.
----
[1, 2, 3, 4] [1.0, 2.0, 3.0, 4.0] [h, e, l, l, o]
+query ?
+select array_append([1, 2, 3], null);
Review Comment:
Can we also please add a test with more than two types, for example:
```sql
select [1, '2', Null]
```
Which postgres does
```sql
postgres=# select ARRAY [ 1, '2', NULL ];
array
------------
{1,2,NULL}
(1 row)
```
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]