alamb commented on code in PR #3122:
URL: https://github.com/apache/arrow-datafusion/pull/3122#discussion_r946164515
##########
datafusion/physical-expr/src/array_expressions.rs:
##########
@@ -34,7 +34,10 @@ macro_rules! downcast_vec {
}};
}
-macro_rules! array {
+/// Create an array of FixedSizeList from a set of individual Arrays
+/// where each element in the output FixedSizeList is the result of
+/// concatenating the corresponding values in the input Arrays
+macro_rules! make_fixed_size_list {
Review Comment:
I renamed this to be more descriptive of what it does (there are too many
arrays in DataFusion already ;)
##########
datafusion/core/tests/sql/functions.rs:
##########
@@ -111,8 +111,8 @@ async fn query_concat() -> Result<()> {
Ok(())
}
-#[tokio::test]
-async fn query_array() -> Result<()> {
+// Return a session context with table "test" registered with 2 columns
Review Comment:
The changes in this file illustrate the change in behavior that this PR makes
##########
datafusion/core/tests/sql/functions.rs:
##########
@@ -124,43 +124,110 @@ async fn query_array() -> Result<()> {
Arc::new(StringArray::from_slice(&["", "a", "aa", "aaa"])),
Arc::new(Int32Array::from(vec![Some(0), Some(1), None, Some(3)])),
],
- )?;
+ )
+ .unwrap();
- let table = MemTable::try_new(schema, vec![vec![data]])?;
+ let table = MemTable::try_new(schema, vec![vec![data]]).unwrap();
let ctx = SessionContext::new();
- ctx.register_table("test", Arc::new(table))?;
- let sql = "SELECT array(c1, cast(c2 as varchar)) FROM test";
+ ctx.register_table("test", Arc::new(table)).unwrap();
+ ctx
+}
+
+#[tokio::test]
+async fn query_array() {
+ let ctx = array_context();
+ let sql = "SELECT array[c1, cast(c2 as varchar)] FROM test";
let actual = execute_to_batches(&ctx, sql).await;
let expected = vec![
- "+--------------------------------------+",
- "| array(test.c1,CAST(test.c2 AS Utf8)) |",
- "+--------------------------------------+",
- "| [, 0] |",
- "| [a, 1] |",
- "| [aa, ] |",
- "| [aaa, 3] |",
- "+--------------------------------------+",
+ "+----------+",
+ "| array |",
Review Comment:
using `array` for the column name is consistent with postgres
##########
datafusion/sql/src/planner.rs:
##########
@@ -2360,50 +2360,15 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> {
.is_ok()
}
- fn sql_array_literal(
- &self,
- elements: Vec<SQLExpr>,
- schema: &DFSchema,
- ) -> Result<Expr> {
- let mut values = Vec::with_capacity(elements.len());
-
- for element in elements {
- let value =
- self.sql_expr_to_logical_expr(element, schema, &mut
HashMap::new())?;
- match value {
- Expr::Literal(scalar) => {
- values.push(scalar);
- }
- _ => {
- return Err(DataFusionError::NotImplemented(format!(
- "Arrays with elements other than literal are not
supported: {}",
- value
- )));
- }
- }
- }
-
- let data_types: HashSet<DataType> =
- values.iter().map(|e| e.get_datatype()).collect();
-
- if data_types.is_empty() {
- Ok(Expr::Literal(ScalarValue::List(
- None,
- Box::new(Field::new("item", DataType::Utf8, true)),
- )))
- } else if data_types.len() > 1 {
- Err(DataFusionError::NotImplemented(format!(
- "Arrays with different types are not supported: {:?}",
- data_types,
- )))
- } else {
- let data_type = values[0].get_datatype();
+ fn sql_array_expr(&self, elements: Vec<SQLExpr>, schema: &DFSchema) ->
Result<Expr> {
+ let args: Vec<Expr> = elements
+ .into_iter()
+ .map(|expr| self.sql_expr_to_logical_expr(expr, schema, &mut
HashMap::new()))
+ .collect::<Result<_>>()?;
- Ok(Expr::Literal(ScalarValue::List(
- Some(values),
- Box::new(Field::new("item", data_type, true)),
- )))
- }
+ let fun = BuiltinScalarFunction::MakeArray;
+ // follow postgres convention and name result "array"
+ Ok(Expr::ScalarFunction { fun, args }.alias("array"))
Review Comment:
This is the rewrite in the planner that turns the `array[]` syntax into a
`make_array` function call
##########
datafusion/expr/src/function.rs:
##########
@@ -267,12 +267,8 @@ pub fn return_type(
pub fn signature(fun: &BuiltinScalarFunction) -> Signature {
// note: the physical expression must accept the type returned by this
function or the execution panics.
- // for now, the list is small, as we do not have many built-in functions.
match fun {
- BuiltinScalarFunction::Array => Signature::variadic(
- array_expressions::SUPPORTED_ARRAY_TYPES.to_vec(),
- fun.volatility(),
- ),
+ BuiltinScalarFunction::MakeArray =>
Signature::variadic_equal(fun.volatility()),
Review Comment:
This requires all arguments to `make_array` to be the same type
##########
datafusion/physical-expr/src/array_expressions.rs:
##########
@@ -34,7 +34,10 @@ macro_rules! downcast_vec {
}};
}
-macro_rules! array {
+/// Create an array of FixedSizeList from a set of individual Arrays
+/// where each element in the output FixedSizeList is the result of
+/// concatenating the corresponding values in the input Arrays
+macro_rules! make_fixed_size_list {
Review Comment:
I simply renamed this macro to something that shows more of what it is doing
##########
datafusion/core/tests/sql/functions.rs:
##########
@@ -124,43 +124,110 @@ async fn query_array() -> Result<()> {
Arc::new(StringArray::from_slice(&["", "a", "aa", "aaa"])),
Arc::new(Int32Array::from(vec![Some(0), Some(1), None, Some(3)])),
],
- )?;
+ )
+ .unwrap();
- let table = MemTable::try_new(schema, vec![vec![data]])?;
+ let table = MemTable::try_new(schema, vec![vec![data]]).unwrap();
let ctx = SessionContext::new();
- ctx.register_table("test", Arc::new(table))?;
- let sql = "SELECT array(c1, cast(c2 as varchar)) FROM test";
+ ctx.register_table("test", Arc::new(table)).unwrap();
+ ctx
+}
+
+#[tokio::test]
+async fn query_array() {
+ let ctx = array_context();
+ let sql = "SELECT array[c1, cast(c2 as varchar)] FROM test";
let actual = execute_to_batches(&ctx, sql).await;
let expected = vec![
- "+--------------------------------------+",
- "| array(test.c1,CAST(test.c2 AS Utf8)) |",
- "+--------------------------------------+",
- "| [, 0] |",
- "| [a, 1] |",
- "| [aa, ] |",
- "| [aaa, 3] |",
- "+--------------------------------------+",
+ "+----------+",
+ "| array |",
+ "+----------+",
+ "| [, 0] |",
+ "| [a, 1] |",
+ "| [aa, ] |",
+ "| [aaa, 3] |",
+ "+----------+",
+ ];
+ assert_batches_eq!(expected, &actual);
+}
+
+#[tokio::test]
+async fn query_make_array() {
+ let ctx = array_context();
+ let sql = "SELECT make_array(c1, cast(c2 as varchar)) FROM test";
+ let actual = execute_to_batches(&ctx, sql).await;
+ let expected = vec![
+ "+------------------------------------------+",
+ "| makearray(test.c1,CAST(test.c2 AS Utf8)) |",
+ "+------------------------------------------+",
+ "| [, 0] |",
+ "| [a, 1] |",
+ "| [aa, ] |",
+ "| [aaa, 3] |",
+ "+------------------------------------------+",
];
assert_batches_eq!(expected, &actual);
- Ok(())
}
#[tokio::test]
-async fn query_array_scalar() -> Result<()> {
+async fn query_array_scalar() {
let ctx = SessionContext::new();
- let sql = "SELECT array(1, 2, 3);";
+ let sql = "SELECT array[1, 2, 3];";
let actual = execute_to_batches(&ctx, sql).await;
let expected = vec![
- "+-----------------------------------+",
- "| array(Int64(1),Int64(2),Int64(3)) |",
- "+-----------------------------------+",
- "| [1, 2, 3] |",
- "+-----------------------------------+",
+ "+-----------+",
+ "| array |",
+ "+-----------+",
+ "| [1, 2, 3] |",
+ "+-----------+",
+ ];
+ assert_batches_eq!(expected, &actual);
+
+ // alternate syntax format
+ let sql = "SELECT [1, 2, 3];";
+ let actual = execute_to_batches(&ctx, sql).await;
+ assert_batches_eq!(expected, &actual);
+}
+
+#[tokio::test]
+async fn query_array_scalar_bad_types() {
+ let ctx = SessionContext::new();
+
+ // no common type to coerce to, should error
+ let err = plan_and_collect(&ctx, "SELECT [1, true, null]")
+ .await
+ .unwrap_err();
+ assert_eq!(err.to_string(), "Error during planning: Coercion from [Int64,
Boolean, Null] to the signature VariadicEqual failed.",);
+}
+
+#[tokio::test]
+async fn query_array_scalar_coerce() {
+ let ctx = SessionContext::new();
+
+ // The planner should be able to coerce this to all integers
+ // https://github.com/apache/arrow-datafusion/issues/3170
+ let err = plan_and_collect(&ctx, "SELECT [1, 2, '3']")
+ .await
+ .unwrap_err();
+ assert_eq!(err.to_string(), "Error during planning: Coercion from [Int64,
Int64, Utf8] to the signature VariadicEqual failed.",);
+}
+
+#[tokio::test]
+async fn query_make_array_scalar() {
+ let ctx = SessionContext::new();
+
+ let sql = "SELECT make_array(1, 2, 3);";
Review Comment:
This is the equivalent of `SELECT array(1, 2, 3)` on master
##########
datafusion/sql/src/planner.rs:
##########
@@ -3330,25 +3294,12 @@ mod tests {
);
}
- #[test]
- fn select_array_no_common_type() {
- let sql = "SELECT [1, true, null]";
- let err = logical_plan(sql).expect_err("query should have failed");
-
- // HashSet doesn't guarantee order
- assert_contains!(
- err.to_string(),
- r#"Arrays with different types are not supported: "#
Review Comment:
this test is moved to `functions.rs`
##########
datafusion/core/tests/sql/functions.rs:
##########
@@ -124,43 +124,110 @@ async fn query_array() -> Result<()> {
Arc::new(StringArray::from_slice(&["", "a", "aa", "aaa"])),
Arc::new(Int32Array::from(vec![Some(0), Some(1), None, Some(3)])),
],
- )?;
+ )
+ .unwrap();
- let table = MemTable::try_new(schema, vec![vec![data]])?;
+ let table = MemTable::try_new(schema, vec![vec![data]]).unwrap();
let ctx = SessionContext::new();
- ctx.register_table("test", Arc::new(table))?;
- let sql = "SELECT array(c1, cast(c2 as varchar)) FROM test";
+ ctx.register_table("test", Arc::new(table)).unwrap();
+ ctx
+}
+
+#[tokio::test]
+async fn query_array() {
+ let ctx = array_context();
+ let sql = "SELECT array[c1, cast(c2 as varchar)] FROM test";
Review Comment:
Prior to this PR, this query would error (only constants were supported in
the `array[]` syntax). cc @ovr
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]