This is an automated email from the ASF dual-hosted git repository.
alamb pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-datafusion.git
The following commit(s) were added to refs/heads/main by this push:
new 371ef46bdd feat: array containment operator `@>` and `<@` (#6885)
371ef46bdd is described below
commit 371ef46bddfecbf6928b4662c1c5bbcca0e49aae
Author: Igor Izvekov <[email protected]>
AuthorDate: Mon Jul 31 23:01:36 2023 +0300
feat: array containment operator `@>` and `<@` (#6885)
* feat: array containment operator
* refactor: replace array_contains with array_has_all
* feat: parser supports json operators
* fix: comments
---
.../core/tests/sqllogictests/test_files/array.slt | 30 ++++++++++++++++++++
datafusion/expr/src/operator.rs | 16 +++++++++--
datafusion/expr/src/type_coercion/binary.rs | 17 +++++++++++
datafusion/physical-expr/src/expressions/binary.rs | 7 +++--
datafusion/proto/src/logical_plan/from_proto.rs | 2 ++
datafusion/proto/src/logical_plan/mod.rs | 2 ++
datafusion/sql/src/expr/json_access.rs | 33 ++++++++++++++++++++++
datafusion/sql/src/expr/mod.rs | 11 ++++++++
datafusion/sql/tests/sql_integration.rs | 28 ++++++++++++++++++
datafusion/substrait/src/logical_plan/consumer.rs | 2 ++
datafusion/substrait/src/logical_plan/producer.rs | 2 ++
11 files changed, 146 insertions(+), 4 deletions(-)
diff --git a/datafusion/core/tests/sqllogictests/test_files/array.slt b/datafusion/core/tests/sqllogictests/test_files/array.slt
index 2c7cfa7e7a..27d288cf60 100644
--- a/datafusion/core/tests/sqllogictests/test_files/array.slt
+++ b/datafusion/core/tests/sqllogictests/test_files/array.slt
@@ -1723,7 +1723,10 @@ select list_has_all(make_array(1,2,3), make_array(4,5,6)),
----
false true false true
+
### Array operators tests
+
+
## array concatenate operator
# array concatenate operator with scalars #1 (like array_concat scalar function)
@@ -1744,8 +1747,35 @@ select 1 || make_array(2, 3, 4), 1.0 || make_array(2.0, 3.0, 4.0), 'h' || make_a
----
[1, 2, 3, 4] [1.0, 2.0, 3.0, 4.0] [h, e, l, l, o]
+## array containment operator
+
+# array containment operator with scalars #1 (at arrow)
+query ???????
+select make_array(1,2,3) @> make_array(1,3),
+ make_array(1,2,3) @> make_array(1,4),
+ make_array([1,2], [3,4]) @> make_array([1,2]),
+ make_array([1,2], [3,4]) @> make_array([1,3]),
+ make_array([1,2], [3,4]) @> make_array([1,2], [3,4], [5,6]),
+ make_array([[1,2,3]]) @> make_array([[1]]),
+ make_array([[1,2,3]]) @> make_array([[1,2,3]]);
+----
+true false true false false false true
+
+# array containment operator with scalars #2 (arrow at)
+query ???????
+select make_array(1,3) <@ make_array(1,2,3),
+ make_array(1,4) <@ make_array(1,2,3),
+ make_array([1,2]) <@ make_array([1,2], [3,4]),
+ make_array([1,3]) <@ make_array([1,2], [3,4]),
+ make_array([1,2], [3,4], [5,6]) <@ make_array([1,2], [3,4]),
+ make_array([[1]]) <@ make_array([[1,2,3]]),
+ make_array([[1,2,3]]) <@ make_array([[1,2,3]]);
+----
+true false true false false false true
+
### Array casting tests
+
## make_array
# make_array scalar function #1
diff --git a/datafusion/expr/src/operator.rs b/datafusion/expr/src/operator.rs
index 3e5b773dba..b94f8f15db 100644
--- a/datafusion/expr/src/operator.rs
+++ b/datafusion/expr/src/operator.rs
@@ -77,6 +77,10 @@ pub enum Operator {
BitwiseShiftLeft,
/// String concat
StringConcat,
+ /// At arrow, like `@>`
+ AtArrow,
+ /// Arrow at, like `<@`
+ ArrowAt,
}
impl Operator {
@@ -108,7 +112,9 @@ impl Operator {
| Operator::BitwiseXor
| Operator::BitwiseShiftRight
| Operator::BitwiseShiftLeft
- | Operator::StringConcat => None,
+ | Operator::StringConcat
+ | Operator::AtArrow
+ | Operator::ArrowAt => None,
}
}
@@ -167,6 +173,8 @@ impl Operator {
Operator::LtEq => Some(Operator::GtEq),
Operator::Gt => Some(Operator::Lt),
Operator::GtEq => Some(Operator::LtEq),
+ Operator::AtArrow => Some(Operator::ArrowAt),
+ Operator::ArrowAt => Some(Operator::AtArrow),
Operator::IsDistinctFrom
| Operator::IsNotDistinctFrom
| Operator::Plus
@@ -214,7 +222,9 @@ impl Operator {
| Operator::BitwiseShiftLeft
| Operator::BitwiseShiftRight
| Operator::BitwiseXor
- | Operator::StringConcat => 0,
+ | Operator::StringConcat
+ | Operator::AtArrow
+ | Operator::ArrowAt => 0,
}
}
}
@@ -247,6 +257,8 @@ impl fmt::Display for Operator {
Operator::BitwiseShiftRight => ">>",
Operator::BitwiseShiftLeft => "<<",
Operator::StringConcat => "||",
+ Operator::AtArrow => "@>",
+ Operator::ArrowAt => "<@",
};
write!(f, "{display}")
}
diff --git a/datafusion/expr/src/type_coercion/binary.rs b/datafusion/expr/src/type_coercion/binary.rs
index 9ebea19a16..0113408fcc 100644
--- a/datafusion/expr/src/type_coercion/binary.rs
+++ b/datafusion/expr/src/type_coercion/binary.rs
@@ -114,6 +114,14 @@ fn signature(lhs: &DataType, op: &Operator, rhs: &DataType) -> Result<Signature>
))
})
}
+ Operator::AtArrow
+ | Operator::ArrowAt => {
+ array_coercion(lhs, rhs).map(Signature::uniform).ok_or_else(|| {
+ DataFusionError::Plan(format!(
+ "Cannot infer common array type for arrow operation {lhs} {op} {rhs}"
+ ))
+ })
+ }
Operator::Plus |
Operator::Minus |
Operator::Multiply |
@@ -742,6 +750,15 @@ fn string_concat_coercion(lhs_type: &DataType, rhs_type: &DataType) -> Option<Da
})
}
+fn array_coercion(lhs_type: &DataType, rhs_type: &DataType) -> Option<DataType> {
+ // TODO: cast between array elements (#6558)
+ if lhs_type.equals_datatype(rhs_type) {
+ Some(lhs_type.to_owned())
+ } else {
+ None
+ }
+}
+
fn string_concat_internal_coercion(
from_type: &DataType,
to_type: &DataType,
diff --git a/datafusion/physical-expr/src/expressions/binary.rs b/datafusion/physical-expr/src/expressions/binary.rs
index b453e8135d..d2fc4600b9 100644
--- a/datafusion/physical-expr/src/expressions/binary.rs
+++ b/datafusion/physical-expr/src/expressions/binary.rs
@@ -76,8 +76,9 @@ use self::kernels_arrow::{
add_dyn_temporal_left_scalar, add_dyn_temporal_right_scalar,
subtract_dyn_temporal_left_scalar, subtract_dyn_temporal_right_scalar,
};
-
-use crate::array_expressions::{array_append, array_concat, array_prepend};
+use crate::array_expressions::{
+ array_append, array_concat, array_has_all, array_prepend,
+};
use crate::expressions::cast_column;
use crate::intervals::cp_solver::{propagate_arithmetic, propagate_comparison};
use crate::intervals::{apply_operator, Interval};
@@ -1103,6 +1104,8 @@ impl BinaryExpr {
(_, DataType::List(_)) => array_prepend(&[left, right]),
_ => binary_string_array_op!(left, right, concat_elements),
},
+ AtArrow => array_has_all(&[left, right]),
+ ArrowAt => array_has_all(&[right, left]),
}
}
}
diff --git a/datafusion/proto/src/logical_plan/from_proto.rs b/datafusion/proto/src/logical_plan/from_proto.rs
index 2591f179b9..1464f32bb3 100644
--- a/datafusion/proto/src/logical_plan/from_proto.rs
+++ b/datafusion/proto/src/logical_plan/from_proto.rs
@@ -1634,6 +1634,8 @@ pub fn from_proto_binary_op(op: &str) -> Result<Operator, Error> {
"RegexNotIMatch" => Ok(Operator::RegexNotIMatch),
"RegexNotMatch" => Ok(Operator::RegexNotMatch),
"StringConcat" => Ok(Operator::StringConcat),
+ "AtArrow" => Ok(Operator::AtArrow),
+ "ArrowAt" => Ok(Operator::ArrowAt),
other => Err(proto_error(format!(
"Unsupported binary operator '{other:?}'"
))),
diff --git a/datafusion/proto/src/logical_plan/mod.rs b/datafusion/proto/src/logical_plan/mod.rs
index 405c58c20b..9cbce29ed6 100644
--- a/datafusion/proto/src/logical_plan/mod.rs
+++ b/datafusion/proto/src/logical_plan/mod.rs
@@ -2434,6 +2434,8 @@ mod roundtrip_tests {
let ctx = SessionContext::new();
roundtrip_expr_test(test_expr, ctx);
}
+ test(Operator::ArrowAt);
+ test(Operator::AtArrow);
test(Operator::StringConcat);
test(Operator::RegexNotIMatch);
test(Operator::RegexNotMatch);
diff --git a/datafusion/sql/src/expr/json_access.rs b/datafusion/sql/src/expr/json_access.rs
new file mode 100644
index 0000000000..0b20393401
--- /dev/null
+++ b/datafusion/sql/src/expr/json_access.rs
@@ -0,0 +1,33 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+use crate::planner::{ContextProvider, SqlToRel};
+use datafusion_common::{DataFusionError, Result};
+use datafusion_expr::Operator;
+use sqlparser::ast::JsonOperator;
+
+impl<'a, S: ContextProvider> SqlToRel<'a, S> {
+ pub(crate) fn parse_sql_json_access(&self, op: JsonOperator) -> Result<Operator> {
+ match op {
+ JsonOperator::AtArrow => Ok(Operator::AtArrow),
+ JsonOperator::ArrowAt => Ok(Operator::ArrowAt),
+ _ => Err(DataFusionError::NotImplemented(format!(
+ "Unsupported SQL json operator {op:?}"
+ ))),
+ }
+ }
+}
diff --git a/datafusion/sql/src/expr/mod.rs b/datafusion/sql/src/expr/mod.rs
index b21c205ffc..0630bcb7e8 100644
--- a/datafusion/sql/src/expr/mod.rs
+++ b/datafusion/sql/src/expr/mod.rs
@@ -20,6 +20,7 @@ mod binary_op;
mod function;
mod grouping_set;
mod identifier;
+mod json_access;
mod order_by;
mod subquery;
mod substring;
@@ -70,6 +71,16 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> {
stack.push(StackEntry::SQLExpr(right));
stack.push(StackEntry::SQLExpr(left));
}
+ SQLExpr::JsonAccess {
+ left,
+ operator,
+ right,
+ } => {
+ let op = self.parse_sql_json_access(operator)?;
+ stack.push(StackEntry::Operator(op));
+ stack.push(StackEntry::SQLExpr(right));
+ stack.push(StackEntry::SQLExpr(left));
+ }
_ => {
let expr = self.sql_expr_to_logical_expr_internal(
*sql_expr,
diff --git a/datafusion/sql/tests/sql_integration.rs b/datafusion/sql/tests/sql_integration.rs
index 88dddc7336..26663f88c3 100644
--- a/datafusion/sql/tests/sql_integration.rs
+++ b/datafusion/sql/tests/sql_integration.rs
@@ -1106,6 +1106,22 @@ fn select_binary_expr_nested() {
quick_test(sql, expected);
}
+#[test]
+fn select_at_arrow_operator() {
+ let sql = "SELECT left @> right from array";
+ let expected = "Projection: array.left @> array.right\
+ \n TableScan: array";
+ quick_test(sql, expected);
+}
+
+#[test]
+fn select_arrow_at_operator() {
+ let sql = "SELECT left <@ right from array";
+ let expected = "Projection: array.left <@ array.right\
+ \n TableScan: array";
+ quick_test(sql, expected);
+}
+
#[test]
fn select_wildcard_with_groupby() {
quick_test(
@@ -2643,6 +2659,18 @@ impl ContextProvider for MockContextProvider {
Field::new("price", DataType::Float64, false),
Field::new("delivered", DataType::Boolean, false),
])),
+ "array" => Ok(Schema::new(vec![
+ Field::new(
+ "left",
+ DataType::List(Arc::new(Field::new("item", DataType::Int64, true))),
+ false,
+ ),
+ Field::new(
+ "right",
+ DataType::List(Arc::new(Field::new("item", DataType::Int64, true))),
+ false,
+ ),
+ ])),
"lineitem" => Ok(Schema::new(vec![
Field::new("l_item_id", DataType::UInt32, false),
Field::new("l_description", DataType::Utf8, false),
diff --git a/datafusion/substrait/src/logical_plan/consumer.rs b/datafusion/substrait/src/logical_plan/consumer.rs
index a2f95a0d89..66cc37c60b 100644
--- a/datafusion/substrait/src/logical_plan/consumer.rs
+++ b/datafusion/substrait/src/logical_plan/consumer.rs
@@ -99,6 +99,8 @@ pub fn name_to_op(name: &str) -> Result<Operator> {
"bitwise_and" => Ok(Operator::BitwiseAnd),
"bitwise_or" => Ok(Operator::BitwiseOr),
"str_concat" => Ok(Operator::StringConcat),
+ "at_arrow" => Ok(Operator::AtArrow),
+ "arrow_at" => Ok(Operator::ArrowAt),
"bitwise_xor" => Ok(Operator::BitwiseXor),
"bitwise_shift_right" => Ok(Operator::BitwiseShiftRight),
"bitwise_shift_left" => Ok(Operator::BitwiseShiftLeft),
diff --git a/datafusion/substrait/src/logical_plan/producer.rs b/datafusion/substrait/src/logical_plan/producer.rs
index bddcc577ad..e044f00504 100644
--- a/datafusion/substrait/src/logical_plan/producer.rs
+++ b/datafusion/substrait/src/logical_plan/producer.rs
@@ -467,6 +467,8 @@ pub fn operator_to_name(op: Operator) -> &'static str {
Operator::BitwiseAnd => "bitwise_and",
Operator::BitwiseOr => "bitwise_or",
Operator::StringConcat => "str_concat",
+ Operator::AtArrow => "at_arrow",
+ Operator::ArrowAt => "arrow_at",
Operator::BitwiseXor => "bitwise_xor",
Operator::BitwiseShiftRight => "bitwise_shift_right",
Operator::BitwiseShiftLeft => "bitwise_shift_left",