This is an automated email from the ASF dual-hosted git repository.
jayzhan pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/datafusion.git
The following commit(s) were added to refs/heads/main by this push:
new 3ebc31d2c2 Remove `Expr::GetIndexedField`, replace
`Expr::{field,index,range}` with `FieldAccessor`, `IndexAccessor`, and
`SliceAccessor` (#10568)
3ebc31d2c2 is described below
commit 3ebc31d2c26e6ff02d3cc79e5e4786d4ed029710
Author: Jay Zhan <[email protected]>
AuthorDate: Tue May 21 08:07:25 2024 +0800
Remove `Expr::GetIndexedField`, replace `Expr::{field,index,range}` with
`FieldAccessor`, `IndexAccessor`, and `SliceAccessor` (#10568)
* remove expr
Signed-off-by: jayzhan211 <[email protected]>
* add expr extension
Signed-off-by: jayzhan211 <[email protected]>
* doc
Signed-off-by: jayzhan211 <[email protected]>
* move test that has struct
Signed-off-by: jayzhan211 <[email protected]>
* fmt
Signed-off-by: jayzhan211 <[email protected]>
* add foc and fix displayed name
Signed-off-by: jayzhan211 <[email protected]>
* rm test
Signed-off-by: jayzhan211 <[email protected]>
* rebase
Signed-off-by: jayzhan211 <[email protected]>
* move doc
Signed-off-by: jayzhan211 <[email protected]>
---------
Signed-off-by: jayzhan211 <[email protected]>
---
datafusion/core/src/datasource/listing/helpers.rs | 1 -
datafusion/core/src/physical_planner.rs | 26 +----
datafusion/core/tests/expr_api/mod.rs | 11 +-
datafusion/core/tests/optimizer_integration.rs | 129 ++++++++++++++++++++-
datafusion/expr/src/expr.rs | 127 --------------------
datafusion/expr/src/expr_schema.rs | 73 +-----------
datafusion/expr/src/tree_node.rs | 21 +---
datafusion/expr/src/utils.rs | 1 -
datafusion/functions-array/src/expr_ext.rs | 102 ++++++++++++++++
datafusion/functions-array/src/extract.rs | 11 ++
datafusion/functions-array/src/lib.rs | 1 +
datafusion/functions/src/core/expr_ext.rs | 68 +++++++++++
datafusion/functions/src/core/mod.rs | 1 +
datafusion/optimizer/src/analyzer/type_coercion.rs | 1 -
.../optimizer/src/optimize_projections/mod.rs | 23 ----
datafusion/optimizer/src/push_down_filter.rs | 1 -
.../src/simplify_expressions/expr_simplifier.rs | 3 +-
.../src/simplify_expressions/guarantees.rs | 68 +----------
.../optimizer/src/simplify_expressions/mod.rs | 3 +
datafusion/physical-expr/src/planner.rs | 25 +---
datafusion/proto/proto/datafusion.proto | 2 +-
datafusion/proto/src/generated/pbjson.rs | 14 ---
datafusion/proto/src/generated/prost.rs | 28 +++--
datafusion/proto/src/logical_plan/from_proto.rs | 59 +---------
datafusion/proto/src/logical_plan/to_proto.rs | 44 +------
datafusion/sql/src/unparser/expr.rs | 3 -
datafusion/sqllogictest/test_files/projection.slt | 17 +++
27 files changed, 365 insertions(+), 498 deletions(-)
diff --git a/datafusion/core/src/datasource/listing/helpers.rs
b/datafusion/core/src/datasource/listing/helpers.rs
index 0cffa05131..5b87090096 100644
--- a/datafusion/core/src/datasource/listing/helpers.rs
+++ b/datafusion/core/src/datasource/listing/helpers.rs
@@ -84,7 +84,6 @@ pub fn expr_applicable_for_cols(col_names: &[String], expr:
&Expr) -> bool {
| Expr::Exists { .. }
| Expr::InSubquery(_)
| Expr::ScalarSubquery(_)
- | Expr::GetIndexedField { .. }
| Expr::GroupingSet(_)
| Expr::Case { .. } => Ok(TreeNodeRecursion::Continue),
diff --git a/datafusion/core/src/physical_planner.rs
b/datafusion/core/src/physical_planner.rs
index 406196a591..597a03a52f 100644
--- a/datafusion/core/src/physical_planner.rs
+++ b/datafusion/core/src/physical_planner.rs
@@ -83,8 +83,7 @@ use datafusion_common::{
use datafusion_expr::dml::CopyTo;
use datafusion_expr::expr::{
self, AggregateFunction, AggregateFunctionDefinition, Alias, Between,
BinaryExpr,
- Cast, GetFieldAccess, GetIndexedField, GroupingSet, InList, Like, TryCast,
- WindowFunction,
+ Cast, GroupingSet, InList, Like, TryCast, WindowFunction,
};
use datafusion_expr::expr_rewriter::unnormalize_cols;
use datafusion_expr::expr_vec_fmt;
@@ -216,29 +215,6 @@ fn create_physical_name(e: &Expr, is_first_expr: bool) ->
Result<String> {
let expr = create_physical_name(expr, false)?;
Ok(format!("{expr} IS NOT UNKNOWN"))
}
- Expr::GetIndexedField(GetIndexedField { expr: _, field }) => {
- match field {
- GetFieldAccess::NamedStructField { name: _ } => {
- unreachable!(
- "NamedStructField should have been rewritten in
OperatorToFunction"
- )
- }
- GetFieldAccess::ListIndex { key: _ } => {
- unreachable!(
- "ListIndex should have been rewritten in
OperatorToFunction"
- )
- }
- GetFieldAccess::ListRange {
- start: _,
- stop: _,
- stride: _,
- } => {
- unreachable!(
- "ListRange should have been rewritten in
OperatorToFunction"
- )
- }
- };
- }
Expr::ScalarFunction(fun) => fun.func.display_name(&fun.args),
Expr::WindowFunction(WindowFunction {
fun,
diff --git a/datafusion/core/tests/expr_api/mod.rs
b/datafusion/core/tests/expr_api/mod.rs
index db75a8c614..b74e572e96 100644
--- a/datafusion/core/tests/expr_api/mod.rs
+++ b/datafusion/core/tests/expr_api/mod.rs
@@ -21,6 +21,8 @@ use arrow_array::{ArrayRef, RecordBatch, StringArray,
StructArray};
use arrow_schema::{DataType, Field};
use datafusion::prelude::*;
use datafusion_common::DFSchema;
+use datafusion_functions::core::expr_ext::FieldAccessor;
+use datafusion_functions_array::expr_ext::{IndexAccessor, SliceAccessor};
/// Tests of using and evaluating `Expr`s outside the context of a LogicalPlan
use std::sync::{Arc, OnceLock};
@@ -61,7 +63,7 @@ fn test_eq_with_coercion() {
#[test]
fn test_get_field() {
evaluate_expr_test(
- get_field(col("props"), "a"),
+ col("props").field("a"),
vec![
"+------------+",
"| expr |",
@@ -77,7 +79,8 @@ fn test_get_field() {
#[test]
fn test_nested_get_field() {
evaluate_expr_test(
- get_field(col("props"), "a")
+ col("props")
+ .field("a")
.eq(lit("2021-02-02"))
.or(col("id").eq(lit(1))),
vec![
@@ -95,7 +98,7 @@ fn test_nested_get_field() {
#[test]
fn test_list() {
evaluate_expr_test(
- array_element(col("list"), lit(1i64)),
+ col("list").index(lit(1i64)),
vec![
"+------+", "| expr |", "+------+", "| one |", "| two |", "|
five |",
"+------+",
@@ -106,7 +109,7 @@ fn test_list() {
#[test]
fn test_list_range() {
evaluate_expr_test(
- array_slice(col("list"), lit(1i64), lit(2i64), None),
+ col("list").range(lit(1i64), lit(2i64)),
vec![
"+--------------+",
"| expr |",
diff --git a/datafusion/core/tests/optimizer_integration.rs
b/datafusion/core/tests/optimizer_integration.rs
index 8acf8618c7..f84e8e24af 100644
--- a/datafusion/core/tests/optimizer_integration.rs
+++ b/datafusion/core/tests/optimizer_integration.rs
@@ -23,11 +23,19 @@ use std::collections::HashMap;
use std::sync::Arc;
use arrow::datatypes::{DataType, Field, Schema, SchemaRef, TimeUnit};
+use arrow_schema::{Fields, SchemaBuilder};
use datafusion_common::config::ConfigOptions;
-use datafusion_common::{plan_err, Result};
-use datafusion_expr::{AggregateUDF, LogicalPlan, ScalarUDF, TableSource,
WindowUDF};
+use datafusion_common::tree_node::{TransformedResult, TreeNode};
+use datafusion_common::{plan_err, DFSchema, Result, ScalarValue};
+use datafusion_expr::interval_arithmetic::{Interval, NullableInterval};
+use datafusion_expr::{
+ col, lit, AggregateUDF, BinaryExpr, Expr, ExprSchemable, LogicalPlan,
Operator,
+ ScalarUDF, TableSource, WindowUDF,
+};
+use datafusion_functions::core::expr_ext::FieldAccessor;
use datafusion_optimizer::analyzer::Analyzer;
use datafusion_optimizer::optimizer::Optimizer;
+use datafusion_optimizer::simplify_expressions::GuaranteeRewriter;
use datafusion_optimizer::{OptimizerConfig, OptimizerContext};
use datafusion_sql::planner::{ContextProvider, SqlToRel};
use datafusion_sql::sqlparser::ast::Statement;
@@ -233,3 +241,120 @@ impl TableSource for MyTableSource {
self.schema.clone()
}
}
+
+#[test]
+fn test_nested_schema_nullability() {
+ let mut builder = SchemaBuilder::new();
+ builder.push(Field::new("foo", DataType::Int32, true));
+ builder.push(Field::new(
+ "parent",
+ DataType::Struct(Fields::from(vec![Field::new(
+ "child",
+ DataType::Int64,
+ false,
+ )])),
+ true,
+ ));
+ let schema = builder.finish();
+
+ let dfschema = DFSchema::from_field_specific_qualified_schema(
+ vec![Some("table_name".into()), None],
+ &Arc::new(schema),
+ )
+ .unwrap();
+
+ let expr = col("parent").field("child");
+ assert!(expr.nullable(&dfschema).unwrap());
+}
+
+#[test]
+fn test_inequalities_non_null_bounded() {
+ let guarantees = vec![
+ // x ∈ [1, 3] (not null)
+ (
+ col("x"),
+ NullableInterval::NotNull {
+ values: Interval::make(Some(1_i32), Some(3_i32)).unwrap(),
+ },
+ ),
+ // s.y ∈ [1, 3] (not null)
+ (
+ col("s").field("y"),
+ NullableInterval::NotNull {
+ values: Interval::make(Some(1_i32), Some(3_i32)).unwrap(),
+ },
+ ),
+ ];
+
+ let mut rewriter = GuaranteeRewriter::new(guarantees.iter());
+
+ // (original_expr, expected_simplification)
+ let simplified_cases = &[
+ (col("x").lt(lit(0)), false),
+ (col("s").field("y").lt(lit(0)), false),
+ (col("x").lt_eq(lit(3)), true),
+ (col("x").gt(lit(3)), false),
+ (col("x").gt(lit(0)), true),
+ (col("x").eq(lit(0)), false),
+ (col("x").not_eq(lit(0)), true),
+ (col("x").between(lit(0), lit(5)), true),
+ (col("x").between(lit(5), lit(10)), false),
+ (col("x").not_between(lit(0), lit(5)), false),
+ (col("x").not_between(lit(5), lit(10)), true),
+ (
+ Expr::BinaryExpr(BinaryExpr {
+ left: Box::new(col("x")),
+ op: Operator::IsDistinctFrom,
+ right: Box::new(lit(ScalarValue::Null)),
+ }),
+ true,
+ ),
+ (
+ Expr::BinaryExpr(BinaryExpr {
+ left: Box::new(col("x")),
+ op: Operator::IsDistinctFrom,
+ right: Box::new(lit(5)),
+ }),
+ true,
+ ),
+ ];
+
+ validate_simplified_cases(&mut rewriter, simplified_cases);
+
+ let unchanged_cases = &[
+ col("x").gt(lit(2)),
+ col("x").lt_eq(lit(2)),
+ col("x").eq(lit(2)),
+ col("x").not_eq(lit(2)),
+ col("x").between(lit(3), lit(5)),
+ col("x").not_between(lit(3), lit(10)),
+ ];
+
+ validate_unchanged_cases(&mut rewriter, unchanged_cases);
+}
+
+fn validate_simplified_cases<T>(rewriter: &mut GuaranteeRewriter, cases:
&[(Expr, T)])
+where
+ ScalarValue: From<T>,
+ T: Clone,
+{
+ for (expr, expected_value) in cases {
+ let output = expr.clone().rewrite(rewriter).data().unwrap();
+ let expected = lit(ScalarValue::from(expected_value.clone()));
+ assert_eq!(
+ output, expected,
+ "{} simplified to {}, but expected {}",
+ expr, output, expected
+ );
+ }
+}
+fn validate_unchanged_cases(rewriter: &mut GuaranteeRewriter, cases: &[Expr]) {
+ for expr in cases {
+ let output = expr.clone().rewrite(rewriter).data().unwrap();
+ assert_eq!(
+ &output, expr,
+ "{} was simplified to {}, but expected it to be unchanged",
+ expr, output
+ );
+ }
+}
diff --git a/datafusion/expr/src/expr.rs b/datafusion/expr/src/expr.rs
index a0bd0086aa..5e43c160ba 100644
--- a/datafusion/expr/src/expr.rs
+++ b/datafusion/expr/src/expr.rs
@@ -143,9 +143,6 @@ pub enum Expr {
IsNotUnknown(Box<Expr>),
/// arithmetic negation of an expression, the operand must be of a signed
numeric data type
Negative(Box<Expr>),
- /// Returns the field of a [`arrow::array::ListArray`] or
- /// [`arrow::array::StructArray`] by index or range
- GetIndexedField(GetIndexedField),
/// Whether an expression is between a given range.
Between(Between),
/// The CASE expression is similar to a series of nested if/else and there
are two forms that
@@ -888,7 +885,6 @@ impl Expr {
Expr::Column(..) => "Column",
Expr::OuterReferenceColumn(_, _) => "Outer",
Expr::Exists { .. } => "Exists",
- Expr::GetIndexedField { .. } => "GetIndexedField",
Expr::GroupingSet(..) => "GroupingSet",
Expr::InList { .. } => "InList",
Expr::InSubquery(..) => "InSubquery",
@@ -1180,91 +1176,6 @@ impl Expr {
))
}
- /// Return access to the named field. Example `expr["name"]`
- ///
- /// ## Access field "my_field" from column "c1"
- ///
- /// For example if column "c1" holds documents like this
- ///
- /// ```json
- /// {
- /// "my_field": 123.34,
- /// "other_field": "Boston",
- /// }
- /// ```
- ///
- /// You can access column "my_field" with
- ///
- /// ```
- /// # use datafusion_expr::{col};
- /// let expr = col("c1")
- /// .field("my_field");
- /// assert_eq!(expr.display_name().unwrap(), "c1[my_field]");
- /// ```
- pub fn field(self, name: impl Into<String>) -> Self {
- Expr::GetIndexedField(GetIndexedField {
- expr: Box::new(self),
- field: GetFieldAccess::NamedStructField {
- name: ScalarValue::from(name.into()),
- },
- })
- }
-
- /// Return access to the element field. Example `expr["name"]`
- ///
- /// ## Example Access element 2 from column "c1"
- ///
- /// For example if column "c1" holds documents like this
- ///
- /// ```json
- /// [10, 20, 30, 40]
- /// ```
- ///
- /// You can access the value "30" with
- ///
- /// ```
- /// # use datafusion_expr::{lit, col, Expr};
- /// let expr = col("c1")
- /// .index(lit(3));
- /// assert_eq!(expr.display_name().unwrap(), "c1[Int32(3)]");
- /// ```
- pub fn index(self, key: Expr) -> Self {
- Expr::GetIndexedField(GetIndexedField {
- expr: Box::new(self),
- field: GetFieldAccess::ListIndex { key: Box::new(key) },
- })
- }
-
- /// Return elements between `1` based `start` and `stop`, for
- /// example `expr[1:3]`
- ///
- /// ## Example: Access element 2, 3, 4 from column "c1"
- ///
- /// For example if column "c1" holds documents like this
- ///
- /// ```json
- /// [10, 20, 30, 40]
- /// ```
- ///
- /// You can access the value `[20, 30, 40]` with
- ///
- /// ```
- /// # use datafusion_expr::{lit, col};
- /// let expr = col("c1")
- /// .range(lit(2), lit(4));
- /// assert_eq!(expr.display_name().unwrap(),
"c1[Int32(2):Int32(4):Int64(1)]");
- /// ```
- pub fn range(self, start: Expr, stop: Expr) -> Self {
- Expr::GetIndexedField(GetIndexedField {
- expr: Box::new(self),
- field: GetFieldAccess::ListRange {
- start: Box::new(start),
- stop: Box::new(stop),
- stride: Box::new(Expr::Literal(ScalarValue::Int64(Some(1)))),
- },
- })
- }
-
#[deprecated(since = "39.0.0", note = "use try_as_col instead")]
pub fn try_into_col(&self) -> Result<Column> {
match self {
@@ -1362,7 +1273,6 @@ impl Expr {
| Expr::Cast(..)
| Expr::Column(..)
| Expr::Exists(..)
- | Expr::GetIndexedField(..)
| Expr::GroupingSet(..)
| Expr::InList(..)
| Expr::InSubquery(..)
@@ -1611,19 +1521,6 @@ impl fmt::Display for Expr {
Some(qualifier) => write!(f, "{qualifier}.*"),
None => write!(f, "*"),
},
- Expr::GetIndexedField(GetIndexedField { field, expr }) => match
field {
- GetFieldAccess::NamedStructField { name } => {
- write!(f, "({expr})[{name}]")
- }
- GetFieldAccess::ListIndex { key } => write!(f,
"({expr})[{key}]"),
- GetFieldAccess::ListRange {
- start,
- stop,
- stride,
- } => {
- write!(f, "({expr})[{start}:{stop}:{stride}]")
- }
- },
Expr::GroupingSet(grouping_sets) => match grouping_sets {
GroupingSet::Rollup(exprs) => {
// ROLLUP (c0, c1, c2)
@@ -1828,30 +1725,6 @@ fn write_name<W: Write>(w: &mut W, e: &Expr) ->
Result<()> {
Expr::ScalarSubquery(subquery) => {
w.write_str(subquery.subquery.schema().field(0).name().as_str())?;
}
- Expr::GetIndexedField(GetIndexedField { expr, field }) => {
- write_name(w, expr)?;
- match field {
- GetFieldAccess::NamedStructField { name } => write!(w,
"[{name}]")?,
- GetFieldAccess::ListIndex { key } => {
- w.write_str("[")?;
- write_name(w, key)?;
- w.write_str("]")?;
- }
- GetFieldAccess::ListRange {
- start,
- stop,
- stride,
- } => {
- w.write_str("[")?;
- write_name(w, start)?;
- w.write_str(":")?;
- write_name(w, stop)?;
- w.write_str(":")?;
- write_name(w, stride)?;
- w.write_str("]")?;
- }
- }
- }
Expr::Unnest(Unnest { expr }) => {
w.write_str("unnest(")?;
write_name(w, expr)?;
diff --git a/datafusion/expr/src/expr_schema.rs
b/datafusion/expr/src/expr_schema.rs
index 2c08dbe042..8b7f30d245 100644
--- a/datafusion/expr/src/expr_schema.rs
+++ b/datafusion/expr/src/expr_schema.rs
@@ -17,11 +17,9 @@
use super::{Between, Expr, Like};
use crate::expr::{
- AggregateFunction, AggregateFunctionDefinition, Alias, BinaryExpr, Cast,
- GetFieldAccess, GetIndexedField, InList, InSubquery, Placeholder,
ScalarFunction,
- Sort, TryCast, Unnest, WindowFunction,
+ AggregateFunction, AggregateFunctionDefinition, Alias, BinaryExpr, Cast,
InList,
+ InSubquery, Placeholder, ScalarFunction, Sort, TryCast, Unnest,
WindowFunction,
};
-use crate::field_util::GetFieldAccessSchema;
use crate::type_coercion::binary::get_result_type;
use crate::type_coercion::functions::data_types_with_scalar_udf;
use crate::{utils, LogicalPlan, Projection, Subquery};
@@ -214,9 +212,6 @@ impl ExprSchemable for Expr {
// grouping sets do not really have a type and do not appear
in projections
Ok(DataType::Null)
}
- Expr::GetIndexedField(GetIndexedField { expr, field }) => {
- field_for_index(expr, field, schema).map(|x|
x.data_type().clone())
- }
}
}
@@ -320,16 +315,6 @@ impl ExprSchemable for Expr {
Expr::Wildcard { .. } => internal_err!(
"Wildcard expressions are not valid in a logical query plan"
),
- Expr::GetIndexedField(GetIndexedField { expr, field }) => {
- // If schema is nested, check if parent is nullable
- // if it is, return early
- if let Expr::Column(col) = expr.as_ref() {
- if input_schema.nullable(col)? {
- return Ok(true);
- }
- }
- field_for_index(expr, field, input_schema).map(|x|
x.is_nullable())
- }
Expr::GroupingSet(_) => {
// grouping sets do not really have the concept of nullable
and do not appear
// in projections
@@ -473,33 +458,6 @@ impl ExprSchemable for Expr {
}
}
-/// return the schema [`Field`] for the type referenced by `get_indexed_field`
-fn field_for_index(
- expr: &Expr,
- field: &GetFieldAccess,
- schema: &dyn ExprSchema,
-) -> Result<Field> {
- let expr_dt = expr.get_type(schema)?;
- match field {
- GetFieldAccess::NamedStructField { name } => {
- GetFieldAccessSchema::NamedStructField { name: name.clone() }
- }
- GetFieldAccess::ListIndex { key } => GetFieldAccessSchema::ListIndex {
- key_dt: key.get_type(schema)?,
- },
- GetFieldAccess::ListRange {
- start,
- stop,
- stride,
- } => GetFieldAccessSchema::ListRange {
- start_dt: start.get_type(schema)?,
- stop_dt: stop.get_type(schema)?,
- stride_dt: stride.get_type(schema)?,
- },
- }
- .get_accessed_field(&expr_dt)
-}
-
/// cast subquery in InSubquery/ScalarSubquery to a given type.
pub fn cast_subquery(subquery: Subquery, cast_to_type: &DataType) ->
Result<Subquery> {
if subquery.subquery.schema().field(0).data_type() == cast_to_type {
@@ -536,7 +494,7 @@ pub fn cast_subquery(subquery: Subquery, cast_to_type:
&DataType) -> Result<Subq
mod tests {
use super::*;
use crate::{col, lit};
- use arrow::datatypes::{Fields, SchemaBuilder};
+
use datafusion_common::{DFSchema, ScalarValue};
macro_rules! test_is_expr_nullable {
@@ -673,31 +631,6 @@ mod tests {
assert_eq!(&meta, expr.to_field(&schema).unwrap().1.metadata());
}
- #[test]
- fn test_nested_schema_nullability() {
- let mut builder = SchemaBuilder::new();
- builder.push(Field::new("foo", DataType::Int32, true));
- builder.push(Field::new(
- "parent",
- DataType::Struct(Fields::from(vec![Field::new(
- "child",
- DataType::Int64,
- false,
- )])),
- true,
- ));
- let schema = builder.finish();
-
- let dfschema = DFSchema::from_field_specific_qualified_schema(
- vec![Some("table_name".into()), None],
- &Arc::new(schema),
- )
- .unwrap();
-
- let expr = col("parent").field("child");
- assert!(expr.nullable(&dfschema).unwrap());
- }
-
#[derive(Debug)]
struct MockExprSchema {
nullable: bool,
diff --git a/datafusion/expr/src/tree_node.rs b/datafusion/expr/src/tree_node.rs
index 1b3b5e8fcb..31ca4c4094 100644
--- a/datafusion/expr/src/tree_node.rs
+++ b/datafusion/expr/src/tree_node.rs
@@ -19,10 +19,10 @@
use crate::expr::{
AggregateFunction, AggregateFunctionDefinition, Alias, Between,
BinaryExpr, Case,
- Cast, GetIndexedField, GroupingSet, InList, InSubquery, Like, Placeholder,
- ScalarFunction, Sort, TryCast, Unnest, WindowFunction,
+ Cast, GroupingSet, InList, InSubquery, Like, Placeholder, ScalarFunction,
Sort,
+ TryCast, Unnest, WindowFunction,
};
-use crate::{Expr, GetFieldAccess};
+use crate::Expr;
use datafusion_common::tree_node::{
Transformed, TreeNode, TreeNodeIterator, TreeNodeRecursion,
@@ -51,16 +51,6 @@ impl TreeNode for Expr {
| Expr::TryCast(TryCast { expr, .. })
| Expr::Sort(Sort { expr, .. })
| Expr::InSubquery(InSubquery{ expr, .. }) => vec![expr.as_ref()],
- Expr::GetIndexedField(GetIndexedField { expr, field }) => {
- let expr = expr.as_ref();
- match field {
- GetFieldAccess::ListIndex {key} => vec![key.as_ref(),
expr],
- GetFieldAccess::ListRange {start, stop, stride} => {
- vec![start.as_ref(), stop.as_ref(),stride.as_ref(),
expr]
- }
- GetFieldAccess::NamedStructField { .. } => vec![expr],
- }
- }
Expr::GroupingSet(GroupingSet::Rollup(exprs))
| Expr::GroupingSet(GroupingSet::Cube(exprs)) =>
exprs.iter().collect(),
Expr::ScalarFunction (ScalarFunction{ args, .. } ) => {
@@ -374,11 +364,6 @@ impl TreeNode for Expr {
.update_data(|(new_expr, new_list)| {
Expr::InList(InList::new(new_expr, new_list, negated))
}),
- Expr::GetIndexedField(GetIndexedField { expr, field }) => {
- transform_box(expr, &mut f)?.update_data(|be| {
- Expr::GetIndexedField(GetIndexedField::new(be, field))
- })
- }
})
}
}
diff --git a/datafusion/expr/src/utils.rs b/datafusion/expr/src/utils.rs
index 581e299cf9..0d25a3443f 100644
--- a/datafusion/expr/src/utils.rs
+++ b/datafusion/expr/src/utils.rs
@@ -303,7 +303,6 @@ pub fn expr_to_columns(expr: &Expr, accum: &mut
HashSet<Column>) -> Result<()> {
| Expr::InSubquery(_)
| Expr::ScalarSubquery(_)
| Expr::Wildcard { .. }
- | Expr::GetIndexedField { .. }
| Expr::Placeholder(_)
| Expr::OuterReferenceColumn { .. } => {}
}
diff --git a/datafusion/functions-array/src/expr_ext.rs
b/datafusion/functions-array/src/expr_ext.rs
new file mode 100644
index 0000000000..5505ef7468
--- /dev/null
+++ b/datafusion/functions-array/src/expr_ext.rs
@@ -0,0 +1,102 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+//! Extension methods for Expr.
+
+use datafusion_expr::Expr;
+
+use crate::extract::{array_element, array_slice};
+
+/// Return access to the element field. Example `expr["name"]`
+///
+/// ## Example Access element 2 from column "c1"
+///
+/// For example if column "c1" holds documents like this
+///
+/// ```json
+/// [10, 20, 30, 40]
+/// ```
+///
+/// You can access the value "30" with
+///
+/// ```
+/// # use datafusion_expr::{lit, col, Expr};
+/// # use datafusion_functions_array::expr_ext::IndexAccessor;
+/// let expr = col("c1")
+/// .index(lit(3));
+/// assert_eq!(expr.display_name().unwrap(), "c1[Int32(3)]");
+/// ```
+pub trait IndexAccessor {
+ fn index(self, key: Expr) -> Expr;
+}
+
+impl IndexAccessor for Expr {
+ fn index(self, key: Expr) -> Expr {
+ array_element(self, key)
+ }
+}
+
+/// Return elements between `1` based `start` and `stop`, for
+/// example `expr[1:3]`
+///
+/// ## Example: Access element 2, 3, 4 from column "c1"
+///
+/// For example if column "c1" holds documents like this
+///
+/// ```json
+/// [10, 20, 30, 40]
+/// ```
+///
+/// You can access the value `[20, 30, 40]` with
+///
+/// ```
+/// # use datafusion_expr::{lit, col};
+/// # use datafusion_functions_array::expr_ext::SliceAccessor;
+/// let expr = col("c1")
+/// .range(lit(2), lit(4));
+/// assert_eq!(expr.display_name().unwrap(), "c1[Int32(2):Int32(4)]");
+/// ```
+pub trait SliceAccessor {
+ fn range(self, start: Expr, stop: Expr) -> Expr;
+}
+
+impl SliceAccessor for Expr {
+ fn range(self, start: Expr, stop: Expr) -> Expr {
+ array_slice(self, start, stop, None)
+ }
+}
+
+#[cfg(test)]
+mod tests {
+ use super::*;
+
+ use datafusion_expr::{col, lit};
+
+ #[test]
+ fn test_index() {
+ let expr1 = col("a").index(lit(1));
+ let expr2 = array_element(col("a"), lit(1));
+ assert_eq!(expr1, expr2);
+ }
+
+ #[test]
+ fn test_range() {
+ let expr1 = col("a").range(lit(1), lit(2));
+ let expr2 = array_slice(col("a"), lit(1), lit(2), None);
+ assert_eq!(expr1, expr2);
+ }
+}
diff --git a/datafusion/functions-array/src/extract.rs
b/datafusion/functions-array/src/extract.rs
index 152e5f3c4b..a12cdc20df 100644
--- a/datafusion/functions-array/src/extract.rs
+++ b/datafusion/functions-array/src/extract.rs
@@ -97,6 +97,11 @@ impl ScalarUDFImpl for ArrayElement {
"array_element"
}
+ fn display_name(&self, args: &[Expr]) -> Result<String> {
+ let args_name: Vec<String> = args.iter().map(|e|
e.to_string()).collect();
+ Ok(format!("{}[{}]", args_name[0], args_name[1]))
+ }
+
fn signature(&self) -> &Signature {
&self.signature
}
@@ -245,6 +250,12 @@ impl ScalarUDFImpl for ArraySlice {
fn as_any(&self) -> &dyn Any {
self
}
+
+ fn display_name(&self, args: &[Expr]) -> Result<String> {
+ let args_name: Vec<String> = args.iter().map(|e|
e.to_string()).collect();
+ Ok(format!("{}[{}]", args_name[0], args_name[1..].join(":")))
+ }
+
fn name(&self) -> &str {
"array_slice"
}
diff --git a/datafusion/functions-array/src/lib.rs
b/datafusion/functions-array/src/lib.rs
index 5914736773..93be8bd790 100644
--- a/datafusion/functions-array/src/lib.rs
+++ b/datafusion/functions-array/src/lib.rs
@@ -34,6 +34,7 @@ pub mod concat;
pub mod dimension;
pub mod empty;
pub mod except;
+pub mod expr_ext;
pub mod extract;
pub mod flatten;
pub mod length;
diff --git a/datafusion/functions/src/core/expr_ext.rs
b/datafusion/functions/src/core/expr_ext.rs
new file mode 100644
index 0000000000..d80df0f334
--- /dev/null
+++ b/datafusion/functions/src/core/expr_ext.rs
@@ -0,0 +1,68 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+//! Extension methods for Expr.
+
+use datafusion_expr::{Expr, Literal};
+
+use super::expr_fn::get_field;
+
+/// Return access to the named field. Example `expr["name"]`
+///
+/// ## Access field "my_field" from column "c1"
+///
+/// For example if column "c1" holds documents like this
+///
+/// ```json
+/// {
+/// "my_field": 123.34,
+/// "other_field": "Boston",
+/// }
+/// ```
+///
+/// You can access column "my_field" with
+///
+/// ```
+/// # use datafusion_expr::{col};
+/// # use datafusion_functions::core::expr_ext::FieldAccessor;
+/// let expr = col("c1")
+/// .field("my_field");
+/// assert_eq!(expr.display_name().unwrap(), "c1[my_field]");
+/// ```
+pub trait FieldAccessor {
+ fn field(self, name: impl Literal) -> Expr;
+}
+
+impl FieldAccessor for Expr {
+ fn field(self, name: impl Literal) -> Expr {
+ get_field(self, name)
+ }
+}
+
+#[cfg(test)]
+mod tests {
+ use super::*;
+
+ use datafusion_expr::col;
+
+ #[test]
+ fn test_field() {
+ let expr1 = col("a").field("b");
+ let expr2 = get_field(col("a"), "b");
+ assert_eq!(expr1, expr2);
+ }
+}
diff --git a/datafusion/functions/src/core/mod.rs
b/datafusion/functions/src/core/mod.rs
index 491e45c35a..349d483a41 100644
--- a/datafusion/functions/src/core/mod.rs
+++ b/datafusion/functions/src/core/mod.rs
@@ -23,6 +23,7 @@ use std::sync::Arc;
pub mod arrow_cast;
pub mod arrowtypeof;
pub mod coalesce;
+pub mod expr_ext;
pub mod getfield;
pub mod named_struct;
pub mod nullif;
diff --git a/datafusion/optimizer/src/analyzer/type_coercion.rs
b/datafusion/optimizer/src/analyzer/type_coercion.rs
index 0f1f3ba7e7..3d08bd6c7e 100644
--- a/datafusion/optimizer/src/analyzer/type_coercion.rs
+++ b/datafusion/optimizer/src/analyzer/type_coercion.rs
@@ -450,7 +450,6 @@ impl<'a> TreeNodeRewriter for TypeCoercionRewriter<'a> {
| Expr::IsNotNull(_)
| Expr::IsNull(_)
| Expr::Negative(_)
- | Expr::GetIndexedField(_)
| Expr::Cast(_)
| Expr::TryCast(_)
| Expr::Sort(_)
diff --git a/datafusion/optimizer/src/optimize_projections/mod.rs
b/datafusion/optimizer/src/optimize_projections/mod.rs
index 5a9705381d..a13ae4bd41 100644
--- a/datafusion/optimizer/src/optimize_projections/mod.rs
+++ b/datafusion/optimizer/src/optimize_projections/mod.rs
@@ -1037,29 +1037,6 @@ mod tests {
assert_optimized_plan_equal(plan, expected)
}
- #[test]
- fn test_struct_field_push_down() -> Result<()> {
- let schema = Arc::new(Schema::new(vec![
- Field::new("a", DataType::Int64, false),
- Field::new_struct(
- "s",
- vec![
- Field::new("x", DataType::Int64, false),
- Field::new("y", DataType::Int64, false),
- ],
- false,
- ),
- ]));
-
- let table_scan = table_scan(TableReference::none(), &schema,
None)?.build()?;
- let plan = LogicalPlanBuilder::from(table_scan)
- .project(vec![col("s").field("x")])?
- .build()?;
- let expected = "Projection: (?table?.s)[x]\
- \n TableScan: ?table? projection=[s]";
- assert_optimized_plan_equal(plan, expected)
- }
-
#[test]
fn test_neg_push_down() -> Result<()> {
let table_scan = test_table_scan()?;
diff --git a/datafusion/optimizer/src/push_down_filter.rs
b/datafusion/optimizer/src/push_down_filter.rs
index b684b54903..c88913c02b 100644
--- a/datafusion/optimizer/src/push_down_filter.rs
+++ b/datafusion/optimizer/src/push_down_filter.rs
@@ -237,7 +237,6 @@ fn can_evaluate_as_join_condition(predicate: &Expr) ->
Result<bool> {
| Expr::IsNotFalse(_)
| Expr::IsNotUnknown(_)
| Expr::Negative(_)
- | Expr::GetIndexedField(_)
| Expr::Between(_)
| Expr::Case(_)
| Expr::Cast(_)
diff --git a/datafusion/optimizer/src/simplify_expressions/expr_simplifier.rs
b/datafusion/optimizer/src/simplify_expressions/expr_simplifier.rs
index 455d659fb2..25504e5c78 100644
--- a/datafusion/optimizer/src/simplify_expressions/expr_simplifier.rs
+++ b/datafusion/optimizer/src/simplify_expressions/expr_simplifier.rs
@@ -616,8 +616,7 @@ impl<'a> ConstEvaluator<'a> {
| Expr::Case(_)
| Expr::Cast { .. }
| Expr::TryCast { .. }
- | Expr::InList { .. }
- | Expr::GetIndexedField { .. } => true,
+ | Expr::InList { .. } => true,
}
}
diff --git a/datafusion/optimizer/src/simplify_expressions/guarantees.rs
b/datafusion/optimizer/src/simplify_expressions/guarantees.rs
index 9d8e3feceb..2ccf93e2d5 100644
--- a/datafusion/optimizer/src/simplify_expressions/guarantees.rs
+++ b/datafusion/optimizer/src/simplify_expressions/guarantees.rs
@@ -39,7 +39,7 @@ use datafusion_expr::{expr::InList, lit, Between, BinaryExpr,
Expr};
/// See a full example in [`ExprSimplifier::with_guarantees()`].
///
/// [`ExprSimplifier::with_guarantees()`]:
crate::simplify_expressions::expr_simplifier::ExprSimplifier::with_guarantees
-pub(crate) struct GuaranteeRewriter<'a> {
+pub struct GuaranteeRewriter<'a> {
guarantees: HashMap<&'a Expr, &'a NullableInterval>,
}
@@ -261,72 +261,6 @@ mod tests {
}
}
- #[test]
- fn test_inequalities_non_null_bounded() {
- let guarantees = vec![
- // x ∈ [1, 3] (not null)
- (
- col("x"),
- NullableInterval::NotNull {
- values: Interval::make(Some(1_i32), Some(3_i32)).unwrap(),
- },
- ),
- // s.y ∈ [1, 3] (not null)
- (
- col("s").field("y"),
- NullableInterval::NotNull {
- values: Interval::make(Some(1_i32), Some(3_i32)).unwrap(),
- },
- ),
- ];
-
- let mut rewriter = GuaranteeRewriter::new(guarantees.iter());
-
- // (original_expr, expected_simplification)
- let simplified_cases = &[
- (col("x").lt(lit(0)), false),
- (col("s").field("y").lt(lit(0)), false),
- (col("x").lt_eq(lit(3)), true),
- (col("x").gt(lit(3)), false),
- (col("x").gt(lit(0)), true),
- (col("x").eq(lit(0)), false),
- (col("x").not_eq(lit(0)), true),
- (col("x").between(lit(0), lit(5)), true),
- (col("x").between(lit(5), lit(10)), false),
- (col("x").not_between(lit(0), lit(5)), false),
- (col("x").not_between(lit(5), lit(10)), true),
- (
- Expr::BinaryExpr(BinaryExpr {
- left: Box::new(col("x")),
- op: Operator::IsDistinctFrom,
- right: Box::new(lit(ScalarValue::Null)),
- }),
- true,
- ),
- (
- Expr::BinaryExpr(BinaryExpr {
- left: Box::new(col("x")),
- op: Operator::IsDistinctFrom,
- right: Box::new(lit(5)),
- }),
- true,
- ),
- ];
-
- validate_simplified_cases(&mut rewriter, simplified_cases);
-
- let unchanged_cases = &[
- col("x").gt(lit(2)),
- col("x").lt_eq(lit(2)),
- col("x").eq(lit(2)),
- col("x").not_eq(lit(2)),
- col("x").between(lit(3), lit(5)),
- col("x").not_between(lit(3), lit(10)),
- ];
-
- validate_unchanged_cases(&mut rewriter, unchanged_cases);
- }
-
#[test]
fn test_inequalities_non_null_unbounded() {
let guarantees = vec![
diff --git a/datafusion/optimizer/src/simplify_expressions/mod.rs
b/datafusion/optimizer/src/simplify_expressions/mod.rs
index d0399fef07..46c066c11c 100644
--- a/datafusion/optimizer/src/simplify_expressions/mod.rs
+++ b/datafusion/optimizer/src/simplify_expressions/mod.rs
@@ -30,3 +30,6 @@ pub use datafusion_expr::simplify::{SimplifyContext,
SimplifyInfo};
pub use expr_simplifier::*;
pub use simplify_exprs::*;
+
+// Export for test in datafusion/core/tests/optimizer_integration.rs
+pub use guarantees::GuaranteeRewriter;
diff --git a/datafusion/physical-expr/src/planner.rs
b/datafusion/physical-expr/src/planner.rs
index ab57a8e800..9e8561eb68 100644
--- a/datafusion/physical-expr/src/planner.rs
+++ b/datafusion/physical-expr/src/planner.rs
@@ -20,16 +20,13 @@ use std::sync::Arc;
use arrow::datatypes::Schema;
use datafusion_common::{
- exec_err, internal_err, not_impl_err, plan_err, DFSchema, Result,
ScalarValue,
+ exec_err, not_impl_err, plan_err, DFSchema, Result, ScalarValue,
};
use datafusion_expr::execution_props::ExecutionProps;
use datafusion_expr::expr::{Alias, Cast, InList, ScalarFunction};
use datafusion_expr::var_provider::is_system_variables;
use datafusion_expr::var_provider::VarType;
-use datafusion_expr::{
- binary_expr, Between, BinaryExpr, Expr, GetFieldAccess, GetIndexedField,
Like,
- Operator, TryCast,
-};
+use datafusion_expr::{binary_expr, Between, BinaryExpr, Expr, Like, Operator,
TryCast};
use crate::scalar_function;
use crate::{
@@ -287,24 +284,6 @@ pub fn create_physical_expr(
input_dfschema,
execution_props,
)?),
- Expr::GetIndexedField(GetIndexedField { expr: _, field }) => match
field {
- GetFieldAccess::NamedStructField { name: _ } => {
- internal_err!(
- "NamedStructField should be rewritten in
OperatorToFunction"
- )
- }
- GetFieldAccess::ListIndex { key: _ } => {
- internal_err!("ListIndex should be rewritten in
OperatorToFunction")
- }
- GetFieldAccess::ListRange {
- start: _,
- stop: _,
- stride: _,
- } => {
- internal_err!("ListRange should be rewritten in
OperatorToFunction")
- }
- },
-
Expr::ScalarFunction(ScalarFunction { func, args }) => {
let physical_args =
create_physical_exprs(args, input_dfschema, execution_props)?;
diff --git a/datafusion/proto/proto/datafusion.proto
b/datafusion/proto/proto/datafusion.proto
index 8d69b0bad5..1c11e8e50d 100644
--- a/datafusion/proto/proto/datafusion.proto
+++ b/datafusion/proto/proto/datafusion.proto
@@ -397,7 +397,7 @@ message LogicalExprNode {
// Scalar UDF expressions
ScalarUDFExprNode scalar_udf_expr = 20;
- GetIndexedField get_indexed_field = 21;
+ // GetIndexedField get_indexed_field = 21;
GroupingSetNode grouping_set = 22;
diff --git a/datafusion/proto/src/generated/pbjson.rs
b/datafusion/proto/src/generated/pbjson.rs
index 8df0aeb851..77ba0808fb 100644
--- a/datafusion/proto/src/generated/pbjson.rs
+++ b/datafusion/proto/src/generated/pbjson.rs
@@ -13763,9 +13763,6 @@ impl serde::Serialize for LogicalExprNode {
logical_expr_node::ExprType::ScalarUdfExpr(v) => {
struct_ser.serialize_field("scalarUdfExpr", v)?;
}
- logical_expr_node::ExprType::GetIndexedField(v) => {
- struct_ser.serialize_field("getIndexedField", v)?;
- }
logical_expr_node::ExprType::GroupingSet(v) => {
struct_ser.serialize_field("groupingSet", v)?;
}
@@ -13850,8 +13847,6 @@ impl<'de> serde::Deserialize<'de> for LogicalExprNode {
"aggregateUdfExpr",
"scalar_udf_expr",
"scalarUdfExpr",
- "get_indexed_field",
- "getIndexedField",
"grouping_set",
"groupingSet",
"cube",
@@ -13897,7 +13892,6 @@ impl<'de> serde::Deserialize<'de> for LogicalExprNode {
WindowExpr,
AggregateUdfExpr,
ScalarUdfExpr,
- GetIndexedField,
GroupingSet,
Cube,
Rollup,
@@ -13952,7 +13946,6 @@ impl<'de> serde::Deserialize<'de> for LogicalExprNode {
"windowExpr" | "window_expr" =>
Ok(GeneratedField::WindowExpr),
"aggregateUdfExpr" | "aggregate_udf_expr" =>
Ok(GeneratedField::AggregateUdfExpr),
"scalarUdfExpr" | "scalar_udf_expr" =>
Ok(GeneratedField::ScalarUdfExpr),
- "getIndexedField" | "get_indexed_field" =>
Ok(GeneratedField::GetIndexedField),
"groupingSet" | "grouping_set" =>
Ok(GeneratedField::GroupingSet),
"cube" => Ok(GeneratedField::Cube),
"rollup" => Ok(GeneratedField::Rollup),
@@ -14120,13 +14113,6 @@ impl<'de> serde::Deserialize<'de> for LogicalExprNode {
return
Err(serde::de::Error::duplicate_field("scalarUdfExpr"));
}
expr_type__ =
map_.next_value::<::std::option::Option<_>>()?.map(logical_expr_node::ExprType::ScalarUdfExpr)
-;
- }
- GeneratedField::GetIndexedField => {
- if expr_type__.is_some() {
- return
Err(serde::de::Error::duplicate_field("getIndexedField"));
- }
- expr_type__ =
map_.next_value::<::std::option::Option<_>>()?.map(logical_expr_node::ExprType::GetIndexedField)
;
}
GeneratedField::GroupingSet => {
diff --git a/datafusion/proto/src/generated/prost.rs
b/datafusion/proto/src/generated/prost.rs
index b6b7687e6c..a175987f19 100644
--- a/datafusion/proto/src/generated/prost.rs
+++ b/datafusion/proto/src/generated/prost.rs
@@ -566,7 +566,7 @@ pub struct SubqueryAliasNode {
pub struct LogicalExprNode {
#[prost(
oneof = "logical_expr_node::ExprType",
- tags = "1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 17, 18, 19,
20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35"
+ tags = "1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 17, 18, 19,
20, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35"
)]
pub expr_type: ::core::option::Option<logical_expr_node::ExprType>,
}
@@ -622,8 +622,6 @@ pub mod logical_expr_node {
/// Scalar UDF expressions
#[prost(message, tag = "20")]
ScalarUdfExpr(super::ScalarUdfExprNode),
- #[prost(message, tag = "21")]
- GetIndexedField(::prost::alloc::boxed::Box<super::GetIndexedField>),
#[prost(message, tag = "22")]
GroupingSet(super::GroupingSetNode),
#[prost(message, tag = "23")]
@@ -701,24 +699,24 @@ pub struct NamedStructField {
#[allow(clippy::derive_partial_eq_without_eq)]
#[derive(Clone, PartialEq, ::prost::Message)]
pub struct ListIndex {
- #[prost(message, optional, boxed, tag = "1")]
- pub key:
::core::option::Option<::prost::alloc::boxed::Box<LogicalExprNode>>,
+ #[prost(message, optional, tag = "1")]
+ pub key: ::core::option::Option<LogicalExprNode>,
}
#[allow(clippy::derive_partial_eq_without_eq)]
#[derive(Clone, PartialEq, ::prost::Message)]
pub struct ListRange {
- #[prost(message, optional, boxed, tag = "1")]
- pub start:
::core::option::Option<::prost::alloc::boxed::Box<LogicalExprNode>>,
- #[prost(message, optional, boxed, tag = "2")]
- pub stop:
::core::option::Option<::prost::alloc::boxed::Box<LogicalExprNode>>,
- #[prost(message, optional, boxed, tag = "3")]
- pub stride:
::core::option::Option<::prost::alloc::boxed::Box<LogicalExprNode>>,
+ #[prost(message, optional, tag = "1")]
+ pub start: ::core::option::Option<LogicalExprNode>,
+ #[prost(message, optional, tag = "2")]
+ pub stop: ::core::option::Option<LogicalExprNode>,
+ #[prost(message, optional, tag = "3")]
+ pub stride: ::core::option::Option<LogicalExprNode>,
}
#[allow(clippy::derive_partial_eq_without_eq)]
#[derive(Clone, PartialEq, ::prost::Message)]
pub struct GetIndexedField {
- #[prost(message, optional, boxed, tag = "1")]
- pub expr:
::core::option::Option<::prost::alloc::boxed::Box<LogicalExprNode>>,
+ #[prost(message, optional, tag = "1")]
+ pub expr: ::core::option::Option<LogicalExprNode>,
#[prost(oneof = "get_indexed_field::Field", tags = "2, 3, 4")]
pub field: ::core::option::Option<get_indexed_field::Field>,
}
@@ -730,9 +728,9 @@ pub mod get_indexed_field {
#[prost(message, tag = "2")]
NamedStructField(super::NamedStructField),
#[prost(message, tag = "3")]
- ListIndex(::prost::alloc::boxed::Box<super::ListIndex>),
+ ListIndex(super::ListIndex),
#[prost(message, tag = "4")]
- ListRange(::prost::alloc::boxed::Box<super::ListRange>),
+ ListRange(super::ListRange),
}
}
#[allow(clippy::derive_partial_eq_without_eq)]
diff --git a/datafusion/proto/src/logical_plan/from_proto.rs
b/datafusion/proto/src/logical_plan/from_proto.rs
index 5df8eb59e1..b6f72f6773 100644
--- a/datafusion/proto/src/logical_plan/from_proto.rs
+++ b/datafusion/proto/src/logical_plan/from_proto.rs
@@ -42,7 +42,7 @@ use datafusion_expr::{
expr::{self, InList, Sort, WindowFunction},
logical_plan::{PlanType, StringifiedPlan},
AggregateFunction, Between, BinaryExpr, BuiltInWindowFunction, Case, Cast,
Expr,
- GetFieldAccess, GetIndexedField, GroupingSet,
+ GroupingSet,
GroupingSet::GroupingSets,
JoinConstraint, JoinType, Like, Operator, TryCast, WindowFrame,
WindowFrameBound,
WindowFrameUnits,
@@ -924,63 +924,6 @@ pub fn parse_expr(
})
.expect("Binary expression could not be reduced to a single
expression."))
}
- ExprType::GetIndexedField(get_indexed_field) => {
- let expr = parse_required_expr(
- get_indexed_field.expr.as_deref(),
- registry,
- "expr",
- codec,
- )?;
- let field = match &get_indexed_field.field {
- Some(protobuf::get_indexed_field::Field::NamedStructField(
- named_struct_field,
- )) => GetFieldAccess::NamedStructField {
- name: named_struct_field
- .name
- .as_ref()
- .ok_or_else(|| Error::required("value"))?
- .try_into()?,
- },
-
Some(protobuf::get_indexed_field::Field::ListIndex(list_index)) => {
- GetFieldAccess::ListIndex {
- key: Box::new(parse_required_expr(
- list_index.key.as_deref(),
- registry,
- "key",
- codec,
- )?),
- }
- }
-
Some(protobuf::get_indexed_field::Field::ListRange(list_range)) => {
- GetFieldAccess::ListRange {
- start: Box::new(parse_required_expr(
- list_range.start.as_deref(),
- registry,
- "start",
- codec,
- )?),
- stop: Box::new(parse_required_expr(
- list_range.stop.as_deref(),
- registry,
- "stop",
- codec,
- )?),
- stride: Box::new(parse_required_expr(
- list_range.stride.as_deref(),
- registry,
- "stride",
- codec,
- )?),
- }
- }
- None => return Err(proto_error("Field must not be None")),
- };
-
- Ok(Expr::GetIndexedField(GetIndexedField::new(
- Box::new(expr),
- field,
- )))
- }
ExprType::Column(column) => Ok(Expr::Column(column.into())),
ExprType::Literal(literal) => {
let scalar_value: ScalarValue = literal.try_into()?;
diff --git a/datafusion/proto/src/logical_plan/to_proto.rs
b/datafusion/proto/src/logical_plan/to_proto.rs
index 52482c890a..91f7411e91 100644
--- a/datafusion/proto/src/logical_plan/to_proto.rs
+++ b/datafusion/proto/src/logical_plan/to_proto.rs
@@ -35,9 +35,8 @@ use datafusion_common::{
Column, Constraint, Constraints, DFSchema, DFSchemaRef, ScalarValue,
TableReference,
};
use datafusion_expr::expr::{
- self, AggregateFunctionDefinition, Alias, Between, BinaryExpr, Cast,
GetFieldAccess,
- GetIndexedField, GroupingSet, InList, Like, Placeholder, ScalarFunction,
Sort,
- Unnest,
+ self, AggregateFunctionDefinition, Alias, Between, BinaryExpr, Cast,
GroupingSet,
+ InList, Like, Placeholder, ScalarFunction, Sort, Unnest,
};
use datafusion_expr::{
logical_plan::PlanType, logical_plan::StringifiedPlan, AggregateFunction,
@@ -957,45 +956,6 @@ pub fn serialize_expr(
// see discussion in
https://github.com/apache/datafusion/issues/2565
return Err(Error::General("Proto serialization error:
Expr::ScalarSubquery(_) | Expr::InSubquery(_) | Expr::Exists { .. } |
Exp:OuterReferenceColumn not supported".to_string()));
}
- Expr::GetIndexedField(GetIndexedField { expr, field }) => {
- let field = match field {
- GetFieldAccess::NamedStructField { name } => {
- protobuf::get_indexed_field::Field::NamedStructField(
- protobuf::NamedStructField {
- name: Some(name.try_into()?),
- },
- )
- }
- GetFieldAccess::ListIndex { key } => {
- protobuf::get_indexed_field::Field::ListIndex(Box::new(
- protobuf::ListIndex {
- key: Some(Box::new(serialize_expr(key.as_ref(),
codec)?)),
- },
- ))
- }
- GetFieldAccess::ListRange {
- start,
- stop,
- stride,
- } => protobuf::get_indexed_field::Field::ListRange(Box::new(
- protobuf::ListRange {
- start: Some(Box::new(serialize_expr(start.as_ref(),
codec)?)),
- stop: Some(Box::new(serialize_expr(stop.as_ref(),
codec)?)),
- stride: Some(Box::new(serialize_expr(stride.as_ref(),
codec)?)),
- },
- )),
- };
-
- protobuf::LogicalExprNode {
- expr_type: Some(ExprType::GetIndexedField(Box::new(
- protobuf::GetIndexedField {
- expr: Some(Box::new(serialize_expr(expr.as_ref(),
codec)?)),
- field: Some(field),
- },
- ))),
- }
- }
-
Expr::GroupingSet(GroupingSet::Cube(exprs)) =>
protobuf::LogicalExprNode {
expr_type: Some(ExprType::Cube(CubeNode {
expr: serialize_exprs(exprs, codec)?,
diff --git a/datafusion/sql/src/unparser/expr.rs
b/datafusion/sql/src/unparser/expr.rs
index 463ec13817..6209b409fa 100644
--- a/datafusion/sql/src/unparser/expr.rs
+++ b/datafusion/sql/src/unparser/expr.rs
@@ -411,9 +411,6 @@ impl Unparser<'_> {
)
})
}
- Expr::GetIndexedField(_) => {
- not_impl_err!("Unsupported Expr conversion: {expr:?}")
- }
Expr::TryCast(TryCast { expr, data_type }) => {
let inner_expr = self.expr_to_sql(expr)?;
Ok(ast::Expr::TryCast {
diff --git a/datafusion/sqllogictest/test_files/projection.slt
b/datafusion/sqllogictest/test_files/projection.slt
index 843ab71091..3c8855e347 100644
--- a/datafusion/sqllogictest/test_files/projection.slt
+++ b/datafusion/sqllogictest/test_files/projection.slt
@@ -233,3 +233,20 @@ DROP TABLE test;
statement ok
DROP TABLE test_simple;
+
+## projection push down with Struct
+statement ok
+create table t as values (struct(1));
+
+query TT
+explain select column1.c0 from t;
+----
+logical_plan
+01)Projection: get_field(t.column1, Utf8("c0"))
+02)--TableScan: t projection=[column1]
+physical_plan
+01)ProjectionExec: expr=[get_field(column1@0, c0) as t.column1[c0]]
+02)--MemoryExec: partitions=1, partition_sizes=[1]
+
+statement ok
+drop table t;
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]