This is an automated email from the ASF dual-hosted git repository.
alamb pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-datafusion.git
The following commit(s) were added to refs/heads/main by this push:
new 96ef1af62f Combine `Wildcard` and `QualifiedWildcard`, add
`wildcard()` expr fn (#8105)
96ef1af62f is described below
commit 96ef1af62fccc403ff7532545618b50da6ad6c9d
Author: Andrew Lamb <[email protected]>
AuthorDate: Sun Nov 12 06:29:32 2023 -0500
Combine `Wildcard` and `QualifiedWildcard`, add `wildcard()` expr fn (#8105)
---
datafusion/core/src/datasource/listing/helpers.rs | 3 +-
datafusion/core/src/physical_planner.rs | 5 +-
datafusion/core/tests/dataframe/mod.rs | 29 ++++---
datafusion/expr/src/expr.rs | 29 ++++---
datafusion/expr/src/expr_fn.rs | 13 +++
datafusion/expr/src/expr_schema.rs | 15 ++--
datafusion/expr/src/logical_plan/builder.rs | 13 ++-
datafusion/expr/src/tree_node/expr.rs | 8 +-
datafusion/expr/src/utils.rs | 3 +-
.../optimizer/src/analyzer/count_wildcard_rule.rs | 66 ++++++++-------
.../optimizer/src/analyzer/inline_table_scan.rs | 2 +-
.../optimizer/src/common_subexpr_eliminate.rs | 2 +-
datafusion/optimizer/src/push_down_filter.rs | 3 +-
.../src/simplify_expressions/expr_simplifier.rs | 3 +-
datafusion/proto/proto/datafusion.proto | 6 +-
datafusion/proto/src/generated/pbjson.rs | 94 +++++++++++++++++++++-
datafusion/proto/src/generated/prost.rs | 10 ++-
datafusion/proto/src/logical_plan/from_proto.rs | 4 +-
datafusion/proto/src/logical_plan/to_proto.rs | 11 +--
.../proto/tests/cases/roundtrip_logical_plan.rs | 12 ++-
datafusion/sql/src/expr/function.rs | 6 +-
21 files changed, 229 insertions(+), 108 deletions(-)
diff --git a/datafusion/core/src/datasource/listing/helpers.rs
b/datafusion/core/src/datasource/listing/helpers.rs
index 986e54ebbe..1d929f4bd4 100644
--- a/datafusion/core/src/datasource/listing/helpers.rs
+++ b/datafusion/core/src/datasource/listing/helpers.rs
@@ -120,8 +120,7 @@ pub fn expr_applicable_for_cols(col_names: &[String], expr:
&Expr) -> bool {
| Expr::AggregateFunction { .. }
| Expr::Sort { .. }
| Expr::WindowFunction { .. }
- | Expr::Wildcard
- | Expr::QualifiedWildcard { .. }
+ | Expr::Wildcard { .. }
| Expr::Placeholder(_) => {
is_applicable = false;
VisitRecursion::Stop
diff --git a/datafusion/core/src/physical_planner.rs
b/datafusion/core/src/physical_planner.rs
index f941e88f3a..9f9b529ace 100644
--- a/datafusion/core/src/physical_planner.rs
+++ b/datafusion/core/src/physical_planner.rs
@@ -364,9 +364,8 @@ fn create_physical_name(e: &Expr, is_first_expr: bool) ->
Result<String> {
Expr::Sort { .. } => {
internal_err!("Create physical name does not support sort
expression")
}
- Expr::Wildcard => internal_err!("Create physical name does not support
wildcard"),
- Expr::QualifiedWildcard { .. } => {
- internal_err!("Create physical name does not support qualified
wildcard")
+ Expr::Wildcard { .. } => {
+ internal_err!("Create physical name does not support wildcard")
}
Expr::Placeholder(_) => {
internal_err!("Create physical name does not support placeholder")
diff --git a/datafusion/core/tests/dataframe/mod.rs
b/datafusion/core/tests/dataframe/mod.rs
index 845d77581b..10f4574020 100644
--- a/datafusion/core/tests/dataframe/mod.rs
+++ b/datafusion/core/tests/dataframe/mod.rs
@@ -42,10 +42,9 @@ use datafusion::{assert_batches_eq,
assert_batches_sorted_eq};
use datafusion_common::{DataFusionError, ScalarValue, UnnestOptions};
use datafusion_execution::config::SessionConfig;
use datafusion_expr::expr::{GroupingSet, Sort};
-use datafusion_expr::Expr::Wildcard;
use datafusion_expr::{
array_agg, avg, col, count, exists, expr, in_subquery, lit, max,
out_ref_col,
- scalar_subquery, sum, AggregateFunction, Expr, ExprSchemable, WindowFrame,
+ scalar_subquery, sum, wildcard, AggregateFunction, Expr, ExprSchemable,
WindowFrame,
WindowFrameBound, WindowFrameUnits, WindowFunction,
};
use datafusion_physical_expr::var_provider::{VarProvider, VarType};
@@ -64,8 +63,8 @@ async fn test_count_wildcard_on_sort() -> Result<()> {
let df_results = ctx
.table("t1")
.await?
- .aggregate(vec![col("b")], vec![count(Wildcard)])?
- .sort(vec![count(Wildcard).sort(true, false)])?
+ .aggregate(vec![col("b")], vec![count(wildcard())])?
+ .sort(vec![count(wildcard()).sort(true, false)])?
.explain(false, false)?
.collect()
.await?;
@@ -99,8 +98,8 @@ async fn test_count_wildcard_on_where_in() -> Result<()> {
Arc::new(
ctx.table("t2")
.await?
- .aggregate(vec![], vec![count(Expr::Wildcard)])?
- .select(vec![count(Expr::Wildcard)])?
+ .aggregate(vec![], vec![count(wildcard())])?
+ .select(vec![count(wildcard())])?
.into_unoptimized_plan(),
// Usually, into_optimized_plan() should be used here, but due
to
// https://github.com/apache/arrow-datafusion/issues/5771,
@@ -136,8 +135,8 @@ async fn test_count_wildcard_on_where_exist() -> Result<()>
{
.filter(exists(Arc::new(
ctx.table("t2")
.await?
- .aggregate(vec![], vec![count(Expr::Wildcard)])?
- .select(vec![count(Expr::Wildcard)])?
+ .aggregate(vec![], vec![count(wildcard())])?
+ .select(vec![count(wildcard())])?
.into_unoptimized_plan(),
// Usually, into_optimized_plan() should be used here, but due to
// https://github.com/apache/arrow-datafusion/issues/5771,
@@ -172,7 +171,7 @@ async fn test_count_wildcard_on_window() -> Result<()> {
.await?
.select(vec![Expr::WindowFunction(expr::WindowFunction::new(
WindowFunction::AggregateFunction(AggregateFunction::Count),
- vec![Expr::Wildcard],
+ vec![wildcard()],
vec![],
vec![Expr::Sort(Sort::new(Box::new(col("a")), false, true))],
WindowFrame {
@@ -202,17 +201,17 @@ async fn test_count_wildcard_on_aggregate() -> Result<()>
{
let sql_results = ctx
.sql("select count(*) from t1")
.await?
- .select(vec![count(Expr::Wildcard)])?
+ .select(vec![count(wildcard())])?
.explain(false, false)?
.collect()
.await?;
- // add `.select(vec![count(Expr::Wildcard)])?` to make sure we can analyze
all node instead of just top node.
+ // add `.select(vec![count(wildcard())])?` to make sure we can analyze all
node instead of just top node.
let df_results = ctx
.table("t1")
.await?
- .aggregate(vec![], vec![count(Expr::Wildcard)])?
- .select(vec![count(Expr::Wildcard)])?
+ .aggregate(vec![], vec![count(wildcard())])?
+ .select(vec![count(wildcard())])?
.explain(false, false)?
.collect()
.await?;
@@ -248,8 +247,8 @@ async fn test_count_wildcard_on_where_scalar_subquery() ->
Result<()> {
ctx.table("t2")
.await?
.filter(out_ref_col(DataType::UInt32,
"t1.a").eq(col("t2.a")))?
- .aggregate(vec![], vec![count(Wildcard)])?
- .select(vec![col(count(Wildcard).to_string())])?
+ .aggregate(vec![], vec![count(wildcard())])?
+ .select(vec![col(count(wildcard()).to_string())])?
.into_unoptimized_plan(),
))
.gt(lit(ScalarValue::UInt8(Some(0)))),
diff --git a/datafusion/expr/src/expr.rs b/datafusion/expr/src/expr.rs
index 8929b21f44..4267f182bd 100644
--- a/datafusion/expr/src/expr.rs
+++ b/datafusion/expr/src/expr.rs
@@ -166,16 +166,12 @@ pub enum Expr {
InSubquery(InSubquery),
/// Scalar subquery
ScalarSubquery(Subquery),
- /// Represents a reference to all available fields.
+ /// Represents a reference to all available fields in a specific schema,
+ /// with an optional (schema) qualifier.
///
/// This expr has to be resolved to a list of columns before translating
logical
/// plan into physical plan.
- Wildcard,
- /// Represents a reference to all available fields in a specific schema.
- ///
- /// This expr has to be resolved to a list of columns before translating
logical
- /// plan into physical plan.
- QualifiedWildcard { qualifier: String },
+ Wildcard { qualifier: Option<String> },
/// List of grouping set expressions. Only valid in the context of an
aggregate
/// GROUP BY expression list
GroupingSet(GroupingSet),
@@ -729,7 +725,6 @@ impl Expr {
Expr::Negative(..) => "Negative",
Expr::Not(..) => "Not",
Expr::Placeholder(_) => "Placeholder",
- Expr::QualifiedWildcard { .. } => "QualifiedWildcard",
Expr::ScalarFunction(..) => "ScalarFunction",
Expr::ScalarSubquery { .. } => "ScalarSubquery",
Expr::ScalarUDF(..) => "ScalarUDF",
@@ -737,7 +732,7 @@ impl Expr {
Expr::Sort { .. } => "Sort",
Expr::TryCast { .. } => "TryCast",
Expr::WindowFunction { .. } => "WindowFunction",
- Expr::Wildcard => "Wildcard",
+ Expr::Wildcard { .. } => "Wildcard",
}
}
@@ -1292,8 +1287,10 @@ impl fmt::Display for Expr {
write!(f, "{expr} IN ([{}])", expr_vec_fmt!(list))
}
}
- Expr::Wildcard => write!(f, "*"),
- Expr::QualifiedWildcard { qualifier } => write!(f,
"{qualifier}.*"),
+ Expr::Wildcard { qualifier } => match qualifier {
+ Some(qualifier) => write!(f, "{qualifier}.*"),
+ None => write!(f, "*"),
+ },
Expr::GetIndexedField(GetIndexedField { field, expr }) => match
field {
GetFieldAccess::NamedStructField { name } => {
write!(f, "({expr})[{name}]")
@@ -1613,10 +1610,12 @@ fn create_name(e: &Expr) -> Result<String> {
Expr::Sort { .. } => {
internal_err!("Create name does not support sort expression")
}
- Expr::Wildcard => Ok("*".to_string()),
- Expr::QualifiedWildcard { .. } => {
- internal_err!("Create name does not support qualified wildcard")
- }
+ Expr::Wildcard { qualifier } => match qualifier {
+ Some(qualifier) => internal_err!(
+ "Create name does not support qualified wildcard, got
{qualifier}"
+ ),
+ None => Ok("*".to_string()),
+ },
Expr::Placeholder(Placeholder { id, .. }) => Ok((*id).to_string()),
}
}
diff --git a/datafusion/expr/src/expr_fn.rs b/datafusion/expr/src/expr_fn.rs
index 98cacc0392..0e0ad46da1 100644
--- a/datafusion/expr/src/expr_fn.rs
+++ b/datafusion/expr/src/expr_fn.rs
@@ -99,6 +99,19 @@ pub fn placeholder(id: impl Into<String>) -> Expr {
})
}
+/// Create an '*' [`Expr::Wildcard`] expression that matches all columns
+///
+/// # Example
+///
+/// ```rust
+/// # use datafusion_expr::{wildcard};
+/// let p = wildcard();
+/// assert_eq!(p.to_string(), "*")
+/// ```
+pub fn wildcard() -> Expr {
+ Expr::Wildcard { qualifier: None }
+}
+
/// Return a new expression `left <op> right`
pub fn binary_expr(left: Expr, op: Operator, right: Expr) -> Expr {
Expr::BinaryExpr(BinaryExpr::new(Box::new(left), op, Box::new(right)))
diff --git a/datafusion/expr/src/expr_schema.rs
b/datafusion/expr/src/expr_schema.rs
index 2889fac8c1..2631708fb7 100644
--- a/datafusion/expr/src/expr_schema.rs
+++ b/datafusion/expr/src/expr_schema.rs
@@ -157,13 +157,13 @@ impl ExprSchemable for Expr {
plan_datafusion_err!("Placeholder type could not be
resolved")
})
}
- Expr::Wildcard => {
+ Expr::Wildcard { qualifier } => {
// Wildcard do not really have a type and do not appear in
projections
- Ok(DataType::Null)
+ match qualifier {
+ Some(_) => internal_err!("QualifiedWildcard expressions
are not valid in a logical query plan"),
+ None => Ok(DataType::Null)
+ }
}
- Expr::QualifiedWildcard { .. } => internal_err!(
- "QualifiedWildcard expressions are not valid in a logical
query plan"
- ),
Expr::GroupingSet(_) => {
// grouping sets do not really have a type and do not appear
in projections
Ok(DataType::Null)
@@ -270,12 +270,9 @@ impl ExprSchemable for Expr {
| Expr::SimilarTo(Like { expr, pattern, .. }) => {
Ok(expr.nullable(input_schema)? ||
pattern.nullable(input_schema)?)
}
- Expr::Wildcard => internal_err!(
+ Expr::Wildcard { .. } => internal_err!(
"Wildcard expressions are not valid in a logical query plan"
),
- Expr::QualifiedWildcard { .. } => internal_err!(
- "QualifiedWildcard expressions are not valid in a logical
query plan"
- ),
Expr::GetIndexedField(GetIndexedField { expr, field }) => {
field_for_index(expr, field, input_schema).map(|x|
x.is_nullable())
}
diff --git a/datafusion/expr/src/logical_plan/builder.rs
b/datafusion/expr/src/logical_plan/builder.rs
index 162a6a959e..4a30f4e223 100644
--- a/datafusion/expr/src/logical_plan/builder.rs
+++ b/datafusion/expr/src/logical_plan/builder.rs
@@ -1287,11 +1287,16 @@ pub fn project(
for e in expr {
let e = e.into();
match e {
- Expr::Wildcard => {
+ Expr::Wildcard { qualifier: None } => {
projected_expr.extend(expand_wildcard(input_schema, &plan,
None)?)
}
- Expr::QualifiedWildcard { ref qualifier } => projected_expr
- .extend(expand_qualified_wildcard(qualifier, input_schema,
None)?),
+ Expr::Wildcard {
+ qualifier: Some(qualifier),
+ } => projected_expr.extend(expand_qualified_wildcard(
+ &qualifier,
+ input_schema,
+ None,
+ )?),
_ => projected_expr
.push(columnize_expr(normalize_col(e, &plan)?, input_schema)),
}
@@ -1590,7 +1595,7 @@ mod tests {
let plan = table_scan(Some("t1"), &employee_schema(), None)?
.join_using(t2, JoinType::Inner, vec!["id"])?
- .project(vec![Expr::Wildcard])?
+ .project(vec![Expr::Wildcard { qualifier: None }])?
.build()?;
// id column should only show up once in projection
diff --git a/datafusion/expr/src/tree_node/expr.rs
b/datafusion/expr/src/tree_node/expr.rs
index 764dcffbce..d6c14b8622 100644
--- a/datafusion/expr/src/tree_node/expr.rs
+++ b/datafusion/expr/src/tree_node/expr.rs
@@ -77,8 +77,7 @@ impl TreeNode for Expr {
| Expr::Literal(_)
| Expr::Exists { .. }
| Expr::ScalarSubquery(_)
- | Expr::Wildcard
- | Expr::QualifiedWildcard { .. }
+ | Expr::Wildcard {..}
| Expr::Placeholder (_) => vec![],
Expr::BinaryExpr(BinaryExpr { left, right, .. }) => {
vec![left.as_ref().clone(), right.as_ref().clone()]
@@ -350,10 +349,7 @@ impl TreeNode for Expr {
transform_vec(list, &mut transform)?,
negated,
)),
- Expr::Wildcard => Expr::Wildcard,
- Expr::QualifiedWildcard { qualifier } => {
- Expr::QualifiedWildcard { qualifier }
- }
+ Expr::Wildcard { qualifier } => Expr::Wildcard { qualifier },
Expr::GetIndexedField(GetIndexedField { expr, field }) => {
Expr::GetIndexedField(GetIndexedField::new(
transform_boxed(expr, &mut transform)?,
diff --git a/datafusion/expr/src/utils.rs b/datafusion/expr/src/utils.rs
index 5fc5b5b3f9..a462cdb346 100644
--- a/datafusion/expr/src/utils.rs
+++ b/datafusion/expr/src/utils.rs
@@ -292,8 +292,7 @@ pub fn expr_to_columns(expr: &Expr, accum: &mut
HashSet<Column>) -> Result<()> {
| Expr::Exists { .. }
| Expr::InSubquery(_)
| Expr::ScalarSubquery(_)
- | Expr::Wildcard
- | Expr::QualifiedWildcard { .. }
+ | Expr::Wildcard { .. }
| Expr::GetIndexedField { .. }
| Expr::Placeholder(_)
| Expr::OuterReferenceColumn { .. } => {}
diff --git a/datafusion/optimizer/src/analyzer/count_wildcard_rule.rs
b/datafusion/optimizer/src/analyzer/count_wildcard_rule.rs
index 912ac069e0..b4de322f76 100644
--- a/datafusion/optimizer/src/analyzer/count_wildcard_rule.rs
+++ b/datafusion/optimizer/src/analyzer/count_wildcard_rule.rs
@@ -129,15 +129,17 @@ impl TreeNodeRewriter for CountWildcardRewriter {
order_by,
window_frame,
}) if args.len() == 1 => match args[0] {
- Expr::Wildcard => Expr::WindowFunction(expr::WindowFunction {
- fun: window_function::WindowFunction::AggregateFunction(
- aggregate_function::AggregateFunction::Count,
- ),
- args: vec![lit(COUNT_STAR_EXPANSION)],
- partition_by,
- order_by,
- window_frame,
- }),
+ Expr::Wildcard { qualifier: None } => {
+ Expr::WindowFunction(expr::WindowFunction {
+ fun:
window_function::WindowFunction::AggregateFunction(
+ aggregate_function::AggregateFunction::Count,
+ ),
+ args: vec![lit(COUNT_STAR_EXPANSION)],
+ partition_by,
+ order_by,
+ window_frame,
+ })
+ }
_ => old_expr,
},
@@ -148,13 +150,15 @@ impl TreeNodeRewriter for CountWildcardRewriter {
filter,
order_by,
}) if args.len() == 1 => match args[0] {
- Expr::Wildcard => Expr::AggregateFunction(AggregateFunction {
- fun: aggregate_function::AggregateFunction::Count,
- args: vec![lit(COUNT_STAR_EXPANSION)],
- distinct,
- filter,
- order_by,
- }),
+ Expr::Wildcard { qualifier: None } => {
+ Expr::AggregateFunction(AggregateFunction {
+ fun: aggregate_function::AggregateFunction::Count,
+ args: vec![lit(COUNT_STAR_EXPANSION)],
+ distinct,
+ filter,
+ order_by,
+ })
+ }
_ => old_expr,
},
@@ -221,8 +225,8 @@ mod tests {
use datafusion_expr::expr::Sort;
use datafusion_expr::{
col, count, exists, expr, in_subquery, lit,
logical_plan::LogicalPlanBuilder,
- max, out_ref_col, scalar_subquery, AggregateFunction, Expr,
WindowFrame,
- WindowFrameBound, WindowFrameUnits, WindowFunction,
+ max, out_ref_col, scalar_subquery, wildcard, AggregateFunction, Expr,
+ WindowFrame, WindowFrameBound, WindowFrameUnits, WindowFunction,
};
fn assert_plan_eq(plan: &LogicalPlan, expected: &str) -> Result<()> {
@@ -237,9 +241,9 @@ mod tests {
fn test_count_wildcard_on_sort() -> Result<()> {
let table_scan = test_table_scan()?;
let plan = LogicalPlanBuilder::from(table_scan)
- .aggregate(vec![col("b")], vec![count(Expr::Wildcard)])?
- .project(vec![count(Expr::Wildcard)])?
- .sort(vec![count(Expr::Wildcard).sort(true, false)])?
+ .aggregate(vec![col("b")], vec![count(wildcard())])?
+ .project(vec![count(wildcard())])?
+ .sort(vec![count(wildcard()).sort(true, false)])?
.build()?;
let expected = "Sort: COUNT(*) ASC NULLS LAST [COUNT(*):Int64;N]\
\n Projection: COUNT(*) [COUNT(*):Int64;N]\
@@ -258,8 +262,8 @@ mod tests {
col("a"),
Arc::new(
LogicalPlanBuilder::from(table_scan_t2)
- .aggregate(Vec::<Expr>::new(),
vec![count(Expr::Wildcard)])?
- .project(vec![count(Expr::Wildcard)])?
+ .aggregate(Vec::<Expr>::new(),
vec![count(wildcard())])?
+ .project(vec![count(wildcard())])?
.build()?,
),
))?
@@ -282,8 +286,8 @@ mod tests {
let plan = LogicalPlanBuilder::from(table_scan_t1)
.filter(exists(Arc::new(
LogicalPlanBuilder::from(table_scan_t2)
- .aggregate(Vec::<Expr>::new(),
vec![count(Expr::Wildcard)])?
- .project(vec![count(Expr::Wildcard)])?
+ .aggregate(Vec::<Expr>::new(), vec![count(wildcard())])?
+ .project(vec![count(wildcard())])?
.build()?,
)))?
.build()?;
@@ -336,7 +340,7 @@ mod tests {
let plan = LogicalPlanBuilder::from(table_scan)
.window(vec![Expr::WindowFunction(expr::WindowFunction::new(
WindowFunction::AggregateFunction(AggregateFunction::Count),
- vec![Expr::Wildcard],
+ vec![wildcard()],
vec![],
vec![Expr::Sort(Sort::new(Box::new(col("a")), false, true))],
WindowFrame {
@@ -347,7 +351,7 @@ mod tests {
end_bound:
WindowFrameBound::Following(ScalarValue::UInt32(Some(2))),
},
))])?
- .project(vec![count(Expr::Wildcard)])?
+ .project(vec![count(wildcard())])?
.build()?;
let expected = "Projection: COUNT(UInt8(1)) AS COUNT(*)
[COUNT(*):Int64;N]\
@@ -360,8 +364,8 @@ mod tests {
fn test_count_wildcard_on_aggregate() -> Result<()> {
let table_scan = test_table_scan()?;
let plan = LogicalPlanBuilder::from(table_scan)
- .aggregate(Vec::<Expr>::new(), vec![count(Expr::Wildcard)])?
- .project(vec![count(Expr::Wildcard)])?
+ .aggregate(Vec::<Expr>::new(), vec![count(wildcard())])?
+ .project(vec![count(wildcard())])?
.build()?;
let expected = "Projection: COUNT(*) [COUNT(*):Int64;N]\
@@ -374,8 +378,8 @@ mod tests {
fn test_count_wildcard_on_nesting() -> Result<()> {
let table_scan = test_table_scan()?;
let plan = LogicalPlanBuilder::from(table_scan)
- .aggregate(Vec::<Expr>::new(), vec![max(count(Expr::Wildcard))])?
- .project(vec![count(Expr::Wildcard)])?
+ .aggregate(Vec::<Expr>::new(), vec![max(count(wildcard()))])?
+ .project(vec![count(wildcard())])?
.build()?;
let expected = "Projection: COUNT(UInt8(1)) AS COUNT(*)
[COUNT(*):Int64;N]\
diff --git a/datafusion/optimizer/src/analyzer/inline_table_scan.rs
b/datafusion/optimizer/src/analyzer/inline_table_scan.rs
index 3d0dabdd37..90af7aec82 100644
--- a/datafusion/optimizer/src/analyzer/inline_table_scan.rs
+++ b/datafusion/optimizer/src/analyzer/inline_table_scan.rs
@@ -126,7 +126,7 @@ fn generate_projection_expr(
));
}
} else {
- exprs.push(Expr::Wildcard);
+ exprs.push(Expr::Wildcard { qualifier: None });
}
Ok(exprs)
}
diff --git a/datafusion/optimizer/src/common_subexpr_eliminate.rs
b/datafusion/optimizer/src/common_subexpr_eliminate.rs
index 68a6a5607a..8025402cce 100644
--- a/datafusion/optimizer/src/common_subexpr_eliminate.rs
+++ b/datafusion/optimizer/src/common_subexpr_eliminate.rs
@@ -514,7 +514,7 @@ impl ExprMask {
| Expr::ScalarVariable(..)
| Expr::Alias(..)
| Expr::Sort { .. }
- | Expr::Wildcard
+ | Expr::Wildcard { .. }
);
let is_aggr = matches!(
diff --git a/datafusion/optimizer/src/push_down_filter.rs
b/datafusion/optimizer/src/push_down_filter.rs
index ae986b3c84..05f4072e38 100644
--- a/datafusion/optimizer/src/push_down_filter.rs
+++ b/datafusion/optimizer/src/push_down_filter.rs
@@ -250,8 +250,7 @@ fn can_evaluate_as_join_condition(predicate: &Expr) ->
Result<bool> {
| Expr::AggregateFunction(_)
| Expr::WindowFunction(_)
| Expr::AggregateUDF { .. }
- | Expr::Wildcard
- | Expr::QualifiedWildcard { .. }
+ | Expr::Wildcard { .. }
| Expr::GroupingSet(_) => internal_err!("Unsupported predicate type"),
})?;
Ok(is_evaluate)
diff --git a/datafusion/optimizer/src/simplify_expressions/expr_simplifier.rs
b/datafusion/optimizer/src/simplify_expressions/expr_simplifier.rs
index 04fdcca0a9..c5a1aacce7 100644
--- a/datafusion/optimizer/src/simplify_expressions/expr_simplifier.rs
+++ b/datafusion/optimizer/src/simplify_expressions/expr_simplifier.rs
@@ -343,8 +343,7 @@ impl<'a> ConstEvaluator<'a> {
| Expr::WindowFunction { .. }
| Expr::Sort { .. }
| Expr::GroupingSet(_)
- | Expr::Wildcard
- | Expr::QualifiedWildcard { .. }
+ | Expr::Wildcard { .. }
| Expr::Placeholder(_) => false,
Expr::ScalarFunction(ScalarFunction { fun, .. }) => {
Self::volatility_ok(fun.volatility())
diff --git a/datafusion/proto/proto/datafusion.proto
b/datafusion/proto/proto/datafusion.proto
index f9deca2f1e..9dcd55e731 100644
--- a/datafusion/proto/proto/datafusion.proto
+++ b/datafusion/proto/proto/datafusion.proto
@@ -363,7 +363,7 @@ message LogicalExprNode {
SortExprNode sort = 12;
NegativeNode negative = 13;
InListNode in_list = 14;
- bool wildcard = 15;
+ Wildcard wildcard = 15;
ScalarFunctionNode scalar_function = 16;
TryCastNode try_cast = 17;
@@ -399,6 +399,10 @@ message LogicalExprNode {
}
}
+message Wildcard {
+ optional string qualifier = 1;
+}
+
message PlaceholderNode {
string id = 1;
ArrowType data_type = 2;
diff --git a/datafusion/proto/src/generated/pbjson.rs
b/datafusion/proto/src/generated/pbjson.rs
index 81f260c28b..948ad0c4ce 100644
--- a/datafusion/proto/src/generated/pbjson.rs
+++ b/datafusion/proto/src/generated/pbjson.rs
@@ -12705,7 +12705,8 @@ impl<'de> serde::Deserialize<'de> for LogicalExprNode {
if expr_type__.is_some() {
return
Err(serde::de::Error::duplicate_field("wildcard"));
}
- expr_type__ =
map_.next_value::<::std::option::Option<_>>()?.map(logical_expr_node::ExprType::Wildcard);
+ expr_type__ =
map_.next_value::<::std::option::Option<_>>()?.map(logical_expr_node::ExprType::Wildcard)
+;
}
GeneratedField::ScalarFunction => {
if expr_type__.is_some() {
@@ -25082,6 +25083,97 @@ impl<'de> serde::Deserialize<'de> for WhenThen {
deserializer.deserialize_struct("datafusion.WhenThen", FIELDS,
GeneratedVisitor)
}
}
+impl serde::Serialize for Wildcard {
+ #[allow(deprecated)]
+ fn serialize<S>(&self, serializer: S) -> std::result::Result<S::Ok,
S::Error>
+ where
+ S: serde::Serializer,
+ {
+ use serde::ser::SerializeStruct;
+ let mut len = 0;
+ if self.qualifier.is_some() {
+ len += 1;
+ }
+ let mut struct_ser =
serializer.serialize_struct("datafusion.Wildcard", len)?;
+ if let Some(v) = self.qualifier.as_ref() {
+ struct_ser.serialize_field("qualifier", v)?;
+ }
+ struct_ser.end()
+ }
+}
+impl<'de> serde::Deserialize<'de> for Wildcard {
+ #[allow(deprecated)]
+ fn deserialize<D>(deserializer: D) -> std::result::Result<Self, D::Error>
+ where
+ D: serde::Deserializer<'de>,
+ {
+ const FIELDS: &[&str] = &[
+ "qualifier",
+ ];
+
+ #[allow(clippy::enum_variant_names)]
+ enum GeneratedField {
+ Qualifier,
+ }
+ impl<'de> serde::Deserialize<'de> for GeneratedField {
+ fn deserialize<D>(deserializer: D) ->
std::result::Result<GeneratedField, D::Error>
+ where
+ D: serde::Deserializer<'de>,
+ {
+ struct GeneratedVisitor;
+
+ impl<'de> serde::de::Visitor<'de> for GeneratedVisitor {
+ type Value = GeneratedField;
+
+ fn expecting(&self, formatter: &mut
std::fmt::Formatter<'_>) -> std::fmt::Result {
+ write!(formatter, "expected one of: {:?}", &FIELDS)
+ }
+
+ #[allow(unused_variables)]
+ fn visit_str<E>(self, value: &str) ->
std::result::Result<GeneratedField, E>
+ where
+ E: serde::de::Error,
+ {
+ match value {
+ "qualifier" => Ok(GeneratedField::Qualifier),
+ _ => Err(serde::de::Error::unknown_field(value,
FIELDS)),
+ }
+ }
+ }
+ deserializer.deserialize_identifier(GeneratedVisitor)
+ }
+ }
+ struct GeneratedVisitor;
+ impl<'de> serde::de::Visitor<'de> for GeneratedVisitor {
+ type Value = Wildcard;
+
+ fn expecting(&self, formatter: &mut std::fmt::Formatter<'_>) ->
std::fmt::Result {
+ formatter.write_str("struct datafusion.Wildcard")
+ }
+
+ fn visit_map<V>(self, mut map_: V) ->
std::result::Result<Wildcard, V::Error>
+ where
+ V: serde::de::MapAccess<'de>,
+ {
+ let mut qualifier__ = None;
+ while let Some(k) = map_.next_key()? {
+ match k {
+ GeneratedField::Qualifier => {
+ if qualifier__.is_some() {
+ return
Err(serde::de::Error::duplicate_field("qualifier"));
+ }
+ qualifier__ = map_.next_value()?;
+ }
+ }
+ }
+ Ok(Wildcard {
+ qualifier: qualifier__,
+ })
+ }
+ }
+ deserializer.deserialize_struct("datafusion.Wildcard", FIELDS,
GeneratedVisitor)
+ }
+}
impl serde::Serialize for WindowAggExecNode {
#[allow(deprecated)]
fn serialize<S>(&self, serializer: S) -> std::result::Result<S::Ok,
S::Error>
diff --git a/datafusion/proto/src/generated/prost.rs
b/datafusion/proto/src/generated/prost.rs
index ae64c11b3b..93b0a05c31 100644
--- a/datafusion/proto/src/generated/prost.rs
+++ b/datafusion/proto/src/generated/prost.rs
@@ -569,8 +569,8 @@ pub mod logical_expr_node {
Negative(::prost::alloc::boxed::Box<super::NegativeNode>),
#[prost(message, tag = "14")]
InList(::prost::alloc::boxed::Box<super::InListNode>),
- #[prost(bool, tag = "15")]
- Wildcard(bool),
+ #[prost(message, tag = "15")]
+ Wildcard(super::Wildcard),
#[prost(message, tag = "16")]
ScalarFunction(super::ScalarFunctionNode),
#[prost(message, tag = "17")]
@@ -616,6 +616,12 @@ pub mod logical_expr_node {
}
#[allow(clippy::derive_partial_eq_without_eq)]
#[derive(Clone, PartialEq, ::prost::Message)]
+pub struct Wildcard {
+ #[prost(string, optional, tag = "1")]
+ pub qualifier: ::core::option::Option<::prost::alloc::string::String>,
+}
+#[allow(clippy::derive_partial_eq_without_eq)]
+#[derive(Clone, PartialEq, ::prost::Message)]
pub struct PlaceholderNode {
#[prost(string, tag = "1")]
pub id: ::prost::alloc::string::String,
diff --git a/datafusion/proto/src/logical_plan/from_proto.rs
b/datafusion/proto/src/logical_plan/from_proto.rs
index 31fffca3bb..b2b66693f7 100644
--- a/datafusion/proto/src/logical_plan/from_proto.rs
+++ b/datafusion/proto/src/logical_plan/from_proto.rs
@@ -1296,7 +1296,9 @@ pub fn parse_expr(
.collect::<Result<Vec<_>, _>>()?,
in_list.negated,
))),
- ExprType::Wildcard(_) => Ok(Expr::Wildcard),
+ ExprType::Wildcard(protobuf::Wildcard { qualifier }) =>
Ok(Expr::Wildcard {
+ qualifier: qualifier.clone(),
+ }),
ExprType::ScalarFunction(expr) => {
let scalar_function = protobuf::ScalarFunction::try_from(expr.fun)
.map_err(|_| Error::unknown("ScalarFunction", expr.fun))?;
diff --git a/datafusion/proto/src/logical_plan/to_proto.rs
b/datafusion/proto/src/logical_plan/to_proto.rs
index 803becbcae..e590731f58 100644
--- a/datafusion/proto/src/logical_plan/to_proto.rs
+++ b/datafusion/proto/src/logical_plan/to_proto.rs
@@ -960,8 +960,10 @@ impl TryFrom<&Expr> for protobuf::LogicalExprNode {
expr_type: Some(ExprType::InList(expr)),
}
}
- Expr::Wildcard => Self {
- expr_type: Some(ExprType::Wildcard(true)),
+ Expr::Wildcard { qualifier } => Self {
+ expr_type: Some(ExprType::Wildcard(protobuf::Wildcard {
+ qualifier: qualifier.clone(),
+ })),
},
Expr::ScalarSubquery(_)
| Expr::InSubquery(_)
@@ -1052,11 +1054,6 @@ impl TryFrom<&Expr> for protobuf::LogicalExprNode {
})),
}
}
-
- Expr::QualifiedWildcard { .. } => return Err(Error::General(
- "Proto serialization error: Expr::QualifiedWildcard { .. } not
supported"
- .to_string(),
- )),
};
Ok(expr_node)
diff --git a/datafusion/proto/tests/cases/roundtrip_logical_plan.rs
b/datafusion/proto/tests/cases/roundtrip_logical_plan.rs
index ca801df337..97c553dc04 100644
--- a/datafusion/proto/tests/cases/roundtrip_logical_plan.rs
+++ b/datafusion/proto/tests/cases/roundtrip_logical_plan.rs
@@ -1147,7 +1147,17 @@ fn roundtrip_inlist() {
#[test]
fn roundtrip_wildcard() {
- let test_expr = Expr::Wildcard;
+ let test_expr = Expr::Wildcard { qualifier: None };
+
+ let ctx = SessionContext::new();
+ roundtrip_expr_test(test_expr, ctx);
+}
+
+#[test]
+fn roundtrip_qualified_wildcard() {
+ let test_expr = Expr::Wildcard {
+ qualifier: Some("foo".into()),
+ };
let ctx = SessionContext::new();
roundtrip_expr_test(test_expr, ctx);
diff --git a/datafusion/sql/src/expr/function.rs
b/datafusion/sql/src/expr/function.rs
index c58b8319ce..c77ef64718 100644
--- a/datafusion/sql/src/expr/function.rs
+++ b/datafusion/sql/src/expr/function.rs
@@ -212,11 +212,13 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> {
FunctionArg::Named {
name: _,
arg: FunctionArgExpr::Wildcard,
- } => Ok(Expr::Wildcard),
+ } => Ok(Expr::Wildcard { qualifier: None }),
FunctionArg::Unnamed(FunctionArgExpr::Expr(arg)) => {
self.sql_expr_to_logical_expr(arg, schema, planner_context)
}
- FunctionArg::Unnamed(FunctionArgExpr::Wildcard) =>
Ok(Expr::Wildcard),
+ FunctionArg::Unnamed(FunctionArgExpr::Wildcard) => {
+ Ok(Expr::Wildcard { qualifier: None })
+ }
_ => not_impl_err!("Unsupported qualified wildcard argument:
{sql:?}"),
}
}