This is an automated email from the ASF dual-hosted git repository.
alamb pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-datafusion.git
The following commit(s) were added to refs/heads/main by this push:
new be6efbc93 [feat]:fast check has column (#5328)
be6efbc93 is described below
commit be6efbc93f04d4459e8f61345c830afc73d08fd7
Author: 苏小刚 <[email protected]>
AuthorDate: Fri Mar 3 19:46:03 2023 +0800
[feat]:fast check has column (#5328)
* impl has_column
* replace is_err by has_column
* add comment
* avoid str allocate in Err,
error should not be returned
* Update datafusion/common/src/dfschema.rs
Co-authored-by: Daniël Heres <[email protected]>
* Update datafusion/common/src/dfschema.rs
Co-authored-by: Daniël Heres <[email protected]>
* format
* avoid the collect()
* fix one more is_ok
---------
Co-authored-by: Daniël Heres <[email protected]>
Co-authored-by: Andrew Lamb <[email protected]>
---
datafusion/common/src/dfschema.rs | 21 +++++++++++++++++++++
datafusion/expr/src/logical_plan/builder.rs | 13 +++++--------
.../optimizer/src/decorrelate_where_exists.rs | 2 +-
datafusion/optimizer/src/decorrelate_where_in.rs | 2 +-
datafusion/optimizer/src/eliminate_outer_join.rs | 12 ++++++------
5 files changed, 34 insertions(+), 16 deletions(-)
diff --git a/datafusion/common/src/dfschema.rs b/datafusion/common/src/dfschema.rs
index b49f07310..71b67175d 100644
--- a/datafusion/common/src/dfschema.rs
+++ b/datafusion/common/src/dfschema.rs
@@ -331,6 +331,27 @@ impl DFSchema {
}
}
+ /// Find if the field exists with the given name
+ pub fn has_column_with_unqualified_name(&self, name: &str) -> bool {
+ self.fields().iter().any(|field| field.name() == name)
+ }
+
+ /// Find if the field exists with the given qualified name
+ pub fn has_column_with_qualified_name(&self, qualifier: &str, name: &str) -> bool {
+ self.fields().iter().any(|field| {
+ field.qualifier().map(|q| q.eq(qualifier)).unwrap_or(false)
+ && field.name() == name
+ })
+ }
+
+ /// Find if the field exists with the given qualified column
+ pub fn has_column(&self, column: &Column) -> bool {
+ match &column.relation {
+ Some(r) => self.has_column_with_qualified_name(r, &column.name),
+ None => self.has_column_with_unqualified_name(&column.name),
+ }
+ }
+
/// Check to see if unqualified field names matches field names in Arrow schema
pub fn matches_arrow_schema(&self, arrow_schema: &Schema) -> bool {
self.fields
diff --git a/datafusion/expr/src/logical_plan/builder.rs b/datafusion/expr/src/logical_plan/builder.rs
index e6ff15244..775818e99 100644
--- a/datafusion/expr/src/logical_plan/builder.rs
+++ b/datafusion/expr/src/logical_plan/builder.rs
@@ -377,10 +377,7 @@ impl LogicalPlanBuilder {
input,
mut expr,
schema: _,
- }) if missing_cols
- .iter()
- .all(|c| input.schema().field_from_column(c).is_ok()) =>
- {
+ }) if missing_cols.iter().all(|c| input.schema().has_column(c)) => {
let mut missing_exprs = missing_cols
.iter()
.map(|c| normalize_col(Expr::Column(c.clone()), &input))
@@ -723,13 +720,13 @@ impl LogicalPlanBuilder {
let mut join_on: Vec<(Expr, Expr)> = vec![];
let mut filters: Option<Expr> = None;
for (l, r) in &on {
- if self.plan.schema().field_from_column(l).is_ok()
- && right.schema().field_from_column(r).is_ok()
+ if self.plan.schema().has_column(l)
+ && right.schema().has_column(r)
&& can_hash(self.plan.schema().field_from_column(l)?.data_type())
{
join_on.push((Expr::Column(l.clone()), Expr::Column(r.clone())));
- } else if self.plan.schema().field_from_column(r).is_ok()
- && right.schema().field_from_column(l).is_ok()
+ } else if self.plan.schema().has_column(l)
+ && right.schema().has_column(r)
&& can_hash(self.plan.schema().field_from_column(r)?.data_type())
{
join_on.push((Expr::Column(r.clone()), Expr::Column(l.clone())));
diff --git a/datafusion/optimizer/src/decorrelate_where_exists.rs b/datafusion/optimizer/src/decorrelate_where_exists.rs
index 72a68b312..023629b97 100644
--- a/datafusion/optimizer/src/decorrelate_where_exists.rs
+++ b/datafusion/optimizer/src/decorrelate_where_exists.rs
@@ -172,7 +172,7 @@ fn optimize_exists(
let using_cols: Vec<Column> = expr
.to_columns()?
.into_iter()
- .filter(|col| input_schema.field_from_column(col).is_ok())
+ .filter(|col| input_schema.has_column(col))
.collect::<_>();
cols.extend(using_cols);
diff --git a/datafusion/optimizer/src/decorrelate_where_in.rs b/datafusion/optimizer/src/decorrelate_where_in.rs
index bc7009861..0aa3bac9e 100644
--- a/datafusion/optimizer/src/decorrelate_where_in.rs
+++ b/datafusion/optimizer/src/decorrelate_where_in.rs
@@ -166,7 +166,7 @@ fn optimize_where_in(
let using_cols: Vec<Column> = expr
.to_columns()?
.into_iter()
- .filter(|col| input_schema.field_from_column(col).is_ok())
+ .filter(|col| input_schema.has_column(col))
.collect::<_>();
cols.extend(using_cols);
diff --git a/datafusion/optimizer/src/eliminate_outer_join.rs b/datafusion/optimizer/src/eliminate_outer_join.rs
index 66f22da21..8dfdfae03 100644
--- a/datafusion/optimizer/src/eliminate_outer_join.rs
+++ b/datafusion/optimizer/src/eliminate_outer_join.rs
@@ -84,10 +84,10 @@ impl OptimizerRule for EliminateOuterJoin {
let mut left_non_nullable = false;
let mut right_non_nullable = false;
for col in non_nullable_cols.iter() {
- if join.left.schema().field_from_column(col).is_ok() {
+ if join.left.schema().has_column(col) {
left_non_nullable = true;
}
- if join.right.schema().field_from_column(col).is_ok() {
+ if join.right.schema().has_column(col) {
right_non_nullable = true;
}
}
@@ -251,10 +251,10 @@ fn extract_non_nullable_columns(
{
for left_col in &left_non_nullable_cols {
for right_col in &right_non_nullable_cols {
- if (left_schema.field_from_column(left_col).is_ok()
- && left_schema.field_from_column(right_col).is_ok())
- || (right_schema.field_from_column(left_col).is_ok()
- && right_schema.field_from_column(right_col).is_ok())
+ if (left_schema.has_column(left_col)
+ && left_schema.has_column(right_col))
+ || (right_schema.has_column(left_col)
+ && right_schema.has_column(right_col))
{
non_nullable_cols.push(left_col.clone());
break;