This is an automated email from the ASF dual-hosted git repository.
agrove pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-datafusion.git
The following commit(s) were added to refs/heads/master by this push:
new 0651a5e36 Create new `datafusion-optimizer` crate for logical
optimizer rules (#2675)
0651a5e36 is described below
commit 0651a5e3634776261daa7b7db5c772999112eb02
Author: Andy Grove <[email protected]>
AuthorDate: Thu Jun 2 13:15:25 2022 -0600
Create new `datafusion-optimizer` crate for logical optimizer rules (#2675)
---
Cargo.toml | 1 +
datafusion/core/Cargo.toml | 1 +
datafusion/core/src/optimizer/mod.rs | 16 +++----
datafusion/core/src/test/mod.rs | 18 +------
datafusion/optimizer/Cargo.toml | 46 ++++++++++++++++++
datafusion/optimizer/README.md | 26 ++++++++++
.../src}/common_subexpr_eliminate.rs | 2 +-
.../src}/eliminate_filter.rs | 3 +-
.../optimizer => optimizer/src}/eliminate_limit.rs | 2 +-
.../src}/filter_push_down.rs | 5 +-
.../src/optimizer/mod.rs => optimizer/src/lib.rs} | 10 ++--
.../optimizer => optimizer/src}/limit_push_down.rs | 2 +-
.../src/optimizer => optimizer/src}/optimizer.rs | 0
.../src}/projection_push_down.rs | 3 +-
.../src}/single_distinct_to_groupby.rs | 6 +--
.../src}/subquery_filter_to_join.rs | 5 +-
datafusion/optimizer/src/test/mod.rs | 56 ++++++++++++++++++++++
.../{core => optimizer}/src/test/user_defined.rs | 8 ++--
.../{core/src/optimizer => optimizer/src}/utils.rs | 2 +-
19 files changed, 158 insertions(+), 54 deletions(-)
diff --git a/Cargo.toml b/Cargo.toml
index 1cc7aa6eb..7a349735b 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -22,6 +22,7 @@ members = [
"datafusion/data-access",
"datafusion/expr",
"datafusion/jit",
+ "datafusion/optimizer",
"datafusion/physical-expr",
"datafusion/proto",
"datafusion/row",
diff --git a/datafusion/core/Cargo.toml b/datafusion/core/Cargo.toml
index 79a48cf15..a598b65d3 100644
--- a/datafusion/core/Cargo.toml
+++ b/datafusion/core/Cargo.toml
@@ -63,6 +63,7 @@ datafusion-common = { path = "../common", version = "8.0.0",
features = ["parque
datafusion-data-access = { path = "../data-access", version = "8.0.0" }
datafusion-expr = { path = "../expr", version = "8.0.0" }
datafusion-jit = { path = "../jit", version = "8.0.0", optional = true }
+datafusion-optimizer = { path = "../optimizer", version = "8.0.0" }
datafusion-physical-expr = { path = "../physical-expr", version = "8.0.0" }
datafusion-row = { path = "../row", version = "8.0.0" }
datafusion-sql = { path = "../sql", version = "8.0.0" }
diff --git a/datafusion/core/src/optimizer/mod.rs
b/datafusion/core/src/optimizer/mod.rs
index b274ab645..cf6412db9 100644
--- a/datafusion/core/src/optimizer/mod.rs
+++ b/datafusion/core/src/optimizer/mod.rs
@@ -19,14 +19,10 @@
//! some simple rules to a logical plan, such as "Projection Push Down" and
"Type Coercion".
#![allow(clippy::module_inception)]
-pub mod common_subexpr_eliminate;
-pub mod eliminate_filter;
-pub mod eliminate_limit;
-pub mod filter_push_down;
-pub mod limit_push_down;
-pub mod optimizer;
-pub mod projection_push_down;
pub mod simplify_expressions;
-pub mod single_distinct_to_groupby;
-pub mod subquery_filter_to_join;
-pub mod utils;
+
+pub use datafusion_optimizer::{
+ common_subexpr_eliminate, eliminate_filter, eliminate_limit,
filter_push_down,
+ limit_push_down, optimizer, projection_push_down,
single_distinct_to_groupby,
+ subquery_filter_to_join, utils,
+};
diff --git a/datafusion/core/src/test/mod.rs b/datafusion/core/src/test/mod.rs
index dd00a5028..304c1b376 100644
--- a/datafusion/core/src/test/mod.rs
+++ b/datafusion/core/src/test/mod.rs
@@ -24,7 +24,7 @@ use crate::error::Result;
use crate::from_slice::FromSlice;
use crate::logical_plan::LogicalPlan;
use crate::physical_plan::file_format::{CsvExec, FileScanConfig};
-use crate::test_util::{aggr_test_schema, scan_empty};
+use crate::test_util::aggr_test_schema;
use array::{Array, ArrayRef};
use arrow::array::{self, DecimalBuilder, Int32Array};
use arrow::datatypes::{DataType, Field, Schema, SchemaRef};
@@ -128,21 +128,6 @@ pub fn partitioned_csv_config(
})
}
-/// some tests share a common table with different names
-pub fn test_table_scan_with_name(name: &str) -> Result<LogicalPlan> {
- let schema = Schema::new(vec![
- Field::new("a", DataType::UInt32, false),
- Field::new("b", DataType::UInt32, false),
- Field::new("c", DataType::UInt32, false),
- ]);
- scan_empty(Some(name), &schema, None)?.build()
-}
-
-/// some tests share a common table
-pub fn test_table_scan() -> Result<LogicalPlan> {
- test_table_scan_with_name("test")
-}
-
pub fn assert_fields_eq(plan: &LogicalPlan, expected: Vec<&str>) {
let actual: Vec<String> = plan
.schema()
@@ -259,5 +244,4 @@ fn create_batch(schema: &Schema) -> RecordBatch {
pub mod exec;
pub mod object_store;
-pub mod user_defined;
pub mod variable;
diff --git a/datafusion/optimizer/Cargo.toml b/datafusion/optimizer/Cargo.toml
new file mode 100644
index 000000000..4d024f4c5
--- /dev/null
+++ b/datafusion/optimizer/Cargo.toml
@@ -0,0 +1,46 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+[package]
+name = "datafusion-optimizer"
+description = "DataFusion Query Optimizer"
+version = "8.0.0"
+homepage = "https://github.com/apache/arrow-datafusion"
+repository = "https://github.com/apache/arrow-datafusion"
+readme = "README.md"
+authors = ["Apache Arrow <[email protected]>"]
+license = "Apache-2.0"
+keywords = [ "datafusion", "query", "optimizer" ]
+edition = "2021"
+rust-version = "1.59"
+
+[lib]
+name = "datafusion_optimizer"
+path = "src/lib.rs"
+
+[features]
+default = ["unicode_expressions"]
+unicode_expressions = []
+
+[dependencies]
+arrow = { version = "15.0.0", features = ["prettyprint"] }
+async-trait = "0.1.41"
+chrono = { version = "0.4", default-features = false }
+datafusion-common = { path = "../common", version = "8.0.0" }
+datafusion-expr = { path = "../expr", version = "8.0.0" }
+hashbrown = { version = "0.12", features = ["raw"] }
+log = "^0.4"
diff --git a/datafusion/optimizer/README.md b/datafusion/optimizer/README.md
new file mode 100644
index 000000000..39d28a8fa
--- /dev/null
+++ b/datafusion/optimizer/README.md
@@ -0,0 +1,26 @@
+<!---
+ Licensed to the Apache Software Foundation (ASF) under one
+ or more contributor license agreements. See the NOTICE file
+ distributed with this work for additional information
+ regarding copyright ownership. The ASF licenses this file
+ to you under the Apache License, Version 2.0 (the
+ "License"); you may not use this file except in compliance
+ with the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing,
+ software distributed under the License is distributed on an
+ "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ KIND, either express or implied. See the License for the
+ specific language governing permissions and limitations
+ under the License.
+-->
+
+# DataFusion Query Optimizer Rules
+
+[DataFusion][df] is an extensible query execution framework, written in Rust,
that uses Apache Arrow as its in-memory format.
+
+This crate is a submodule of DataFusion that provides query optimizer rules.
+
+[df]: https://crates.io/crates/datafusion
diff --git a/datafusion/core/src/optimizer/common_subexpr_eliminate.rs
b/datafusion/optimizer/src/common_subexpr_eliminate.rs
similarity index 99%
rename from datafusion/core/src/optimizer/common_subexpr_eliminate.rs
rename to datafusion/optimizer/src/common_subexpr_eliminate.rs
index 916e99713..bc635c215 100644
--- a/datafusion/core/src/optimizer/common_subexpr_eliminate.rs
+++ b/datafusion/optimizer/src/common_subexpr_eliminate.rs
@@ -17,7 +17,7 @@
//! Eliminate common sub-expression.
-use crate::optimizer::optimizer::{OptimizerConfig, OptimizerRule};
+use crate::{OptimizerConfig, OptimizerRule};
use arrow::datatypes::DataType;
use datafusion_common::{DFField, DFSchema, Result};
use datafusion_expr::{
diff --git a/datafusion/core/src/optimizer/eliminate_filter.rs
b/datafusion/optimizer/src/eliminate_filter.rs
similarity index 98%
rename from datafusion/core/src/optimizer/eliminate_filter.rs
rename to datafusion/optimizer/src/eliminate_filter.rs
index 4bbc2c401..86cd3bb8d 100644
--- a/datafusion/core/src/optimizer/eliminate_filter.rs
+++ b/datafusion/optimizer/src/eliminate_filter.rs
@@ -18,6 +18,7 @@
//! Optimizer rule to replace `where false` on a plan with an empty relation.
//! This saves time in planning and executing the query.
//! Note that this rule should be applied after simplify expressions optimizer
rule.
+use crate::{OptimizerConfig, OptimizerRule};
use datafusion_common::{Result, ScalarValue};
use datafusion_expr::{
logical_plan::{EmptyRelation, Filter, LogicalPlan},
@@ -25,8 +26,6 @@ use datafusion_expr::{
Expr,
};
-use crate::optimizer::optimizer::{OptimizerConfig, OptimizerRule};
-
/// Optimization rule that eliminates the scalar value (true/false) filter with
an [LogicalPlan::EmptyRelation]
#[derive(Default)]
pub struct EliminateFilter;
diff --git a/datafusion/core/src/optimizer/eliminate_limit.rs
b/datafusion/optimizer/src/eliminate_limit.rs
similarity index 98%
rename from datafusion/core/src/optimizer/eliminate_limit.rs
rename to datafusion/optimizer/src/eliminate_limit.rs
index 27e5fab17..f6d3b8472 100644
--- a/datafusion/core/src/optimizer/eliminate_limit.rs
+++ b/datafusion/optimizer/src/eliminate_limit.rs
@@ -17,7 +17,7 @@
//! Optimizer rule to replace `LIMIT 0` on a plan with an empty relation.
//! This saves time in planning and executing the query.
-use crate::optimizer::optimizer::{OptimizerConfig, OptimizerRule};
+use crate::{OptimizerConfig, OptimizerRule};
use datafusion_common::Result;
use datafusion_expr::{
logical_plan::{EmptyRelation, Limit, LogicalPlan},
diff --git a/datafusion/core/src/optimizer/filter_push_down.rs
b/datafusion/optimizer/src/filter_push_down.rs
similarity index 99%
rename from datafusion/core/src/optimizer/filter_push_down.rs
rename to datafusion/optimizer/src/filter_push_down.rs
index a1a50a378..96abdc4c5 100644
--- a/datafusion/core/src/optimizer/filter_push_down.rs
+++ b/datafusion/optimizer/src/filter_push_down.rs
@@ -14,10 +14,7 @@
//! Filter Push Down optimizer rule ensures that filters are applied as early
as possible in the plan
-use crate::optimizer::{
- optimizer::{OptimizerConfig, OptimizerRule},
- utils,
-};
+use crate::{utils, OptimizerConfig, OptimizerRule};
use datafusion_common::{Column, DFSchema, Result};
use datafusion_expr::{
col,
diff --git a/datafusion/core/src/optimizer/mod.rs
b/datafusion/optimizer/src/lib.rs
similarity index 81%
copy from datafusion/core/src/optimizer/mod.rs
copy to datafusion/optimizer/src/lib.rs
index b274ab645..9a5130050 100644
--- a/datafusion/core/src/optimizer/mod.rs
+++ b/datafusion/optimizer/src/lib.rs
@@ -15,10 +15,6 @@
// specific language governing permissions and limitations
// under the License.
-//! This module contains a query optimizer that operates against a logical
plan and applies
-//! some simple rules to a logical plan, such as "Projection Push Down" and
"Type Coercion".
-
-#![allow(clippy::module_inception)]
pub mod common_subexpr_eliminate;
pub mod eliminate_filter;
pub mod eliminate_limit;
@@ -26,7 +22,11 @@ pub mod filter_push_down;
pub mod limit_push_down;
pub mod optimizer;
pub mod projection_push_down;
-pub mod simplify_expressions;
pub mod single_distinct_to_groupby;
pub mod subquery_filter_to_join;
pub mod utils;
+
+#[cfg(test)]
+pub mod test;
+
+pub use optimizer::{OptimizerConfig, OptimizerRule};
diff --git a/datafusion/core/src/optimizer/limit_push_down.rs
b/datafusion/optimizer/src/limit_push_down.rs
similarity index 99%
rename from datafusion/core/src/optimizer/limit_push_down.rs
rename to datafusion/optimizer/src/limit_push_down.rs
index 41fb7cc5f..91f976001 100644
--- a/datafusion/core/src/optimizer/limit_push_down.rs
+++ b/datafusion/optimizer/src/limit_push_down.rs
@@ -17,7 +17,7 @@
//! Optimizer rule to push down LIMIT in the query plan
//! It will push down through projection, limits (taking the smaller limit)
-use crate::optimizer::optimizer::{OptimizerConfig, OptimizerRule};
+use crate::{OptimizerConfig, OptimizerRule};
use datafusion_common::{DataFusionError, Result};
use datafusion_expr::{
logical_plan::{
diff --git a/datafusion/core/src/optimizer/optimizer.rs
b/datafusion/optimizer/src/optimizer.rs
similarity index 100%
rename from datafusion/core/src/optimizer/optimizer.rs
rename to datafusion/optimizer/src/optimizer.rs
diff --git a/datafusion/core/src/optimizer/projection_push_down.rs
b/datafusion/optimizer/src/projection_push_down.rs
similarity index 99%
rename from datafusion/core/src/optimizer/projection_push_down.rs
rename to datafusion/optimizer/src/projection_push_down.rs
index b99b81f52..cd26d886c 100644
--- a/datafusion/core/src/optimizer/projection_push_down.rs
+++ b/datafusion/optimizer/src/projection_push_down.rs
@@ -18,7 +18,7 @@
//! Projection Push Down optimizer rule ensures that only referenced columns
are
//! loaded into memory
-use crate::optimizer::optimizer::{OptimizerConfig, OptimizerRule};
+use crate::{OptimizerConfig, OptimizerRule};
use arrow::datatypes::{Field, Schema};
use arrow::error::Result as ArrowResult;
use datafusion_common::{
@@ -530,7 +530,6 @@ mod tests {
use super::*;
use crate::test::*;
- use crate::test_util::scan_empty;
use arrow::datatypes::DataType;
use datafusion_expr::{
col, lit,
diff --git a/datafusion/core/src/optimizer/single_distinct_to_groupby.rs
b/datafusion/optimizer/src/single_distinct_to_groupby.rs
similarity index 98%
rename from datafusion/core/src/optimizer/single_distinct_to_groupby.rs
rename to datafusion/optimizer/src/single_distinct_to_groupby.rs
index d29a2477b..c508b9772 100644
--- a/datafusion/core/src/optimizer/single_distinct_to_groupby.rs
+++ b/datafusion/optimizer/src/single_distinct_to_groupby.rs
@@ -17,7 +17,7 @@
//! single distinct to group by optimizer rule
-use crate::optimizer::optimizer::{OptimizerConfig, OptimizerRule};
+use crate::{OptimizerConfig, OptimizerRule};
use datafusion_common::{DFSchema, Result};
use datafusion_expr::{
col,
@@ -201,10 +201,10 @@ impl OptimizerRule for SingleDistinctToGroupBy {
#[cfg(test)]
mod tests {
use super::*;
- use crate::physical_plan::aggregates;
use crate::test::*;
use datafusion_expr::{
col, count, count_distinct, lit,
logical_plan::builder::LogicalPlanBuilder, max,
+ AggregateFunction,
};
fn assert_optimized_plan_eq(plan: &LogicalPlan, expected: &str) {
@@ -314,7 +314,7 @@ mod tests {
vec![
count_distinct(col("b")),
Expr::AggregateFunction {
- fun: aggregates::AggregateFunction::Max,
+ fun: AggregateFunction::Max,
distinct: true,
args: vec![col("b")],
},
diff --git a/datafusion/core/src/optimizer/subquery_filter_to_join.rs
b/datafusion/optimizer/src/subquery_filter_to_join.rs
similarity index 99%
rename from datafusion/core/src/optimizer/subquery_filter_to_join.rs
rename to datafusion/optimizer/src/subquery_filter_to_join.rs
index bcbd9ae8a..f2621e190 100644
--- a/datafusion/core/src/optimizer/subquery_filter_to_join.rs
+++ b/datafusion/optimizer/src/subquery_filter_to_join.rs
@@ -26,10 +26,7 @@
//! WHERE t1.f IN (SELECT f FROM t2) OR t2.f = 'x'
//! ```
//! won't
-use crate::optimizer::{
- optimizer::{OptimizerConfig, OptimizerRule},
- utils,
-};
+use crate::{utils, OptimizerConfig, OptimizerRule};
use datafusion_common::{DataFusionError, Result};
use datafusion_expr::{
logical_plan::{
diff --git a/datafusion/optimizer/src/test/mod.rs
b/datafusion/optimizer/src/test/mod.rs
new file mode 100644
index 000000000..86e12bc30
--- /dev/null
+++ b/datafusion/optimizer/src/test/mod.rs
@@ -0,0 +1,56 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+use arrow::datatypes::{DataType, Field, Schema};
+use datafusion_common::Result;
+use datafusion_expr::{logical_plan::table_scan, LogicalPlan,
LogicalPlanBuilder};
+
+pub mod user_defined;
+
+/// some tests share a common table with different names
+pub fn test_table_scan_with_name(name: &str) -> Result<LogicalPlan> {
+ let schema = Schema::new(vec![
+ Field::new("a", DataType::UInt32, false),
+ Field::new("b", DataType::UInt32, false),
+ Field::new("c", DataType::UInt32, false),
+ ]);
+ table_scan(Some(name), &schema, None)?.build()
+}
+
+/// some tests share a common table
+pub fn test_table_scan() -> Result<LogicalPlan> {
+ test_table_scan_with_name("test")
+}
+
+/// Scan an empty data source, mainly used in tests
+pub fn scan_empty(
+ name: Option<&str>,
+ table_schema: &Schema,
+ projection: Option<Vec<usize>>,
+) -> Result<LogicalPlanBuilder> {
+ table_scan(name, table_schema, projection)
+}
+
+pub fn assert_fields_eq(plan: &LogicalPlan, expected: Vec<&str>) {
+ let actual: Vec<String> = plan
+ .schema()
+ .fields()
+ .iter()
+ .map(|f| f.name().clone())
+ .collect();
+ assert_eq!(actual, expected);
+}
diff --git a/datafusion/core/src/test/user_defined.rs
b/datafusion/optimizer/src/test/user_defined.rs
similarity index 93%
rename from datafusion/core/src/test/user_defined.rs
rename to datafusion/optimizer/src/test/user_defined.rs
index 19ed0580b..c9993568c 100644
--- a/datafusion/core/src/test/user_defined.rs
+++ b/datafusion/optimizer/src/test/user_defined.rs
@@ -17,15 +17,17 @@
//! Simple user defined logical plan node for testing
+use datafusion_common::DFSchemaRef;
+use datafusion_expr::{
+ logical_plan::{Extension, UserDefinedLogicalNode},
+ Expr, LogicalPlan,
+};
use std::{
any::Any,
fmt::{self, Debug},
sync::Arc,
};
-use crate::logical_plan::plan::Extension;
-use crate::logical_plan::{DFSchemaRef, Expr, LogicalPlan,
UserDefinedLogicalNode};
-
/// Create a new user defined plan node, for testing
pub fn new(input: LogicalPlan) -> LogicalPlan {
let node = Arc::new(TestUserDefinedPlanNode { input });
diff --git a/datafusion/core/src/optimizer/utils.rs
b/datafusion/optimizer/src/utils.rs
similarity index 99%
rename from datafusion/core/src/optimizer/utils.rs
rename to datafusion/optimizer/src/utils.rs
index 863536972..35414f5f8 100644
--- a/datafusion/core/src/optimizer/utils.rs
+++ b/datafusion/optimizer/src/utils.rs
@@ -17,7 +17,7 @@
//! Collection of utility functions that are leveraged by the query optimizer
rules
-use crate::optimizer::optimizer::{OptimizerConfig, OptimizerRule};
+use crate::{OptimizerConfig, OptimizerRule};
use datafusion_common::{DataFusionError, Result, ScalarValue};
use datafusion_expr::{
and,