This is an automated email from the ASF dual-hosted git repository.

agrove pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-datafusion.git


The following commit(s) were added to refs/heads/master by this push:
     new 0651a5e36 Create new `datafusion-optimizer` crate for logical optimizer rules (#2675)
0651a5e36 is described below

commit 0651a5e3634776261daa7b7db5c772999112eb02
Author: Andy Grove <[email protected]>
AuthorDate: Thu Jun 2 13:15:25 2022 -0600

    Create new `datafusion-optimizer` crate for logical optimizer rules (#2675)
---
 Cargo.toml                                         |  1 +
 datafusion/core/Cargo.toml                         |  1 +
 datafusion/core/src/optimizer/mod.rs               | 16 +++----
 datafusion/core/src/test/mod.rs                    | 18 +------
 datafusion/optimizer/Cargo.toml                    | 46 ++++++++++++++++++
 datafusion/optimizer/README.md                     | 26 ++++++++++
 .../src}/common_subexpr_eliminate.rs               |  2 +-
 .../src}/eliminate_filter.rs                       |  3 +-
 .../optimizer => optimizer/src}/eliminate_limit.rs |  2 +-
 .../src}/filter_push_down.rs                       |  5 +-
 .../src/optimizer/mod.rs => optimizer/src/lib.rs}  | 10 ++--
 .../optimizer => optimizer/src}/limit_push_down.rs |  2 +-
 .../src/optimizer => optimizer/src}/optimizer.rs   |  0
 .../src}/projection_push_down.rs                   |  3 +-
 .../src}/single_distinct_to_groupby.rs             |  6 +--
 .../src}/subquery_filter_to_join.rs                |  5 +-
 datafusion/optimizer/src/test/mod.rs               | 56 ++++++++++++++++++++++
 .../{core => optimizer}/src/test/user_defined.rs   |  8 ++--
 .../{core/src/optimizer => optimizer/src}/utils.rs |  2 +-
 19 files changed, 158 insertions(+), 54 deletions(-)

diff --git a/Cargo.toml b/Cargo.toml
index 1cc7aa6eb..7a349735b 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -22,6 +22,7 @@ members = [
     "datafusion/data-access",
     "datafusion/expr",
     "datafusion/jit",
+    "datafusion/optimizer",
     "datafusion/physical-expr",
     "datafusion/proto",
     "datafusion/row",
diff --git a/datafusion/core/Cargo.toml b/datafusion/core/Cargo.toml
index 79a48cf15..a598b65d3 100644
--- a/datafusion/core/Cargo.toml
+++ b/datafusion/core/Cargo.toml
@@ -63,6 +63,7 @@ datafusion-common = { path = "../common", version = "8.0.0", features = ["parque
 datafusion-data-access = { path = "../data-access", version = "8.0.0" }
 datafusion-expr = { path = "../expr", version = "8.0.0" }
 datafusion-jit = { path = "../jit", version = "8.0.0", optional = true }
+datafusion-optimizer = { path = "../optimizer", version = "8.0.0" }
 datafusion-physical-expr = { path = "../physical-expr", version = "8.0.0" }
 datafusion-row = { path = "../row", version = "8.0.0" }
 datafusion-sql = { path = "../sql", version = "8.0.0" }
diff --git a/datafusion/core/src/optimizer/mod.rs b/datafusion/core/src/optimizer/mod.rs
index b274ab645..cf6412db9 100644
--- a/datafusion/core/src/optimizer/mod.rs
+++ b/datafusion/core/src/optimizer/mod.rs
@@ -19,14 +19,10 @@
 //! some simple rules to a logical plan, such as "Projection Push Down" and "Type Coercion".
 
 #![allow(clippy::module_inception)]
-pub mod common_subexpr_eliminate;
-pub mod eliminate_filter;
-pub mod eliminate_limit;
-pub mod filter_push_down;
-pub mod limit_push_down;
-pub mod optimizer;
-pub mod projection_push_down;
 pub mod simplify_expressions;
-pub mod single_distinct_to_groupby;
-pub mod subquery_filter_to_join;
-pub mod utils;
+
+pub use datafusion_optimizer::{
+    common_subexpr_eliminate, eliminate_filter, eliminate_limit, filter_push_down,
+    limit_push_down, optimizer, projection_push_down, single_distinct_to_groupby,
+    subquery_filter_to_join, utils,
+};
diff --git a/datafusion/core/src/test/mod.rs b/datafusion/core/src/test/mod.rs
index dd00a5028..304c1b376 100644
--- a/datafusion/core/src/test/mod.rs
+++ b/datafusion/core/src/test/mod.rs
@@ -24,7 +24,7 @@ use crate::error::Result;
 use crate::from_slice::FromSlice;
 use crate::logical_plan::LogicalPlan;
 use crate::physical_plan::file_format::{CsvExec, FileScanConfig};
-use crate::test_util::{aggr_test_schema, scan_empty};
+use crate::test_util::aggr_test_schema;
 use array::{Array, ArrayRef};
 use arrow::array::{self, DecimalBuilder, Int32Array};
 use arrow::datatypes::{DataType, Field, Schema, SchemaRef};
@@ -128,21 +128,6 @@ pub fn partitioned_csv_config(
     })
 }
 
-/// some tests share a common table with different names
-pub fn test_table_scan_with_name(name: &str) -> Result<LogicalPlan> {
-    let schema = Schema::new(vec![
-        Field::new("a", DataType::UInt32, false),
-        Field::new("b", DataType::UInt32, false),
-        Field::new("c", DataType::UInt32, false),
-    ]);
-    scan_empty(Some(name), &schema, None)?.build()
-}
-
-/// some tests share a common table
-pub fn test_table_scan() -> Result<LogicalPlan> {
-    test_table_scan_with_name("test")
-}
-
 pub fn assert_fields_eq(plan: &LogicalPlan, expected: Vec<&str>) {
     let actual: Vec<String> = plan
         .schema()
@@ -259,5 +244,4 @@ fn create_batch(schema: &Schema) -> RecordBatch {
 
 pub mod exec;
 pub mod object_store;
-pub mod user_defined;
 pub mod variable;
diff --git a/datafusion/optimizer/Cargo.toml b/datafusion/optimizer/Cargo.toml
new file mode 100644
index 000000000..4d024f4c5
--- /dev/null
+++ b/datafusion/optimizer/Cargo.toml
@@ -0,0 +1,46 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+[package]
+name = "datafusion-optimizer"
+description = "DataFusion Query Optimizer"
+version = "8.0.0"
+homepage = "https://github.com/apache/arrow-datafusion"
+repository = "https://github.com/apache/arrow-datafusion"
+readme = "README.md"
+authors = ["Apache Arrow <[email protected]>"]
+license = "Apache-2.0"
+keywords = [ "datafusion", "query", "optimizer" ]
+edition = "2021"
+rust-version = "1.59"
+
+[lib]
+name = "datafusion_optimizer"
+path = "src/lib.rs"
+
+[features]
+default = ["unicode_expressions"]
+unicode_expressions = []
+
+[dependencies]
+arrow = { version = "15.0.0", features = ["prettyprint"] }
+async-trait = "0.1.41"
+chrono = { version = "0.4", default-features = false }
+datafusion-common = { path = "../common", version = "8.0.0" }
+datafusion-expr = { path = "../expr", version = "8.0.0" }
+hashbrown = { version = "0.12", features = ["raw"] }
+log = "^0.4"
diff --git a/datafusion/optimizer/README.md b/datafusion/optimizer/README.md
new file mode 100644
index 000000000..39d28a8fa
--- /dev/null
+++ b/datafusion/optimizer/README.md
@@ -0,0 +1,26 @@
+<!---
+  Licensed to the Apache Software Foundation (ASF) under one
+  or more contributor license agreements.  See the NOTICE file
+  distributed with this work for additional information
+  regarding copyright ownership.  The ASF licenses this file
+  to you under the Apache License, Version 2.0 (the
+  "License"); you may not use this file except in compliance
+  with the License.  You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+  Unless required by applicable law or agreed to in writing,
+  software distributed under the License is distributed on an
+  "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+  KIND, either express or implied.  See the License for the
+  specific language governing permissions and limitations
+  under the License.
+-->
+
+# DataFusion Query Optimizer Rules
+
+[DataFusion](df) is an extensible query execution framework, written in Rust, that uses Apache Arrow as its in-memory format.
+
+This crate is a submodule of DataFusion that provides query optimizer rules.
+
+[df]: https://crates.io/crates/datafusion
diff --git a/datafusion/core/src/optimizer/common_subexpr_eliminate.rs b/datafusion/optimizer/src/common_subexpr_eliminate.rs
similarity index 99%
rename from datafusion/core/src/optimizer/common_subexpr_eliminate.rs
rename to datafusion/optimizer/src/common_subexpr_eliminate.rs
index 916e99713..bc635c215 100644
--- a/datafusion/core/src/optimizer/common_subexpr_eliminate.rs
+++ b/datafusion/optimizer/src/common_subexpr_eliminate.rs
@@ -17,7 +17,7 @@
 
 //! Eliminate common sub-expression.
 
-use crate::optimizer::optimizer::{OptimizerConfig, OptimizerRule};
+use crate::{OptimizerConfig, OptimizerRule};
 use arrow::datatypes::DataType;
 use datafusion_common::{DFField, DFSchema, Result};
 use datafusion_expr::{
diff --git a/datafusion/core/src/optimizer/eliminate_filter.rs b/datafusion/optimizer/src/eliminate_filter.rs
similarity index 98%
rename from datafusion/core/src/optimizer/eliminate_filter.rs
rename to datafusion/optimizer/src/eliminate_filter.rs
index 4bbc2c401..86cd3bb8d 100644
--- a/datafusion/core/src/optimizer/eliminate_filter.rs
+++ b/datafusion/optimizer/src/eliminate_filter.rs
@@ -18,6 +18,7 @@
 //! Optimizer rule to replace `where false` on a plan with an empty relation.
 //! This saves time in planning and executing the query.
 //! Note that this rule should be applied after simplify expressions optimizer rule.
+use crate::{OptimizerConfig, OptimizerRule};
 use datafusion_common::{Result, ScalarValue};
 use datafusion_expr::{
     logical_plan::{EmptyRelation, Filter, LogicalPlan},
@@ -25,8 +26,6 @@ use datafusion_expr::{
     Expr,
 };
 
-use crate::optimizer::optimizer::{OptimizerConfig, OptimizerRule};
-
 /// Optimization rule that elimanate the scalar value (true/false) filter with an [LogicalPlan::EmptyRelation]
 #[derive(Default)]
 pub struct EliminateFilter;
diff --git a/datafusion/core/src/optimizer/eliminate_limit.rs b/datafusion/optimizer/src/eliminate_limit.rs
similarity index 98%
rename from datafusion/core/src/optimizer/eliminate_limit.rs
rename to datafusion/optimizer/src/eliminate_limit.rs
index 27e5fab17..f6d3b8472 100644
--- a/datafusion/core/src/optimizer/eliminate_limit.rs
+++ b/datafusion/optimizer/src/eliminate_limit.rs
@@ -17,7 +17,7 @@
 
 //! Optimizer rule to replace `LIMIT 0` on a plan with an empty relation.
 //! This saves time in planning and executing the query.
-use crate::optimizer::optimizer::{OptimizerConfig, OptimizerRule};
+use crate::{OptimizerConfig, OptimizerRule};
 use datafusion_common::Result;
 use datafusion_expr::{
     logical_plan::{EmptyRelation, Limit, LogicalPlan},
diff --git a/datafusion/core/src/optimizer/filter_push_down.rs b/datafusion/optimizer/src/filter_push_down.rs
similarity index 99%
rename from datafusion/core/src/optimizer/filter_push_down.rs
rename to datafusion/optimizer/src/filter_push_down.rs
index a1a50a378..96abdc4c5 100644
--- a/datafusion/core/src/optimizer/filter_push_down.rs
+++ b/datafusion/optimizer/src/filter_push_down.rs
@@ -14,10 +14,7 @@
 
 //! Filter Push Down optimizer rule ensures that filters are applied as early as possible in the plan
 
-use crate::optimizer::{
-    optimizer::{OptimizerConfig, OptimizerRule},
-    utils,
-};
+use crate::{utils, OptimizerConfig, OptimizerRule};
 use datafusion_common::{Column, DFSchema, Result};
 use datafusion_expr::{
     col,
diff --git a/datafusion/core/src/optimizer/mod.rs b/datafusion/optimizer/src/lib.rs
similarity index 81%
copy from datafusion/core/src/optimizer/mod.rs
copy to datafusion/optimizer/src/lib.rs
index b274ab645..9a5130050 100644
--- a/datafusion/core/src/optimizer/mod.rs
+++ b/datafusion/optimizer/src/lib.rs
@@ -15,10 +15,6 @@
 // specific language governing permissions and limitations
 // under the License.
 
-//! This module contains a query optimizer that operates against a logical plan and applies
-//! some simple rules to a logical plan, such as "Projection Push Down" and "Type Coercion".
-
-#![allow(clippy::module_inception)]
 pub mod common_subexpr_eliminate;
 pub mod eliminate_filter;
 pub mod eliminate_limit;
@@ -26,7 +22,11 @@ pub mod filter_push_down;
 pub mod limit_push_down;
 pub mod optimizer;
 pub mod projection_push_down;
-pub mod simplify_expressions;
 pub mod single_distinct_to_groupby;
 pub mod subquery_filter_to_join;
 pub mod utils;
+
+#[cfg(test)]
+pub mod test;
+
+pub use optimizer::{OptimizerConfig, OptimizerRule};
diff --git a/datafusion/core/src/optimizer/limit_push_down.rs 
b/datafusion/optimizer/src/limit_push_down.rs
similarity index 99%
rename from datafusion/core/src/optimizer/limit_push_down.rs
rename to datafusion/optimizer/src/limit_push_down.rs
index 41fb7cc5f..91f976001 100644
--- a/datafusion/core/src/optimizer/limit_push_down.rs
+++ b/datafusion/optimizer/src/limit_push_down.rs
@@ -17,7 +17,7 @@
 
 //! Optimizer rule to push down LIMIT in the query plan
 //! It will push down through projection, limits (taking the smaller limit)
-use crate::optimizer::optimizer::{OptimizerConfig, OptimizerRule};
+use crate::{OptimizerConfig, OptimizerRule};
 use datafusion_common::{DataFusionError, Result};
 use datafusion_expr::{
     logical_plan::{
diff --git a/datafusion/core/src/optimizer/optimizer.rs b/datafusion/optimizer/src/optimizer.rs
similarity index 100%
rename from datafusion/core/src/optimizer/optimizer.rs
rename to datafusion/optimizer/src/optimizer.rs
diff --git a/datafusion/core/src/optimizer/projection_push_down.rs b/datafusion/optimizer/src/projection_push_down.rs
similarity index 99%
rename from datafusion/core/src/optimizer/projection_push_down.rs
rename to datafusion/optimizer/src/projection_push_down.rs
index b99b81f52..cd26d886c 100644
--- a/datafusion/core/src/optimizer/projection_push_down.rs
+++ b/datafusion/optimizer/src/projection_push_down.rs
@@ -18,7 +18,7 @@
 //! Projection Push Down optimizer rule ensures that only referenced columns are
 //! loaded into memory
 
-use crate::optimizer::optimizer::{OptimizerConfig, OptimizerRule};
+use crate::{OptimizerConfig, OptimizerRule};
 use arrow::datatypes::{Field, Schema};
 use arrow::error::Result as ArrowResult;
 use datafusion_common::{
@@ -530,7 +530,6 @@ mod tests {
 
     use super::*;
     use crate::test::*;
-    use crate::test_util::scan_empty;
     use arrow::datatypes::DataType;
     use datafusion_expr::{
         col, lit,
diff --git a/datafusion/core/src/optimizer/single_distinct_to_groupby.rs b/datafusion/optimizer/src/single_distinct_to_groupby.rs
similarity index 98%
rename from datafusion/core/src/optimizer/single_distinct_to_groupby.rs
rename to datafusion/optimizer/src/single_distinct_to_groupby.rs
index d29a2477b..c508b9772 100644
--- a/datafusion/core/src/optimizer/single_distinct_to_groupby.rs
+++ b/datafusion/optimizer/src/single_distinct_to_groupby.rs
@@ -17,7 +17,7 @@
 
 //! single distinct to group by optimizer rule
 
-use crate::optimizer::optimizer::{OptimizerConfig, OptimizerRule};
+use crate::{OptimizerConfig, OptimizerRule};
 use datafusion_common::{DFSchema, Result};
 use datafusion_expr::{
     col,
@@ -201,10 +201,10 @@ impl OptimizerRule for SingleDistinctToGroupBy {
 #[cfg(test)]
 mod tests {
     use super::*;
-    use crate::physical_plan::aggregates;
     use crate::test::*;
     use datafusion_expr::{
         col, count, count_distinct, lit, logical_plan::builder::LogicalPlanBuilder, max,
+        AggregateFunction,
     };
 
     fn assert_optimized_plan_eq(plan: &LogicalPlan, expected: &str) {
@@ -314,7 +314,7 @@ mod tests {
                 vec![
                     count_distinct(col("b")),
                     Expr::AggregateFunction {
-                        fun: aggregates::AggregateFunction::Max,
+                        fun: AggregateFunction::Max,
                         distinct: true,
                         args: vec![col("b")],
                     },
diff --git a/datafusion/core/src/optimizer/subquery_filter_to_join.rs b/datafusion/optimizer/src/subquery_filter_to_join.rs
similarity index 99%
rename from datafusion/core/src/optimizer/subquery_filter_to_join.rs
rename to datafusion/optimizer/src/subquery_filter_to_join.rs
index bcbd9ae8a..f2621e190 100644
--- a/datafusion/core/src/optimizer/subquery_filter_to_join.rs
+++ b/datafusion/optimizer/src/subquery_filter_to_join.rs
@@ -26,10 +26,7 @@
 //!   WHERE t1.f IN (SELECT f FROM t2) OR t2.f = 'x'
 //! ```
 //! won't
-use crate::optimizer::{
-    optimizer::{OptimizerConfig, OptimizerRule},
-    utils,
-};
+use crate::{utils, OptimizerConfig, OptimizerRule};
 use datafusion_common::{DataFusionError, Result};
 use datafusion_expr::{
     logical_plan::{
diff --git a/datafusion/optimizer/src/test/mod.rs b/datafusion/optimizer/src/test/mod.rs
new file mode 100644
index 000000000..86e12bc30
--- /dev/null
+++ b/datafusion/optimizer/src/test/mod.rs
@@ -0,0 +1,56 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+use arrow::datatypes::{DataType, Field, Schema};
+use datafusion_common::Result;
+use datafusion_expr::{logical_plan::table_scan, LogicalPlan, LogicalPlanBuilder};
+
+pub mod user_defined;
+
+/// some tests share a common table with different names
+pub fn test_table_scan_with_name(name: &str) -> Result<LogicalPlan> {
+    let schema = Schema::new(vec![
+        Field::new("a", DataType::UInt32, false),
+        Field::new("b", DataType::UInt32, false),
+        Field::new("c", DataType::UInt32, false),
+    ]);
+    table_scan(Some(name), &schema, None)?.build()
+}
+
+/// some tests share a common table
+pub fn test_table_scan() -> Result<LogicalPlan> {
+    test_table_scan_with_name("test")
+}
+
+/// Scan an empty data source, mainly used in tests
+pub fn scan_empty(
+    name: Option<&str>,
+    table_schema: &Schema,
+    projection: Option<Vec<usize>>,
+) -> Result<LogicalPlanBuilder> {
+    table_scan(name, table_schema, projection)
+}
+
+pub fn assert_fields_eq(plan: &LogicalPlan, expected: Vec<&str>) {
+    let actual: Vec<String> = plan
+        .schema()
+        .fields()
+        .iter()
+        .map(|f| f.name().clone())
+        .collect();
+    assert_eq!(actual, expected);
+}
diff --git a/datafusion/core/src/test/user_defined.rs b/datafusion/optimizer/src/test/user_defined.rs
similarity index 93%
rename from datafusion/core/src/test/user_defined.rs
rename to datafusion/optimizer/src/test/user_defined.rs
index 19ed0580b..c9993568c 100644
--- a/datafusion/core/src/test/user_defined.rs
+++ b/datafusion/optimizer/src/test/user_defined.rs
@@ -17,15 +17,17 @@
 
 //! Simple user defined logical plan node for testing
 
+use datafusion_common::DFSchemaRef;
+use datafusion_expr::{
+    logical_plan::{Extension, UserDefinedLogicalNode},
+    Expr, LogicalPlan,
+};
 use std::{
     any::Any,
     fmt::{self, Debug},
     sync::Arc,
 };
 
-use crate::logical_plan::plan::Extension;
-use crate::logical_plan::{DFSchemaRef, Expr, LogicalPlan, UserDefinedLogicalNode};
-
 /// Create a new user defined plan node, for testing
 pub fn new(input: LogicalPlan) -> LogicalPlan {
     let node = Arc::new(TestUserDefinedPlanNode { input });
diff --git a/datafusion/core/src/optimizer/utils.rs b/datafusion/optimizer/src/utils.rs
similarity index 99%
rename from datafusion/core/src/optimizer/utils.rs
rename to datafusion/optimizer/src/utils.rs
index 863536972..35414f5f8 100644
--- a/datafusion/core/src/optimizer/utils.rs
+++ b/datafusion/optimizer/src/utils.rs
@@ -17,7 +17,7 @@
 
 //! Collection of utility functions that are leveraged by the query optimizer rules
 
-use crate::optimizer::optimizer::{OptimizerConfig, OptimizerRule};
+use crate::{OptimizerConfig, OptimizerRule};
 use datafusion_common::{DataFusionError, Result, ScalarValue};
 use datafusion_expr::{
     and,

Reply via email to