This is an automated email from the ASF dual-hosted git repository.
agrove pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-datafusion.git
The following commit(s) were added to refs/heads/main by this push:
new 9ee055a3f5 Add some initial content about creating logical plans
(#7952)
9ee055a3f5 is described below
commit 9ee055a3f59ec08de3432a3ec3de8ff55d29975a
Author: Andy Grove <[email protected]>
AuthorDate: Sat Oct 28 09:45:16 2023 -0600
Add some initial content about creating logical plans (#7952)
---
Cargo.toml | 1 +
.../core/src/datasource/default_table_source.rs | 6 +-
datafusion/core/src/prelude.rs | 2 +-
dev/update_datafusion_versions.py | 1 +
docs/Cargo.toml | 32 +++++
.../library-user-guide/building-logical-plans.md | 129 ++++++++++++++++++++-
docs/src/lib.rs | 19 +++
docs/src/library_logical_plan.rs | 78 +++++++++++++
8 files changed, 264 insertions(+), 4 deletions(-)
diff --git a/Cargo.toml b/Cargo.toml
index 71088e7fc7..77e3c6038e 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -32,6 +32,7 @@ members = [
"datafusion/substrait",
"datafusion/wasmtest",
"datafusion-examples",
+ "docs",
"test-utils",
"benchmarks",
]
diff --git a/datafusion/core/src/datasource/default_table_source.rs
b/datafusion/core/src/datasource/default_table_source.rs
index f93faa50a9..00a9c123ce 100644
--- a/datafusion/core/src/datasource/default_table_source.rs
+++ b/datafusion/core/src/datasource/default_table_source.rs
@@ -26,10 +26,12 @@ use arrow::datatypes::SchemaRef;
use datafusion_common::{internal_err, Constraints, DataFusionError};
use datafusion_expr::{Expr, TableProviderFilterPushDown, TableSource};
-/// DataFusion default table source, wrapping TableProvider
+/// DataFusion default table source, wrapping TableProvider.
///
/// This structure adapts a `TableProvider` (physical plan trait) to the
`TableSource`
-/// (logical plan trait)
+/// (logical plan trait) and is necessary because the logical plan is
contained in
+/// the `datafusion_expr` crate, and is not aware of table providers, which
exist in
+/// the core `datafusion` crate.
pub struct DefaultTableSource {
/// table provider
pub table_provider: Arc<dyn TableProvider>,
diff --git a/datafusion/core/src/prelude.rs b/datafusion/core/src/prelude.rs
index 7689468e5d..5cd8b3870f 100644
--- a/datafusion/core/src/prelude.rs
+++ b/datafusion/core/src/prelude.rs
@@ -13,7 +13,7 @@
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
-// under the License.pub},
+// under the License.
//! DataFusion "prelude" to simplify importing common types.
//!
diff --git a/dev/update_datafusion_versions.py
b/dev/update_datafusion_versions.py
index 7cbe39fdfb..19701b8136 100755
--- a/dev/update_datafusion_versions.py
+++ b/dev/update_datafusion_versions.py
@@ -43,6 +43,7 @@ crates = {
'datafusion-wasmtest': 'datafusion/wasmtest/Cargo.toml',
'datafusion-benchmarks': 'benchmarks/Cargo.toml',
'datafusion-examples': 'datafusion-examples/Cargo.toml',
+ 'datafusion-docs': 'docs/Cargo.toml',
}
def update_workspace_version(new_version: str):
diff --git a/docs/Cargo.toml b/docs/Cargo.toml
new file mode 100644
index 0000000000..9caa0bde36
--- /dev/null
+++ b/docs/Cargo.toml
@@ -0,0 +1,32 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+[package]
+name = "datafusion-docs-tests"
+description = "DataFusion Documentation Tests"
+publish = false
+version = { workspace = true }
+edition = { workspace = true }
+readme = { workspace = true }
+homepage = { workspace = true }
+repository = { workspace = true }
+license = { workspace = true }
+authors = { workspace = true }
+rust-version = "1.70"
+
+[dependencies]
+datafusion = { path = "../datafusion/core", version = "32.0.0",
default-features = false }
diff --git a/docs/source/library-user-guide/building-logical-plans.md
b/docs/source/library-user-guide/building-logical-plans.md
index 406f488112..fe922d8eae 100644
--- a/docs/source/library-user-guide/building-logical-plans.md
+++ b/docs/source/library-user-guide/building-logical-plans.md
@@ -19,4 +19,131 @@
# Building Logical Plans
-Coming Soon
+A logical plan is a structured representation of a database query that
describes the high-level operations and
+transformations needed to retrieve data from a database or data source. It
abstracts away specific implementation
+details and focuses on the logical flow of the query, including operations
like filtering, sorting, and joining tables.
+
+This logical plan serves as an intermediate step before generating an
optimized physical execution plan. This is
+explained in more detail in the [Query Planning and Execution Overview]
section of the [Architecture Guide].
+
+## Building Logical Plans Manually
+
+DataFusion's [LogicalPlan] is an enum containing variants representing all the
supported operators, and also
+contains an `Extension` variant that allows projects building on DataFusion to
add custom logical operators.
+
+It is possible to create logical plans by directly creating instances of the
[LogicalPlan] enum as follows, but it is
+much easier to use the [LogicalPlanBuilder], which is described in the next
section.
+
+Here is an example of building a logical plan directly:
+
+<!-- source for this example is in
datafusion_docs::library_logical_plan::plan_1 -->
+
+```rust
+// create a logical table source
+let schema = Schema::new(vec![
+ Field::new("id", DataType::Int32, true),
+ Field::new("name", DataType::Utf8, true),
+]);
+let table_source = LogicalTableSource::new(SchemaRef::new(schema));
+
+// create a TableScan plan
+let projection = None; // optional projection
+let filters = vec![]; // optional filters to push down
+let fetch = None; // optional LIMIT
+let table_scan = LogicalPlan::TableScan(TableScan::try_new(
+ "person",
+ Arc::new(table_source),
+ projection,
+ filters,
+ fetch,
+)?);
+
+// create a Filter plan that evaluates `id > 500` and wraps the TableScan
+let filter_expr = col("id").gt(lit(500));
+let plan = LogicalPlan::Filter(Filter::try_new(filter_expr,
Arc::new(table_scan))?);
+
+// print the plan
+println!("{}", plan.display_indent_schema());
+```
+
+This example produces the following plan:
+
+```
+Filter: person.id > Int32(500) [id:Int32;N, name:Utf8;N]
+ TableScan: person [id:Int32;N, name:Utf8;N]
+```
+
+## Building Logical Plans with LogicalPlanBuilder
+
+DataFusion logical plans can be created using the [LogicalPlanBuilder] struct.
There is also a [DataFrame] API which is
+a higher-level API that delegates to [LogicalPlanBuilder].
+
+The following associated functions can be used to create a new builder:
+
+- `empty` - create an empty plan with no fields
+- `values` - create a plan from a set of literal values
+- `scan` - create a plan representing a table scan
+- `scan_with_filters` - create a plan representing a table scan with filters
+
+Once the builder is created, transformation methods can be called to declare
that further operations should be
+performed on the plan. Note that all we are doing at this stage is building up
the logical plan structure. No query
+execution will be performed.
+
+Here are some examples of transformation methods, but for a full list, refer
to the [LogicalPlanBuilder] API documentation.
+
+- `filter`
+- `limit`
+- `sort`
+- `distinct`
+- `join`
+
+The following example demonstrates building the same simple query plan as the
previous example, with a table scan followed by a filter.
+
+<!-- source for this example is in
datafusion_docs::library_logical_plan::plan_builder_1 -->
+
+```rust
+// create a logical table source
+let schema = Schema::new(vec![
+ Field::new("id", DataType::Int32, true),
+ Field::new("name", DataType::Utf8, true),
+]);
+let table_source = LogicalTableSource::new(SchemaRef::new(schema));
+
+// optional projection
+let projection = None;
+
+// create a LogicalPlanBuilder for a table scan
+let builder = LogicalPlanBuilder::scan("person", Arc::new(table_source),
projection)?;
+
+// perform a filter operation and build the plan
+let plan = builder
+ .filter(col("id").gt(lit(500)))? // WHERE id > 500
+ .build()?;
+
+// print the plan
+println!("{}", plan.display_indent_schema());
+```
+
+This example produces the following plan:
+
+```
+Filter: person.id > Int32(500) [id:Int32;N, name:Utf8;N]
+ TableScan: person [id:Int32;N, name:Utf8;N]
+```
+
+## Table Sources
+
+The previous examples used a [LogicalTableSource], which is used for tests and
documentation in DataFusion, and is also
+suitable if you are using DataFusion to build logical plans but do not use
DataFusion's physical planner. However, if you
+want to use a [TableSource] that can be executed in DataFusion then you will
need to use [DefaultTableSource], which is a
+wrapper for a [TableProvider].
+
+[query planning and execution overview]:
https://docs.rs/datafusion/latest/datafusion/index.html#query-planning-and-execution-overview
+[architecture guide]:
https://docs.rs/datafusion/latest/datafusion/index.html#architecture
+[logicalplan]:
https://docs.rs/datafusion-expr/latest/datafusion_expr/logical_plan/enum.LogicalPlan.html
+[logicalplanbuilder]:
https://docs.rs/datafusion-expr/latest/datafusion_expr/logical_plan/builder/struct.LogicalPlanBuilder.html
+[dataframe]: using-the-dataframe-api.md
+[logicaltablesource]:
https://docs.rs/datafusion-expr/latest/datafusion_expr/logical_plan/builder/struct.LogicalTableSource.html
+[defaulttablesource]:
https://docs.rs/datafusion/latest/datafusion/datasource/default_table_source/struct.DefaultTableSource.html
+[tableprovider]:
https://docs.rs/datafusion/latest/datafusion/datasource/provider/trait.TableProvider.html
+[tablesource]:
https://docs.rs/datafusion-expr/latest/datafusion_expr/trait.TableSource.html
diff --git a/docs/src/lib.rs b/docs/src/lib.rs
new file mode 100644
index 0000000000..f73132468e
--- /dev/null
+++ b/docs/src/lib.rs
@@ -0,0 +1,19 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#[cfg(test)]
+mod library_logical_plan;
diff --git a/docs/src/library_logical_plan.rs b/docs/src/library_logical_plan.rs
new file mode 100644
index 0000000000..3550039415
--- /dev/null
+++ b/docs/src/library_logical_plan.rs
@@ -0,0 +1,78 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+use datafusion::arrow::datatypes::{DataType, Field, Schema, SchemaRef};
+use datafusion::error::Result;
+use datafusion::logical_expr::builder::LogicalTableSource;
+use datafusion::logical_expr::{Filter, LogicalPlan, LogicalPlanBuilder,
TableScan};
+use datafusion::prelude::*;
+use std::sync::Arc;
+
+#[test]
+fn plan_1() -> Result<()> {
+ // create a logical table source
+ let schema = Schema::new(vec![
+ Field::new("id", DataType::Int32, true),
+ Field::new("name", DataType::Utf8, true),
+ ]);
+ let table_source = LogicalTableSource::new(SchemaRef::new(schema));
+
+ // create a TableScan plan
+ let projection = None; // optional projection
+ let filters = vec![]; // optional filters to push down
+ let fetch = None; // optional LIMIT
+ let table_scan = LogicalPlan::TableScan(TableScan::try_new(
+ "person",
+ Arc::new(table_source),
+ projection,
+ filters,
+ fetch,
+ )?);
+
+ // create a Filter plan that evaluates `id > 500` and wraps the TableScan
+ let filter_expr = col("id").gt(lit(500));
+ let plan = LogicalPlan::Filter(Filter::try_new(filter_expr,
Arc::new(table_scan))?);
+
+ // print the plan
+ println!("{}", plan.display_indent_schema());
+
+ Ok(())
+}
+
+#[test]
+fn plan_builder_1() -> Result<()> {
+ // create a logical table source
+ let schema = Schema::new(vec![
+ Field::new("id", DataType::Int32, true),
+ Field::new("name", DataType::Utf8, true),
+ ]);
+ let table_source = LogicalTableSource::new(SchemaRef::new(schema));
+
+ // optional projection
+ let projection = None;
+
+ // create a LogicalPlanBuilder for a table scan
+ let builder = LogicalPlanBuilder::scan("person", Arc::new(table_source),
projection)?;
+
+ // perform a filter that evaluates `id > 500`, and build the plan
+ let plan = builder.filter(col("id").gt(lit(500)))?.build()?;
+
+ // print the plan
+ println!("{}", plan.display_indent_schema());
+
+ Ok(())
+}