This is an automated email from the ASF dual-hosted git repository.

agrove pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-datafusion.git


The following commit(s) were added to refs/heads/main by this push:
     new 9ee055a3f5 Add some initial content about creating logical plans 
(#7952)
9ee055a3f5 is described below

commit 9ee055a3f59ec08de3432a3ec3de8ff55d29975a
Author: Andy Grove <[email protected]>
AuthorDate: Sat Oct 28 09:45:16 2023 -0600

    Add some initial content about creating logical plans (#7952)
---
 Cargo.toml                                         |   1 +
 .../core/src/datasource/default_table_source.rs    |   6 +-
 datafusion/core/src/prelude.rs                     |   2 +-
 dev/update_datafusion_versions.py                  |   1 +
 docs/Cargo.toml                                    |  32 +++++
 .../library-user-guide/building-logical-plans.md   | 129 ++++++++++++++++++++-
 docs/src/lib.rs                                    |  19 +++
 docs/src/library_logical_plan.rs                   |  78 +++++++++++++
 8 files changed, 264 insertions(+), 4 deletions(-)

diff --git a/Cargo.toml b/Cargo.toml
index 71088e7fc7..77e3c6038e 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -32,6 +32,7 @@ members = [
     "datafusion/substrait",
     "datafusion/wasmtest",
     "datafusion-examples",
+    "docs",
     "test-utils",
     "benchmarks",
 ]
diff --git a/datafusion/core/src/datasource/default_table_source.rs 
b/datafusion/core/src/datasource/default_table_source.rs
index f93faa50a9..00a9c123ce 100644
--- a/datafusion/core/src/datasource/default_table_source.rs
+++ b/datafusion/core/src/datasource/default_table_source.rs
@@ -26,10 +26,12 @@ use arrow::datatypes::SchemaRef;
 use datafusion_common::{internal_err, Constraints, DataFusionError};
 use datafusion_expr::{Expr, TableProviderFilterPushDown, TableSource};
 
-/// DataFusion default table source, wrapping TableProvider
+/// DataFusion default table source, wrapping TableProvider.
 ///
 /// This structure adapts a `TableProvider` (physical plan trait) to the 
`TableSource`
-/// (logical plan trait)
+/// (logical plan trait) and is necessary because the logical plan is 
contained in
+/// the `datafusion_expr` crate, and is not aware of table providers, which 
exist in
+/// the core `datafusion` crate.
 pub struct DefaultTableSource {
     /// table provider
     pub table_provider: Arc<dyn TableProvider>,
diff --git a/datafusion/core/src/prelude.rs b/datafusion/core/src/prelude.rs
index 7689468e5d..5cd8b3870f 100644
--- a/datafusion/core/src/prelude.rs
+++ b/datafusion/core/src/prelude.rs
@@ -13,7 +13,7 @@
 // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 // KIND, either express or implied.  See the License for the
 // specific language governing permissions and limitations
-// under the License.pub},
+// under the License.
 
 //! DataFusion "prelude" to simplify importing common types.
 //!
diff --git a/dev/update_datafusion_versions.py 
b/dev/update_datafusion_versions.py
index 7cbe39fdfb..19701b8136 100755
--- a/dev/update_datafusion_versions.py
+++ b/dev/update_datafusion_versions.py
@@ -43,6 +43,7 @@ crates = {
     'datafusion-wasmtest': 'datafusion/wasmtest/Cargo.toml',
     'datafusion-benchmarks': 'benchmarks/Cargo.toml',
     'datafusion-examples': 'datafusion-examples/Cargo.toml',
+    'datafusion-docs': 'docs/Cargo.toml',
 }
 
 def update_workspace_version(new_version: str):
diff --git a/docs/Cargo.toml b/docs/Cargo.toml
new file mode 100644
index 0000000000..9caa0bde36
--- /dev/null
+++ b/docs/Cargo.toml
@@ -0,0 +1,32 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+[package]
+name = "datafusion-docs-tests"
+description = "DataFusion Documentation Tests"
+publish = false
+version = { workspace = true }
+edition = { workspace = true }
+readme = { workspace = true }
+homepage = { workspace = true }
+repository = { workspace = true }
+license = { workspace = true }
+authors = { workspace = true }
+rust-version = "1.70"
+
+[dependencies]
+datafusion = { path = "../datafusion/core", version = "32.0.0", 
default-features = false }
diff --git a/docs/source/library-user-guide/building-logical-plans.md 
b/docs/source/library-user-guide/building-logical-plans.md
index 406f488112..fe922d8eae 100644
--- a/docs/source/library-user-guide/building-logical-plans.md
+++ b/docs/source/library-user-guide/building-logical-plans.md
@@ -19,4 +19,131 @@
 
 # Building Logical Plans
 
-Coming Soon
+A logical plan is a structured representation of a database query that 
describes the high-level operations and
+transformations needed to retrieve data from a database or data source. It 
abstracts away specific implementation
+details and focuses on the logical flow of the query, including operations 
like filtering, sorting, and joining tables.
+
+This logical plan serves as an intermediate step before generating an 
optimized physical execution plan. This is
+explained in more detail in the [Query Planning and Execution Overview] 
section of the [Architecture Guide].
+
+## Building Logical Plans Manually
+
+DataFusion's [LogicalPlan] is an enum containing variants representing all the 
supported operators, and also
+contains an `Extension` variant that allows projects building on DataFusion to 
add custom logical operators.
+
+It is possible to create logical plans by directly creating instances of the 
[LogicalPlan] enum as follows, but it is
+much easier to use the [LogicalPlanBuilder], which is described in the next 
section.
+
+Here is an example of building a logical plan directly:
+
+<!-- source for this example is in 
datafusion_docs_tests::library_logical_plan::plan_1 -->
+
+```rust
+// create a logical table source
+let schema = Schema::new(vec![
+    Field::new("id", DataType::Int32, true),
+    Field::new("name", DataType::Utf8, true),
+]);
+let table_source = LogicalTableSource::new(SchemaRef::new(schema));
+
+// create a TableScan plan
+let projection = None; // optional projection
+let filters = vec![]; // optional filters to push down
+let fetch = None; // optional LIMIT
+let table_scan = LogicalPlan::TableScan(TableScan::try_new(
+    "person",
+    Arc::new(table_source),
+    projection,
+    filters,
+    fetch,
+)?);
+
+// create a Filter plan that evaluates `id > 500` and wraps the TableScan
+let filter_expr = col("id").gt(lit(500));
+let plan = LogicalPlan::Filter(Filter::try_new(filter_expr, 
Arc::new(table_scan))?);
+
+// print the plan
+println!("{}", plan.display_indent_schema());
+```
+
+This example produces the following plan:
+
+```
+Filter: person.id > Int32(500) [id:Int32;N, name:Utf8;N]
+  TableScan: person [id:Int32;N, name:Utf8;N]
+```
+
+## Building Logical Plans with LogicalPlanBuilder
+
+DataFusion logical plans can be created using the [LogicalPlanBuilder] struct. 
There is also a [DataFrame] API which is
+a higher-level API that delegates to [LogicalPlanBuilder].
+
+The following associated functions can be used to create a new builder:
+
+- `empty` - create an empty plan with no fields
+- `values` - create a plan from a set of literal values
+- `scan` - create a plan representing a table scan
+- `scan_with_filters` - create a plan representing a table scan with filters
+
+Once the builder is created, transformation methods can be called to declare 
that further operations should be
+performed on the plan. Note that all we are doing at this stage is building up 
the logical plan structure. No query
+execution will be performed.
+
+Here are some examples of transformation methods, but for a full list, refer 
to the [LogicalPlanBuilder] API documentation.
+
+- `filter`
+- `limit`
+- `sort`
+- `distinct`
+- `join`
+
+The following example demonstrates building the same simple query plan as the 
previous example, with a table scan followed by a filter.
+
+<!-- source for this example is in 
datafusion_docs_tests::library_logical_plan::plan_builder_1 -->
+
+```rust
+// create a logical table source
+let schema = Schema::new(vec![
+    Field::new("id", DataType::Int32, true),
+    Field::new("name", DataType::Utf8, true),
+]);
+let table_source = LogicalTableSource::new(SchemaRef::new(schema));
+
+// optional projection
+let projection = None;
+
+// create a LogicalPlanBuilder for a table scan
+let builder = LogicalPlanBuilder::scan("person", Arc::new(table_source), 
projection)?;
+
+// perform a filter operation and build the plan
+let plan = builder
+    .filter(col("id").gt(lit(500)))? // WHERE id > 500
+    .build()?;
+
+// print the plan
+println!("{}", plan.display_indent_schema());
+```
+
+This example produces the following plan:
+
+```
+Filter: person.id > Int32(500) [id:Int32;N, name:Utf8;N]
+  TableScan: person [id:Int32;N, name:Utf8;N]
+```
+
+## Table Sources
+
+The previous example used a [LogicalTableSource], which is used for tests and 
documentation in DataFusion, and is also
+suitable if you are using DataFusion to build logical plans but do not use 
DataFusion's physical planner. However, if you
+want to use a [TableSource] that can be executed in DataFusion then you will 
need to use [DefaultTableSource], which is a
+wrapper for a [TableProvider].
+
+[query planning and execution overview]: 
https://docs.rs/datafusion/latest/datafusion/index.html#query-planning-and-execution-overview
+[architecture guide]: 
https://docs.rs/datafusion/latest/datafusion/index.html#architecture
+[logicalplan]: 
https://docs.rs/datafusion-expr/latest/datafusion_expr/logical_plan/enum.LogicalPlan.html
+[logicalplanbuilder]: 
https://docs.rs/datafusion-expr/latest/datafusion_expr/logical_plan/builder/struct.LogicalPlanBuilder.html
+[dataframe]: using-the-dataframe-api.md
+[logicaltablesource]: 
https://docs.rs/datafusion-expr/latest/datafusion_expr/logical_plan/builder/struct.LogicalTableSource.html
+[defaulttablesource]: 
https://docs.rs/datafusion/latest/datafusion/datasource/default_table_source/struct.DefaultTableSource.html
+[tableprovider]: 
https://docs.rs/datafusion/latest/datafusion/datasource/provider/trait.TableProvider.html
+[tablesource]: 
https://docs.rs/datafusion-expr/latest/datafusion_expr/trait.TableSource.html
diff --git a/docs/src/lib.rs b/docs/src/lib.rs
new file mode 100644
index 0000000000..f73132468e
--- /dev/null
+++ b/docs/src/lib.rs
@@ -0,0 +1,19 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#[cfg(test)]
+mod library_logical_plan;
diff --git a/docs/src/library_logical_plan.rs b/docs/src/library_logical_plan.rs
new file mode 100644
index 0000000000..3550039415
--- /dev/null
+++ b/docs/src/library_logical_plan.rs
@@ -0,0 +1,78 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+use datafusion::arrow::datatypes::{DataType, Field, Schema, SchemaRef};
+use datafusion::error::Result;
+use datafusion::logical_expr::builder::LogicalTableSource;
+use datafusion::logical_expr::{Filter, LogicalPlan, LogicalPlanBuilder, 
TableScan};
+use datafusion::prelude::*;
+use std::sync::Arc;
+
+#[test]
+fn plan_1() -> Result<()> {
+    // create a logical table source
+    let schema = Schema::new(vec![
+        Field::new("id", DataType::Int32, true),
+        Field::new("name", DataType::Utf8, true),
+    ]);
+    let table_source = LogicalTableSource::new(SchemaRef::new(schema));
+
+    // create a TableScan plan
+    let projection = None; // optional projection
+    let filters = vec![]; // optional filters to push down
+    let fetch = None; // optional LIMIT
+    let table_scan = LogicalPlan::TableScan(TableScan::try_new(
+        "person",
+        Arc::new(table_source),
+        projection,
+        filters,
+        fetch,
+    )?);
+
+    // create a Filter plan that evaluates `id > 500` and wraps the TableScan
+    let filter_expr = col("id").gt(lit(500));
+    let plan = LogicalPlan::Filter(Filter::try_new(filter_expr, 
Arc::new(table_scan))?);
+
+    // print the plan
+    println!("{}", plan.display_indent_schema());
+
+    Ok(())
+}
+
+#[test]
+fn plan_builder_1() -> Result<()> {
+    // create a logical table source
+    let schema = Schema::new(vec![
+        Field::new("id", DataType::Int32, true),
+        Field::new("name", DataType::Utf8, true),
+    ]);
+    let table_source = LogicalTableSource::new(SchemaRef::new(schema));
+
+    // optional projection
+    let projection = None;
+
+    // create a LogicalPlanBuilder for a table scan
+    let builder = LogicalPlanBuilder::scan("person", Arc::new(table_source), 
projection)?;
+
+    // perform a filter that evaluates `id > 500`, and build the plan
+    let plan = builder.filter(col("id").gt(lit(500)))?.build()?;
+
+    // print the plan
+    println!("{}", plan.display_indent_schema());
+
+    Ok(())
+}

Reply via email to