[arrow-datafusion] branch main updated: Minor: Move `project_schema` to `datafusion_common` (#7237)

jakevin Tue, 08 Aug 2023 22:15:07 -0700

This is an automated email from the ASF dual-hosted git repository.

jakevin pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-datafusion.git



The following commit(s) were added to refs/heads/main by this push:
     new 61329a04d9 Minor: Move `project_schema` to `datafusion_common` (#7237)
61329a04d9 is described below

commit 61329a04d98cc7cf2a8fea47e5e7c991906d9f48
Author: Andrew Lamb <[email protected]>
AuthorDate: Wed Aug 9 00:14:55 2023 -0500

    Minor: Move `project_schema` to `datafusion_common` (#7237)
---
 datafusion/common/src/lib.rs                       |  1 +
 datafusion/common/src/utils.rs                     | 42 +++++++++++++++++++++-
 datafusion/core/src/datasource/empty.rs            |  2 +-
 datafusion/core/src/datasource/listing/table.rs    |  4 +--
 datafusion/core/src/physical_plan/memory.rs        |  6 ++--
 datafusion/core/src/physical_plan/mod.rs           | 41 +--------------------
 datafusion/core/tests/custom_sources.rs            |  5 +--
 .../core/tests/custom_sources_cases/statistics.rs  |  6 ++--
 8 files changed, 55 insertions(+), 52 deletions(-)

diff --git a/datafusion/common/src/lib.rs b/datafusion/common/src/lib.rs
index e6116029d6..b831d3d0ca 100644
--- a/datafusion/common/src/lib.rs
+++ b/datafusion/common/src/lib.rs
@@ -52,6 +52,7 @@ pub use scalar::{ScalarType, ScalarValue};
 pub use schema_reference::{OwnedSchemaReference, SchemaReference};
 pub use stats::{ColumnStatistics, Statistics};
 pub use table_reference::{OwnedTableReference, ResolvedTableReference, TableReference};
+pub use utils::project_schema;
 
 /// Downcast an Arrow Array to a concrete type, return an `DataFusionError::Internal` if the cast is
 /// not possible. In normal usage of DataFusion the downcast should always succeed.
diff --git a/datafusion/common/src/utils.rs b/datafusion/common/src/utils.rs
index 9ab91bcdd8..54481672f9 100644
--- a/datafusion/common/src/utils.rs
+++ b/datafusion/common/src/utils.rs
@@ -21,7 +21,7 @@ use crate::{DataFusionError, Result, ScalarValue};
 use arrow::array::{ArrayRef, PrimitiveArray};
 use arrow::compute;
 use arrow::compute::{lexicographical_partition_ranges, SortColumn, SortOptions};
-use arrow::datatypes::UInt32Type;
+use arrow::datatypes::{SchemaRef, UInt32Type};
 use arrow::record_batch::RecordBatch;
 use sqlparser::ast::Ident;
 use sqlparser::dialect::GenericDialect;
@@ -31,6 +31,46 @@ use std::cmp::Ordering;
 use std::ops::Range;
 use std::sync::Arc;
 
+/// Applies an optional projection to a [`SchemaRef`], returning the
+/// projected schema
+///
+/// Example:
+/// ```
+/// use arrow::datatypes::{SchemaRef, Schema, Field, DataType};
+/// use datafusion_common::project_schema;
+///
+/// // Schema with columns 'a', 'b', and 'c'
+/// let schema = SchemaRef::new(Schema::new(vec![
+///   Field::new("a", DataType::Int32, true),
+///   Field::new("b", DataType::Int64, true),
+///   Field::new("c", DataType::Utf8, true),
+/// ]));
+///
+/// // Pick columns 'c' and 'b'
+/// let projection = Some(vec![2,1]);
+/// let projected_schema = project_schema(
+///    &schema,
+///    projection.as_ref()
+///  ).unwrap();
+///
+/// let expected_schema = SchemaRef::new(Schema::new(vec![
+///   Field::new("c", DataType::Utf8, true),
+///   Field::new("b", DataType::Int64, true),
+/// ]));
+///
+/// assert_eq!(projected_schema, expected_schema);
+/// ```
+pub fn project_schema(
+    schema: &SchemaRef,
+    projection: Option<&Vec<usize>>,
+) -> Result<SchemaRef> {
+    let schema = match projection {
+        Some(columns) => Arc::new(schema.project(columns)?),
+        None => Arc::clone(schema),
+    };
+    Ok(schema)
+}
+
 /// Given column vectors, returns row at `idx`.
 pub fn get_row_at_idx(columns: &[ArrayRef], idx: usize) -> Result<Vec<ScalarValue>> {
     columns
diff --git a/datafusion/core/src/datasource/empty.rs b/datafusion/core/src/datasource/empty.rs
index 37434002c1..77160aa5d1 100644
--- a/datafusion/core/src/datasource/empty.rs
+++ b/datafusion/core/src/datasource/empty.rs
@@ -22,12 +22,12 @@ use std::sync::Arc;
 
 use arrow::datatypes::*;
 use async_trait::async_trait;
+use datafusion_common::project_schema;
 
 use crate::datasource::{TableProvider, TableType};
 use crate::error::Result;
 use crate::execution::context::SessionState;
 use crate::logical_expr::Expr;
-use crate::physical_plan::project_schema;
 use crate::physical_plan::{empty::EmptyExec, ExecutionPlan};
 
 /// An empty plan that is useful for testing and generating plans
diff --git a/datafusion/core/src/datasource/listing/table.rs b/datafusion/core/src/datasource/listing/table.rs
index b47d25d1f9..60e5428867 100644
--- a/datafusion/core/src/datasource/listing/table.rs
+++ b/datafusion/core/src/datasource/listing/table.rs
@@ -25,7 +25,7 @@ use arrow::datatypes::{DataType, Field, SchemaBuilder, SchemaRef};
 use arrow_schema::Schema;
 use async_trait::async_trait;
 use dashmap::DashMap;
-use datafusion_common::{plan_err, SchemaExt, ToDFSchema};
+use datafusion_common::{plan_err, project_schema, SchemaExt, ToDFSchema};
 use datafusion_expr::expr::Sort;
 use datafusion_optimizer::utils::conjunction;
 use datafusion_physical_expr::{create_physical_expr, LexOrdering, PhysicalSortExpr};
@@ -50,7 +50,7 @@ use crate::{
     error::{DataFusionError, Result},
     execution::context::SessionState,
     logical_expr::Expr,
-    physical_plan::{empty::EmptyExec, project_schema, ExecutionPlan, Statistics},
+    physical_plan::{empty::EmptyExec, ExecutionPlan, Statistics},
 };
 
 use super::PartitionedFile;
diff --git a/datafusion/core/src/physical_plan/memory.rs b/datafusion/core/src/physical_plan/memory.rs
index 5e7917b978..afc63e7d4c 100644
--- a/datafusion/core/src/physical_plan/memory.rs
+++ b/datafusion/core/src/physical_plan/memory.rs
@@ -19,13 +19,13 @@
 
 use super::expressions::PhysicalSortExpr;
 use super::{
-    common, project_schema, DisplayAs, DisplayFormatType, ExecutionPlan, Partitioning,
-    RecordBatchStream, SendableRecordBatchStream, Statistics,
+    common, DisplayAs, DisplayFormatType, ExecutionPlan, Partitioning, RecordBatchStream,
+    SendableRecordBatchStream, Statistics,
 };
 use arrow::datatypes::SchemaRef;
 use arrow::record_batch::RecordBatch;
 use core::fmt;
-use datafusion_common::Result;
+use datafusion_common::{project_schema, Result};
 use std::any::Any;
 use std::sync::Arc;
 use std::task::{Context, Poll};
diff --git a/datafusion/core/src/physical_plan/mod.rs b/datafusion/core/src/physical_plan/mod.rs
index 66254ee6f5..cf33fbc8fb 100644
--- a/datafusion/core/src/physical_plan/mod.rs
+++ b/datafusion/core/src/physical_plan/mod.rs
@@ -660,46 +660,6 @@ use datafusion_physical_expr::{
 pub use datafusion_physical_expr::{AggregateExpr, PhysicalExpr};
 use datafusion_physical_expr::{EquivalenceProperties, PhysicalSortRequirement};
 
-/// Applies an optional projection to a [`SchemaRef`], returning the
-/// projected schema
-///
-/// Example:
-/// ```
-/// use arrow::datatypes::{SchemaRef, Schema, Field, DataType};
-/// use datafusion::physical_plan::project_schema;
-///
-/// // Schema with columns 'a', 'b', and 'c'
-/// let schema = SchemaRef::new(Schema::new(vec![
-///   Field::new("a", DataType::Int32, true),
-///   Field::new("b", DataType::Int64, true),
-///   Field::new("c", DataType::Utf8, true),
-/// ]));
-///
-/// // Pick columns 'c' and 'b'
-/// let projection = Some(vec![2,1]);
-/// let projected_schema = project_schema(
-///    &schema,
-///    projection.as_ref()
-///  ).unwrap();
-///
-/// let expected_schema = SchemaRef::new(Schema::new(vec![
-///   Field::new("c", DataType::Utf8, true),
-///   Field::new("b", DataType::Int64, true),
-/// ]));
-///
-/// assert_eq!(projected_schema, expected_schema);
-/// ```
-pub fn project_schema(
-    schema: &SchemaRef,
-    projection: Option<&Vec<usize>>,
-) -> Result<SchemaRef> {
-    let schema = match projection {
-        Some(columns) => Arc::new(schema.project(columns)?),
-        None => Arc::clone(schema),
-    };
-    Ok(schema)
-}
-
 pub mod aggregates;
 pub mod analyze;
 pub mod coalesce_batches;
@@ -728,6 +688,7 @@ pub mod windows;
 
 use crate::physical_plan::repartition::RepartitionExec;
 use crate::physical_plan::sorts::sort_preserving_merge::SortPreservingMergeExec;
+pub use datafusion_common::utils::project_schema;
 use datafusion_execution::TaskContext;
 pub use datafusion_physical_expr::{expressions, functions, hash_utils, udf};
 
diff --git a/datafusion/core/tests/custom_sources.rs b/datafusion/core/tests/custom_sources.rs
index 0f742f7b9b..771da80aa6 100644
--- a/datafusion/core/tests/custom_sources.rs
+++ b/datafusion/core/tests/custom_sources.rs
@@ -26,8 +26,8 @@ use datafusion::logical_expr::{
 use datafusion::physical_plan::empty::EmptyExec;
 use datafusion::physical_plan::expressions::PhysicalSortExpr;
 use datafusion::physical_plan::{
-    project_schema, ColumnStatistics, DisplayAs, ExecutionPlan, Partitioning,
-    RecordBatchStream, SendableRecordBatchStream, Statistics,
+    ColumnStatistics, DisplayAs, ExecutionPlan, Partitioning, RecordBatchStream,
+    SendableRecordBatchStream, Statistics,
 };
 use datafusion::scalar::ScalarValue;
 use datafusion::{
@@ -37,6 +37,7 @@ use datafusion::{
 use datafusion::{error::Result, physical_plan::DisplayFormatType};
 
 use datafusion_common::cast::as_primitive_array;
+use datafusion_common::project_schema;
 use futures::stream::Stream;
 use std::any::Any;
 use std::pin::Pin;
diff --git a/datafusion/core/tests/custom_sources_cases/statistics.rs b/datafusion/core/tests/custom_sources_cases/statistics.rs
index 167cf0e7f3..43e6c8851e 100644
--- a/datafusion/core/tests/custom_sources_cases/statistics.rs
+++ b/datafusion/core/tests/custom_sources_cases/statistics.rs
@@ -25,9 +25,8 @@ use datafusion::{
     error::Result,
     logical_expr::Expr,
     physical_plan::{
-        expressions::PhysicalSortExpr, project_schema, ColumnStatistics, DisplayAs,
-        DisplayFormatType, ExecutionPlan, Partitioning, SendableRecordBatchStream,
-        Statistics,
+        expressions::PhysicalSortExpr, ColumnStatistics, DisplayAs, DisplayFormatType,
+        ExecutionPlan, Partitioning, SendableRecordBatchStream, Statistics,
     },
     prelude::SessionContext,
     scalar::ScalarValue,
@@ -35,6 +34,7 @@ use datafusion::{
 
 use async_trait::async_trait;
 use datafusion::execution::context::{SessionState, TaskContext};
+use datafusion_common::project_schema;
 
 /// This is a testing structure for statistics
 /// It will act both as a table provider and execution plan

[arrow-datafusion] branch main updated: Minor: Move `project_schema` to `datafusion_common` (#7237)

Reply via email to