This is an automated email from the ASF dual-hosted git repository.
jakevin pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-datafusion.git
The following commit(s) were added to refs/heads/main by this push:
new 61329a04d9 Minor: Move `project_schema` to `datafusion_common` (#7237)
61329a04d9 is described below
commit 61329a04d98cc7cf2a8fea47e5e7c991906d9f48
Author: Andrew Lamb <[email protected]>
AuthorDate: Wed Aug 9 00:14:55 2023 -0500
Minor: Move `project_schema` to `datafusion_common` (#7237)
---
datafusion/common/src/lib.rs | 1 +
datafusion/common/src/utils.rs | 42 +++++++++++++++++++++-
datafusion/core/src/datasource/empty.rs | 2 +-
datafusion/core/src/datasource/listing/table.rs | 4 +--
datafusion/core/src/physical_plan/memory.rs | 6 ++--
datafusion/core/src/physical_plan/mod.rs | 41 +--------------------
datafusion/core/tests/custom_sources.rs | 5 +--
.../core/tests/custom_sources_cases/statistics.rs | 6 ++--
8 files changed, 55 insertions(+), 52 deletions(-)
diff --git a/datafusion/common/src/lib.rs b/datafusion/common/src/lib.rs
index e6116029d6..b831d3d0ca 100644
--- a/datafusion/common/src/lib.rs
+++ b/datafusion/common/src/lib.rs
@@ -52,6 +52,7 @@ pub use scalar::{ScalarType, ScalarValue};
pub use schema_reference::{OwnedSchemaReference, SchemaReference};
pub use stats::{ColumnStatistics, Statistics};
pub use table_reference::{OwnedTableReference, ResolvedTableReference, TableReference};
+pub use utils::project_schema;
/// Downcast an Arrow Array to a concrete type, return an `DataFusionError::Internal` if the cast is
/// not possible. In normal usage of DataFusion the downcast should always succeed.
diff --git a/datafusion/common/src/utils.rs b/datafusion/common/src/utils.rs
index 9ab91bcdd8..54481672f9 100644
--- a/datafusion/common/src/utils.rs
+++ b/datafusion/common/src/utils.rs
@@ -21,7 +21,7 @@ use crate::{DataFusionError, Result, ScalarValue};
use arrow::array::{ArrayRef, PrimitiveArray};
use arrow::compute;
use arrow::compute::{lexicographical_partition_ranges, SortColumn, SortOptions};
-use arrow::datatypes::UInt32Type;
+use arrow::datatypes::{SchemaRef, UInt32Type};
use arrow::record_batch::RecordBatch;
use sqlparser::ast::Ident;
use sqlparser::dialect::GenericDialect;
@@ -31,6 +31,46 @@ use std::cmp::Ordering;
use std::ops::Range;
use std::sync::Arc;
+/// Applies an optional projection to a [`SchemaRef`], returning the
+/// projected schema
+///
+/// Example:
+/// ```
+/// use arrow::datatypes::{SchemaRef, Schema, Field, DataType};
+/// use datafusion_common::project_schema;
+///
+/// // Schema with columns 'a', 'b', and 'c'
+/// let schema = SchemaRef::new(Schema::new(vec![
+/// Field::new("a", DataType::Int32, true),
+/// Field::new("b", DataType::Int64, true),
+/// Field::new("c", DataType::Utf8, true),
+/// ]));
+///
+/// // Pick columns 'c' and 'b'
+/// let projection = Some(vec![2,1]);
+/// let projected_schema = project_schema(
+/// &schema,
+/// projection.as_ref()
+/// ).unwrap();
+///
+/// let expected_schema = SchemaRef::new(Schema::new(vec![
+/// Field::new("c", DataType::Utf8, true),
+/// Field::new("b", DataType::Int64, true),
+/// ]));
+///
+/// assert_eq!(projected_schema, expected_schema);
+/// ```
+pub fn project_schema(
+ schema: &SchemaRef,
+ projection: Option<&Vec<usize>>,
+) -> Result<SchemaRef> {
+ let schema = match projection {
+ Some(columns) => Arc::new(schema.project(columns)?),
+ None => Arc::clone(schema),
+ };
+ Ok(schema)
+}
+
/// Given column vectors, returns row at `idx`.
pub fn get_row_at_idx(columns: &[ArrayRef], idx: usize) -> Result<Vec<ScalarValue>> {
columns
diff --git a/datafusion/core/src/datasource/empty.rs b/datafusion/core/src/datasource/empty.rs
index 37434002c1..77160aa5d1 100644
--- a/datafusion/core/src/datasource/empty.rs
+++ b/datafusion/core/src/datasource/empty.rs
@@ -22,12 +22,12 @@ use std::sync::Arc;
use arrow::datatypes::*;
use async_trait::async_trait;
+use datafusion_common::project_schema;
use crate::datasource::{TableProvider, TableType};
use crate::error::Result;
use crate::execution::context::SessionState;
use crate::logical_expr::Expr;
-use crate::physical_plan::project_schema;
use crate::physical_plan::{empty::EmptyExec, ExecutionPlan};
/// An empty plan that is useful for testing and generating plans
diff --git a/datafusion/core/src/datasource/listing/table.rs b/datafusion/core/src/datasource/listing/table.rs
index b47d25d1f9..60e5428867 100644
--- a/datafusion/core/src/datasource/listing/table.rs
+++ b/datafusion/core/src/datasource/listing/table.rs
@@ -25,7 +25,7 @@ use arrow::datatypes::{DataType, Field, SchemaBuilder, SchemaRef};
use arrow_schema::Schema;
use async_trait::async_trait;
use dashmap::DashMap;
-use datafusion_common::{plan_err, SchemaExt, ToDFSchema};
+use datafusion_common::{plan_err, project_schema, SchemaExt, ToDFSchema};
use datafusion_expr::expr::Sort;
use datafusion_optimizer::utils::conjunction;
use datafusion_physical_expr::{create_physical_expr, LexOrdering, PhysicalSortExpr};
@@ -50,7 +50,7 @@ use crate::{
error::{DataFusionError, Result},
execution::context::SessionState,
logical_expr::Expr,
- physical_plan::{empty::EmptyExec, project_schema, ExecutionPlan, Statistics},
+ physical_plan::{empty::EmptyExec, ExecutionPlan, Statistics},
};
use super::PartitionedFile;
diff --git a/datafusion/core/src/physical_plan/memory.rs b/datafusion/core/src/physical_plan/memory.rs
index 5e7917b978..afc63e7d4c 100644
--- a/datafusion/core/src/physical_plan/memory.rs
+++ b/datafusion/core/src/physical_plan/memory.rs
@@ -19,13 +19,13 @@
use super::expressions::PhysicalSortExpr;
use super::{
- common, project_schema, DisplayAs, DisplayFormatType, ExecutionPlan, Partitioning,
- RecordBatchStream, SendableRecordBatchStream, Statistics,
+ common, DisplayAs, DisplayFormatType, ExecutionPlan, Partitioning, RecordBatchStream,
+ SendableRecordBatchStream, Statistics,
};
use arrow::datatypes::SchemaRef;
use arrow::record_batch::RecordBatch;
use core::fmt;
-use datafusion_common::Result;
+use datafusion_common::{project_schema, Result};
use std::any::Any;
use std::sync::Arc;
use std::task::{Context, Poll};
diff --git a/datafusion/core/src/physical_plan/mod.rs b/datafusion/core/src/physical_plan/mod.rs
index 66254ee6f5..cf33fbc8fb 100644
--- a/datafusion/core/src/physical_plan/mod.rs
+++ b/datafusion/core/src/physical_plan/mod.rs
@@ -660,46 +660,6 @@ use datafusion_physical_expr::{
pub use datafusion_physical_expr::{AggregateExpr, PhysicalExpr};
use datafusion_physical_expr::{EquivalenceProperties, PhysicalSortRequirement};
-/// Applies an optional projection to a [`SchemaRef`], returning the
-/// projected schema
-///
-/// Example:
-/// ```
-/// use arrow::datatypes::{SchemaRef, Schema, Field, DataType};
-/// use datafusion::physical_plan::project_schema;
-///
-/// // Schema with columns 'a', 'b', and 'c'
-/// let schema = SchemaRef::new(Schema::new(vec![
-/// Field::new("a", DataType::Int32, true),
-/// Field::new("b", DataType::Int64, true),
-/// Field::new("c", DataType::Utf8, true),
-/// ]));
-///
-/// // Pick columns 'c' and 'b'
-/// let projection = Some(vec![2,1]);
-/// let projected_schema = project_schema(
-/// &schema,
-/// projection.as_ref()
-/// ).unwrap();
-///
-/// let expected_schema = SchemaRef::new(Schema::new(vec![
-/// Field::new("c", DataType::Utf8, true),
-/// Field::new("b", DataType::Int64, true),
-/// ]));
-///
-/// assert_eq!(projected_schema, expected_schema);
-/// ```
-pub fn project_schema(
- schema: &SchemaRef,
- projection: Option<&Vec<usize>>,
-) -> Result<SchemaRef> {
- let schema = match projection {
- Some(columns) => Arc::new(schema.project(columns)?),
- None => Arc::clone(schema),
- };
- Ok(schema)
-}
-
pub mod aggregates;
pub mod analyze;
pub mod coalesce_batches;
@@ -728,6 +688,7 @@ pub mod windows;
use crate::physical_plan::repartition::RepartitionExec;
use crate::physical_plan::sorts::sort_preserving_merge::SortPreservingMergeExec;
+pub use datafusion_common::utils::project_schema;
use datafusion_execution::TaskContext;
pub use datafusion_physical_expr::{expressions, functions, hash_utils, udf};
diff --git a/datafusion/core/tests/custom_sources.rs b/datafusion/core/tests/custom_sources.rs
index 0f742f7b9b..771da80aa6 100644
--- a/datafusion/core/tests/custom_sources.rs
+++ b/datafusion/core/tests/custom_sources.rs
@@ -26,8 +26,8 @@ use datafusion::logical_expr::{
use datafusion::physical_plan::empty::EmptyExec;
use datafusion::physical_plan::expressions::PhysicalSortExpr;
use datafusion::physical_plan::{
- project_schema, ColumnStatistics, DisplayAs, ExecutionPlan, Partitioning,
- RecordBatchStream, SendableRecordBatchStream, Statistics,
+ ColumnStatistics, DisplayAs, ExecutionPlan, Partitioning, RecordBatchStream,
+ SendableRecordBatchStream, Statistics,
};
use datafusion::scalar::ScalarValue;
use datafusion::{
@@ -37,6 +37,7 @@ use datafusion::{
use datafusion::{error::Result, physical_plan::DisplayFormatType};
use datafusion_common::cast::as_primitive_array;
+use datafusion_common::project_schema;
use futures::stream::Stream;
use std::any::Any;
use std::pin::Pin;
diff --git a/datafusion/core/tests/custom_sources_cases/statistics.rs b/datafusion/core/tests/custom_sources_cases/statistics.rs
index 167cf0e7f3..43e6c8851e 100644
--- a/datafusion/core/tests/custom_sources_cases/statistics.rs
+++ b/datafusion/core/tests/custom_sources_cases/statistics.rs
@@ -25,9 +25,8 @@ use datafusion::{
error::Result,
logical_expr::Expr,
physical_plan::{
- expressions::PhysicalSortExpr, project_schema, ColumnStatistics, DisplayAs,
- DisplayFormatType, ExecutionPlan, Partitioning, SendableRecordBatchStream,
- Statistics,
+ expressions::PhysicalSortExpr, ColumnStatistics, DisplayAs, DisplayFormatType,
+ ExecutionPlan, Partitioning, SendableRecordBatchStream, Statistics,
},
prelude::SessionContext,
scalar::ScalarValue,
@@ -35,6 +34,7 @@ use datafusion::{
use async_trait::async_trait;
use datafusion::execution::context::{SessionState, TaskContext};
+use datafusion_common::project_schema;
/// This is a testing structure for statistics
/// It will act both as a table provider and execution plan