This is an automated email from the ASF dual-hosted git repository.
jorgecarleitao pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git
The following commit(s) were added to refs/heads/master by this push:
new 437c917 ARROW-11401: [Rust][DataFusion] Pass slices instead of Vec in
DataFrame API
437c917 is described below
commit 437c9173c3e067712eb714c643ca839acc7ed7f6
Author: Heres, Daniel <[email protected]>
AuthorDate: Sun Jan 31 11:02:41 2021 +0100
ARROW-11401: [Rust][DataFusion] Pass slices instead of Vec in DataFrame API
This PR makes the API a bit more ergonomic to use (saving 3 characters per
parameter), avoiding the need to create a `Vec` with a macro or to clone an existing one.
It is also more consistent, for example `DataFrame::join` already was
taking parameters by `&[&str]` instead of `Vec<&str>`.
This is a (big) backwards incompatible change.
Closes #9338 from Dandandan/dataframe_api
Authored-by: Heres, Daniel <[email protected]>
Signed-off-by: Jorge C. Leitao <[email protected]>
---
rust/benchmarks/src/bin/tpch.rs | 2 +-
rust/datafusion/examples/dataframe.rs | 2 +-
rust/datafusion/examples/dataframe_in_memory.rs | 2 +-
rust/datafusion/examples/simple_udaf.rs | 2 +-
rust/datafusion/examples/simple_udf.rs | 2 +-
rust/datafusion/src/dataframe.rs | 24 +++++------
rust/datafusion/src/execution/context.rs | 14 +++----
rust/datafusion/src/execution/dataframe_impl.rs | 32 +++++++--------
rust/datafusion/src/lib.rs | 2 +-
rust/datafusion/src/logical_plan/builder.rs | 31 +++++++-------
rust/datafusion/src/logical_plan/plan.rs | 4 +-
rust/datafusion/src/optimizer/filter_push_down.rs | 48 +++++++++++-----------
.../src/optimizer/projection_push_down.rs | 26 ++++++------
rust/datafusion/src/physical_plan/planner.rs | 12 +++---
rust/datafusion/src/sql/planner.rs | 22 +++++-----
rust/datafusion/src/sql/utils.rs | 4 +-
rust/datafusion/tests/custom_sources.rs | 2 +-
rust/datafusion/tests/provider_filter_pushdown.rs | 2 +-
18 files changed, 116 insertions(+), 117 deletions(-)
diff --git a/rust/benchmarks/src/bin/tpch.rs b/rust/benchmarks/src/bin/tpch.rs
index c3352ef..593d75c 100644
--- a/rust/benchmarks/src/bin/tpch.rs
+++ b/rust/benchmarks/src/bin/tpch.rs
@@ -1575,7 +1575,7 @@ mod tests {
.file_extension(".out");
let df = ctx.read_csv(&format!("{}/answers/q{}.out", path, n),
options)?;
let df = df.select(
- get_answer_schema(n)
+ &get_answer_schema(n)
.fields()
.iter()
.map(|field| {
diff --git a/rust/datafusion/examples/dataframe.rs
b/rust/datafusion/examples/dataframe.rs
index 03cd806..cba4d87 100644
--- a/rust/datafusion/examples/dataframe.rs
+++ b/rust/datafusion/examples/dataframe.rs
@@ -34,7 +34,7 @@ async fn main() -> Result<()> {
// define the query using the DataFrame trait
let df = ctx
.read_parquet(filename)?
- .select_columns(vec!["id", "bool_col", "timestamp_col"])?
+ .select_columns(&["id", "bool_col", "timestamp_col"])?
.filter(col("id").gt(lit(1)))?;
// execute the query
diff --git a/rust/datafusion/examples/dataframe_in_memory.rs
b/rust/datafusion/examples/dataframe_in_memory.rs
index 6923b80..ff35266 100644
--- a/rust/datafusion/examples/dataframe_in_memory.rs
+++ b/rust/datafusion/examples/dataframe_in_memory.rs
@@ -56,7 +56,7 @@ async fn main() -> Result<()> {
// construct an expression corresponding to "SELECT a, b FROM t WHERE b =
10" in SQL
let filter = col("b").eq(lit(10));
- let df = df.select_columns(vec!["a", "b"])?.filter(filter)?;
+ let df = df.select_columns(&["a", "b"])?.filter(filter)?;
// execute
let results = df.collect().await?;
diff --git a/rust/datafusion/examples/simple_udaf.rs
b/rust/datafusion/examples/simple_udaf.rs
index 524a125..f27036e 100644
--- a/rust/datafusion/examples/simple_udaf.rs
+++ b/rust/datafusion/examples/simple_udaf.rs
@@ -148,7 +148,7 @@ async fn main() -> Result<()> {
let df = ctx.table("t")?;
// perform the aggregation
- let df = df.aggregate(vec![], vec![geometric_mean.call(vec![col("a")])])?;
+ let df = df.aggregate(&[], &[geometric_mean.call(vec![col("a")])])?;
// note that "a" is f32, not f64. DataFusion coerces it to match the
UDAF's signature.
diff --git a/rust/datafusion/examples/simple_udf.rs
b/rust/datafusion/examples/simple_udf.rs
index 94df59d..0eef801 100644
--- a/rust/datafusion/examples/simple_udf.rs
+++ b/rust/datafusion/examples/simple_udf.rs
@@ -130,7 +130,7 @@ async fn main() -> Result<()> {
let expr1 = pow.call(vec![col("a"), col("b")]);
// equivalent to `'SELECT pow(a, b), pow(a, b) AS pow1 FROM t'`
- let df = df.select(vec![
+ let df = df.select(&[
expr,
// alias so that they have different column names
expr1.alias("pow1"),
diff --git a/rust/datafusion/src/dataframe.rs b/rust/datafusion/src/dataframe.rs
index 1e3c1f1..04d827c 100644
--- a/rust/datafusion/src/dataframe.rs
+++ b/rust/datafusion/src/dataframe.rs
@@ -44,7 +44,7 @@ use async_trait::async_trait;
/// let mut ctx = ExecutionContext::new();
/// let df = ctx.read_csv("tests/example.csv", CsvReadOptions::new())?;
/// let df = df.filter(col("a").lt_eq(col("b")))?
-/// .aggregate(vec![col("a")], vec![min(col("b"))])?
+/// .aggregate(&[col("a")], &[min(col("b"))])?
/// .limit(100)?;
/// let results = df.collect();
/// # Ok(())
@@ -61,11 +61,11 @@ pub trait DataFrame {
/// # fn main() -> Result<()> {
/// let mut ctx = ExecutionContext::new();
/// let df = ctx.read_csv("tests/example.csv", CsvReadOptions::new())?;
- /// let df = df.select_columns(vec!["a", "b"])?;
+ /// let df = df.select_columns(&["a", "b"])?;
/// # Ok(())
/// # }
/// ```
- fn select_columns(&self, columns: Vec<&str>) -> Result<Arc<dyn DataFrame>>;
+ fn select_columns(&self, columns: &[&str]) -> Result<Arc<dyn DataFrame>>;
/// Create a projection based on arbitrary expressions.
///
@@ -75,11 +75,11 @@ pub trait DataFrame {
/// # fn main() -> Result<()> {
/// let mut ctx = ExecutionContext::new();
/// let df = ctx.read_csv("tests/example.csv", CsvReadOptions::new())?;
- /// let df = df.select(vec![col("a") * col("b"), col("c")])?;
+ /// let df = df.select(&[col("a") * col("b"), col("c")])?;
/// # Ok(())
/// # }
/// ```
- fn select(&self, expr: Vec<Expr>) -> Result<Arc<dyn DataFrame>>;
+ fn select(&self, expr: &[Expr]) -> Result<Arc<dyn DataFrame>>;
/// Filter a DataFrame to only include rows that match the specified
filter expression.
///
@@ -105,17 +105,17 @@ pub trait DataFrame {
/// let df = ctx.read_csv("tests/example.csv", CsvReadOptions::new())?;
///
/// // The following use is the equivalent of "SELECT MIN(b) GROUP BY a"
- /// let _ = df.aggregate(vec![col("a")], vec![min(col("b"))])?;
+ /// let _ = df.aggregate(&[col("a")], &[min(col("b"))])?;
///
/// // The following use is the equivalent of "SELECT MIN(b)"
- /// let _ = df.aggregate(vec![], vec![min(col("b"))])?;
+ /// let _ = df.aggregate(&[], &[min(col("b"))])?;
/// # Ok(())
/// # }
/// ```
fn aggregate(
&self,
- group_expr: Vec<Expr>,
- aggr_expr: Vec<Expr>,
+ group_expr: &[Expr],
+ aggr_expr: &[Expr],
) -> Result<Arc<dyn DataFrame>>;
/// Limit the number of rows returned from this DataFrame.
@@ -141,11 +141,11 @@ pub trait DataFrame {
/// # fn main() -> Result<()> {
/// let mut ctx = ExecutionContext::new();
/// let df = ctx.read_csv("tests/example.csv", CsvReadOptions::new())?;
- /// let df = df.sort(vec![col("a").sort(true, true), col("b").sort(false,
false)])?;
+ /// let df = df.sort(&[col("a").sort(true, true), col("b").sort(false,
false)])?;
/// # Ok(())
/// # }
/// ```
- fn sort(&self, expr: Vec<Expr>) -> Result<Arc<dyn DataFrame>>;
+ fn sort(&self, expr: &[Expr]) -> Result<Arc<dyn DataFrame>>;
/// Join this DataFrame with another DataFrame using the specified columns
as join keys
///
@@ -157,7 +157,7 @@ pub trait DataFrame {
/// let mut ctx = ExecutionContext::new();
/// let left = ctx.read_csv("tests/example.csv", CsvReadOptions::new())?;
/// let right = ctx.read_csv("tests/example.csv", CsvReadOptions::new())?
- /// .select(vec![
+ /// .select(&[
/// col("a").alias("a2"),
/// col("b").alias("b2"),
/// col("c").alias("c2")])?;
diff --git a/rust/datafusion/src/execution/context.rs
b/rust/datafusion/src/execution/context.rs
index 1accff2..e8185a8 100644
--- a/rust/datafusion/src/execution/context.rs
+++ b/rust/datafusion/src/execution/context.rs
@@ -75,7 +75,7 @@ use parquet::file::properties::WriterProperties;
/// let mut ctx = ExecutionContext::new();
/// let df = ctx.read_csv("tests/example.csv", CsvReadOptions::new())?;
/// let df = df.filter(col("a").lt_eq(col("b")))?
-/// .aggregate(vec![col("a")], vec![min(col("b"))])?
+/// .aggregate(&[col("a")], &[min(col("b"))])?
/// .limit(100)?;
/// let results = df.collect();
/// # Ok(())
@@ -787,7 +787,7 @@ mod tests {
let table = ctx.table("test")?;
let logical_plan = LogicalPlanBuilder::from(&table.to_logical_plan())
- .project(vec![col("c2")])?
+ .project(&[col("c2")])?
.build()?;
let optimized_plan = ctx.optimize(&logical_plan)?;
@@ -839,7 +839,7 @@ mod tests {
assert_eq!(schema.field_with_name("c1")?.is_nullable(), false);
let plan = LogicalPlanBuilder::scan_empty("", schema.as_ref(), None)?
- .project(vec![col("c1")])?
+ .project(&[col("c1")])?
.build()?;
let plan = ctx.optimize(&plan)?;
@@ -870,7 +870,7 @@ mod tests {
)?]];
let plan = LogicalPlanBuilder::scan_memory(partitions, schema, None)?
- .project(vec![col("b")])?
+ .project(&[col("b")])?
.build()?;
assert_fields_eq(&plan, vec!["b"]);
@@ -1401,8 +1401,8 @@ mod tests {
]));
let plan = LogicalPlanBuilder::scan_empty("", schema.as_ref(), None)?
- .aggregate(vec![col("c1")], vec![sum(col("c2"))])?
- .project(vec![col("c1"), col("SUM(c2)").alias("total_salary")])?
+ .aggregate(&[col("c1")], &[sum(col("c2"))])?
+ .project(&[col("c1"), col("SUM(c2)").alias("total_salary")])?
.build()?;
let plan = ctx.optimize(&plan)?;
@@ -1609,7 +1609,7 @@ mod tests {
let t = ctx.table("t")?;
let plan = LogicalPlanBuilder::from(&t.to_logical_plan())
- .project(vec![
+ .project(&[
col("a"),
col("b"),
ctx.udf("my_add")?.call(vec![col("a"), col("b")]),
diff --git a/rust/datafusion/src/execution/dataframe_impl.rs
b/rust/datafusion/src/execution/dataframe_impl.rs
index db8b86b..3a40d20 100644
--- a/rust/datafusion/src/execution/dataframe_impl.rs
+++ b/rust/datafusion/src/execution/dataframe_impl.rs
@@ -49,17 +49,17 @@ impl DataFrameImpl {
#[async_trait]
impl DataFrame for DataFrameImpl {
/// Apply a projection based on a list of column names
- fn select_columns(&self, columns: Vec<&str>) -> Result<Arc<dyn DataFrame>>
{
+ fn select_columns(&self, columns: &[&str]) -> Result<Arc<dyn DataFrame>> {
let fields = columns
.iter()
.map(|name| self.plan.schema().field_with_unqualified_name(name))
.collect::<Result<Vec<_>>>()?;
- let expr = fields.iter().map(|f| col(f.name())).collect();
- self.select(expr)
+ let expr: Vec<Expr> = fields.iter().map(|f| col(f.name())).collect();
+ self.select(&expr)
}
/// Create a projection based on arbitrary expressions
- fn select(&self, expr_list: Vec<Expr>) -> Result<Arc<dyn DataFrame>> {
+ fn select(&self, expr_list: &[Expr]) -> Result<Arc<dyn DataFrame>> {
let plan = LogicalPlanBuilder::from(&self.plan)
.project(expr_list)?
.build()?;
@@ -77,8 +77,8 @@ impl DataFrame for DataFrameImpl {
/// Perform an aggregate query
fn aggregate(
&self,
- group_expr: Vec<Expr>,
- aggr_expr: Vec<Expr>,
+ group_expr: &[Expr],
+ aggr_expr: &[Expr],
) -> Result<Arc<dyn DataFrame>> {
let plan = LogicalPlanBuilder::from(&self.plan)
.aggregate(group_expr, aggr_expr)?
@@ -93,7 +93,7 @@ impl DataFrame for DataFrameImpl {
}
/// Sort by specified sorting expressions
- fn sort(&self, expr: Vec<Expr>) -> Result<Arc<dyn DataFrame>> {
+ fn sort(&self, expr: &[Expr]) -> Result<Arc<dyn DataFrame>> {
let plan = LogicalPlanBuilder::from(&self.plan).sort(expr)?.build()?;
Ok(Arc::new(DataFrameImpl::new(self.ctx_state.clone(), &plan)))
}
@@ -168,7 +168,7 @@ mod tests {
fn select_columns() -> Result<()> {
// build plan using Table API
let t = test_table()?;
- let t2 = t.select_columns(vec!["c1", "c2", "c11"])?;
+ let t2 = t.select_columns(&["c1", "c2", "c11"])?;
let plan = t2.to_logical_plan();
// build query using SQL
@@ -184,7 +184,7 @@ mod tests {
fn select_expr() -> Result<()> {
// build plan using Table API
let t = test_table()?;
- let t2 = t.select(vec![col("c1"), col("c2"), col("c11")])?;
+ let t2 = t.select(&[col("c1"), col("c2"), col("c11")])?;
let plan = t2.to_logical_plan();
// build query using SQL
@@ -200,8 +200,8 @@ mod tests {
fn aggregate() -> Result<()> {
// build plan using DataFrame API
let df = test_table()?;
- let group_expr = vec![col("c1")];
- let aggr_expr = vec![
+ let group_expr = &[col("c1")];
+ let aggr_expr = &[
min(col("c12")),
max(col("c12")),
avg(col("c12")),
@@ -228,8 +228,8 @@ mod tests {
#[tokio::test]
async fn join() -> Result<()> {
- let left = test_table()?.select_columns(vec!["c1", "c2"])?;
- let right = test_table()?.select_columns(vec!["c1", "c3"])?;
+ let left = test_table()?.select_columns(&["c1", "c2"])?;
+ let right = test_table()?.select_columns(&["c1", "c3"])?;
let left_rows = left.collect().await?;
let right_rows = right.collect().await?;
let join = left.join(right, JoinType::Inner, &["c1"], &["c1"])?;
@@ -247,7 +247,7 @@ mod tests {
fn limit() -> Result<()> {
// build query using Table API
let t = test_table()?;
- let t2 = t.select_columns(vec!["c1", "c2", "c11"])?.limit(10)?;
+ let t2 = t.select_columns(&["c1", "c2", "c11"])?.limit(10)?;
let plan = t2.to_logical_plan();
// build query using SQL
@@ -265,7 +265,7 @@ mod tests {
// build query using Table API
let df = test_table()?;
let df = df
- .select_columns(vec!["c1", "c2", "c11"])?
+ .select_columns(&["c1", "c2", "c11"])?
.limit(10)?
.explain(false)?;
let plan = df.to_logical_plan();
@@ -302,7 +302,7 @@ mod tests {
let f = df.registry();
- let df = df.select(vec![f.udf("my_fn")?.call(vec![col("c12")])])?;
+ let df = df.select(&[f.udf("my_fn")?.call(vec![col("c12")])])?;
let plan = df.to_logical_plan();
// build query using SQL
diff --git a/rust/datafusion/src/lib.rs b/rust/datafusion/src/lib.rs
index 3dec1d8..b2d3c8b 100644
--- a/rust/datafusion/src/lib.rs
+++ b/rust/datafusion/src/lib.rs
@@ -47,7 +47,7 @@
//!
//! // create a plan
//! let df = df.filter(col("a").lt_eq(col("b")))?
-//! .aggregate(vec![col("a")], vec![min(col("b"))])?
+//! .aggregate(&[col("a")], &[min(col("b"))])?
//! .limit(100)?;
//!
//! // execute the plan
diff --git a/rust/datafusion/src/logical_plan/builder.rs
b/rust/datafusion/src/logical_plan/builder.rs
index 8bb356a..9f5d781 100644
--- a/rust/datafusion/src/logical_plan/builder.rs
+++ b/rust/datafusion/src/logical_plan/builder.rs
@@ -131,7 +131,7 @@ impl LogicalPlanBuilder {
/// This function errors under any of the following conditions:
/// * Two or more expressions have the same name
/// * An invalid expression is used (e.g. a `sort` expression)
- pub fn project(&self, expr: Vec<Expr>) -> Result<Self> {
+ pub fn project(&self, expr: &[Expr]) -> Result<Self> {
let input_schema = self.plan.schema();
let mut projected_expr = vec![];
(0..expr.len()).for_each(|i| match &expr[i] {
@@ -170,9 +170,9 @@ impl LogicalPlanBuilder {
}
/// Apply a sort
- pub fn sort(&self, expr: Vec<Expr>) -> Result<Self> {
+ pub fn sort(&self, expr: &[Expr]) -> Result<Self> {
Ok(Self::from(&LogicalPlan::Sort {
- expr,
+ expr: expr.to_vec(),
input: Arc::new(self.plan.clone()),
}))
}
@@ -220,9 +220,9 @@ impl LogicalPlanBuilder {
}
/// Apply an aggregate
- pub fn aggregate(&self, group_expr: Vec<Expr>, aggr_expr: Vec<Expr>) ->
Result<Self> {
- let mut all_expr: Vec<Expr> = group_expr.clone();
- aggr_expr.iter().for_each(|x| all_expr.push(x.clone()));
+ pub fn aggregate(&self, group_expr: &[Expr], aggr_expr: &[Expr]) ->
Result<Self> {
+ let mut all_expr = group_expr.to_vec();
+ all_expr.extend_from_slice(aggr_expr);
validate_unique_names("Aggregations", &all_expr, self.plan.schema())?;
@@ -231,8 +231,8 @@ impl LogicalPlanBuilder {
Ok(Self::from(&LogicalPlan::Aggregate {
input: Arc::new(self.plan.clone()),
- group_expr,
- aggr_expr,
+ group_expr: group_expr.to_vec(),
+ aggr_expr: aggr_expr.to_vec(),
schema: DFSchemaRef::new(aggr_schema),
}))
}
@@ -351,7 +351,7 @@ mod tests {
Some(vec![0, 3]),
)?
.filter(col("state").eq(lit("CO")))?
- .project(vec![col("id")])?
+ .project(&[col("id")])?
.build()?;
let expected = "Projection: #id\
@@ -370,11 +370,8 @@ mod tests {
&employee_schema(),
Some(vec![3, 4]),
)?
- .aggregate(
- vec![col("state")],
- vec![sum(col("salary")).alias("total_salary")],
- )?
- .project(vec![col("state"), col("total_salary")])?
+ .aggregate(&[col("state")],
&[sum(col("salary")).alias("total_salary")])?
+ .project(&[col("state"), col("total_salary")])?
.build()?;
let expected = "Projection: #state, #total_salary\
@@ -393,7 +390,7 @@ mod tests {
&employee_schema(),
Some(vec![3, 4]),
)?
- .sort(vec![
+ .sort(&[
Expr::Sort {
expr: Box::new(col("state")),
asc: true,
@@ -423,7 +420,7 @@ mod tests {
Some(vec![0, 3]),
)?
// two columns with the same name => error
- .project(vec![col("id"), col("first_name").alias("id")]);
+ .project(&[col("id"), col("first_name").alias("id")]);
match plan {
Err(DataFusionError::Plan(e)) => {
@@ -446,7 +443,7 @@ mod tests {
Some(vec![0, 3]),
)?
// two columns with the same name => error
- .aggregate(vec![col("state")],
vec![sum(col("salary")).alias("state")]);
+ .aggregate(&[col("state")], &[sum(col("salary")).alias("state")]);
match plan {
Err(DataFusionError::Plan(e)) => {
diff --git a/rust/datafusion/src/logical_plan/plan.rs
b/rust/datafusion/src/logical_plan/plan.rs
index 8002d16..2afdefd 100644
--- a/rust/datafusion/src/logical_plan/plan.rs
+++ b/rust/datafusion/src/logical_plan/plan.rs
@@ -661,7 +661,7 @@ mod tests {
.unwrap()
.filter(col("state").eq(lit("CO")))
.unwrap()
- .project(vec![col("id")])
+ .project(&[col("id")])
.unwrap()
.build()
.unwrap()
@@ -962,7 +962,7 @@ mod tests {
.unwrap()
.filter(col("state").eq(lit("CO")))
.unwrap()
- .project(vec![col("id")])
+ .project(&[col("id")])
.unwrap()
.build()
.unwrap()
diff --git a/rust/datafusion/src/optimizer/filter_push_down.rs
b/rust/datafusion/src/optimizer/filter_push_down.rs
index b72ede1..2903f95 100644
--- a/rust/datafusion/src/optimizer/filter_push_down.rs
+++ b/rust/datafusion/src/optimizer/filter_push_down.rs
@@ -448,7 +448,7 @@ mod tests {
fn filter_before_projection() -> Result<()> {
let table_scan = test_table_scan()?;
let plan = LogicalPlanBuilder::from(&table_scan)
- .project(vec![col("a"), col("b")])?
+ .project(&[col("a"), col("b")])?
.filter(col("a").eq(lit(1i64)))?
.build()?;
// filter is before projection
@@ -464,7 +464,7 @@ mod tests {
fn filter_after_limit() -> Result<()> {
let table_scan = test_table_scan()?;
let plan = LogicalPlanBuilder::from(&table_scan)
- .project(vec![col("a"), col("b")])?
+ .project(&[col("a"), col("b")])?
.limit(10)?
.filter(col("a").eq(lit(1i64)))?
.build()?;
@@ -482,8 +482,8 @@ mod tests {
fn filter_jump_2_plans() -> Result<()> {
let table_scan = test_table_scan()?;
let plan = LogicalPlanBuilder::from(&table_scan)
- .project(vec![col("a"), col("b"), col("c")])?
- .project(vec![col("c"), col("b")])?
+ .project(&[col("a"), col("b"), col("c")])?
+ .project(&[col("c"), col("b")])?
.filter(col("a").eq(lit(1i64)))?
.build()?;
// filter is before double projection
@@ -500,7 +500,7 @@ mod tests {
fn filter_move_agg() -> Result<()> {
let table_scan = test_table_scan()?;
let plan = LogicalPlanBuilder::from(&table_scan)
- .aggregate(vec![col("a")],
vec![sum(col("b")).alias("total_salary")])?
+ .aggregate(&[col("a")], &[sum(col("b")).alias("total_salary")])?
.filter(col("a").gt(lit(10i64)))?
.build()?;
// filter of key aggregation is commutative
@@ -516,7 +516,7 @@ mod tests {
fn filter_keep_agg() -> Result<()> {
let table_scan = test_table_scan()?;
let plan = LogicalPlanBuilder::from(&table_scan)
- .aggregate(vec![col("a")], vec![sum(col("b")).alias("b")])?
+ .aggregate(&[col("a")], &[sum(col("b")).alias("b")])?
.filter(col("b").gt(lit(10i64)))?
.build()?;
// filter of aggregate is after aggregation since they are
non-commutative
@@ -533,7 +533,7 @@ mod tests {
fn alias() -> Result<()> {
let table_scan = test_table_scan()?;
let plan = LogicalPlanBuilder::from(&table_scan)
- .project(vec![col("a").alias("b"), col("c")])?
+ .project(&[col("a").alias("b"), col("c")])?
.filter(col("b").eq(lit(1i64)))?
.build()?;
// filter is before projection
@@ -566,7 +566,7 @@ mod tests {
fn complex_expression() -> Result<()> {
let table_scan = test_table_scan()?;
let plan = LogicalPlanBuilder::from(&table_scan)
- .project(vec![
+ .project(&[
add(multiply(col("a"), lit(2)), col("c")).alias("b"),
col("c"),
])?
@@ -596,12 +596,12 @@ mod tests {
fn complex_plan() -> Result<()> {
let table_scan = test_table_scan()?;
let plan = LogicalPlanBuilder::from(&table_scan)
- .project(vec![
+ .project(&[
add(multiply(col("a"), lit(2)), col("c")).alias("b"),
col("c"),
])?
// second projection where we rename columns, just to make it
difficult
- .project(vec![multiply(col("b"), lit(3)).alias("a"), col("c")])?
+ .project(&[multiply(col("b"), lit(3)).alias("a"), col("c")])?
.filter(col("a").eq(lit(1i64)))?
.build()?;
@@ -632,8 +632,8 @@ mod tests {
// the aggregation allows one filter to pass (b), and the other one to
not pass (SUM(c))
let table_scan = test_table_scan()?;
let plan = LogicalPlanBuilder::from(&table_scan)
- .project(vec![col("a").alias("b"), col("c")])?
- .aggregate(vec![col("b")], vec![sum(col("c"))])?
+ .project(&[col("a").alias("b"), col("c")])?
+ .aggregate(&[col("b")], &[sum(col("c"))])?
.filter(col("b").gt(lit(10i64)))?
.filter(col("SUM(c)").gt(lit(10i64)))?
.build()?;
@@ -668,8 +668,8 @@ mod tests {
// the aggregation allows one filter to pass (b), and the other one to
not pass (SUM(c))
let table_scan = test_table_scan()?;
let plan = LogicalPlanBuilder::from(&table_scan)
- .project(vec![col("a").alias("b"), col("c")])?
- .aggregate(vec![col("b")], vec![sum(col("c"))])?
+ .project(&[col("a").alias("b"), col("c")])?
+ .aggregate(&[col("b")], &[sum(col("c"))])?
.filter(and(
col("SUM(c)").gt(lit(10i64)),
and(col("b").gt(lit(10i64)), col("SUM(c)").lt(lit(20i64))),
@@ -703,10 +703,10 @@ mod tests {
fn double_limit() -> Result<()> {
let table_scan = test_table_scan()?;
let plan = LogicalPlanBuilder::from(&table_scan)
- .project(vec![col("a"), col("b")])?
+ .project(&[col("a"), col("b")])?
.limit(20)?
.limit(10)?
- .project(vec![col("a"), col("b")])?
+ .project(&[col("a"), col("b")])?
.filter(col("a").eq(lit(1i64)))?
.build()?;
// filter does not just any of the limits
@@ -726,10 +726,10 @@ mod tests {
fn filter_2_breaks_limits() -> Result<()> {
let table_scan = test_table_scan()?;
let plan = LogicalPlanBuilder::from(&table_scan)
- .project(vec![col("a")])?
+ .project(&[col("a")])?
.filter(col("a").lt_eq(lit(1i64)))?
.limit(1)?
- .project(vec![col("a")])?
+ .project(&[col("a")])?
.filter(col("a").gt_eq(lit(1i64)))?
.build()?;
// Should be able to move both filters below the projections
@@ -765,7 +765,7 @@ mod tests {
.limit(1)?
.filter(col("a").lt_eq(lit(1i64)))?
.filter(col("a").gt_eq(lit(1i64)))?
- .project(vec![col("a")])?
+ .project(&[col("a")])?
.build()?;
// not part of the test
@@ -817,7 +817,7 @@ mod tests {
let table_scan = test_table_scan()?;
let left = LogicalPlanBuilder::from(&table_scan).build()?;
let right = LogicalPlanBuilder::from(&table_scan)
- .project(vec![col("a")])?
+ .project(&[col("a")])?
.build()?;
let plan = LogicalPlanBuilder::from(&left)
.join(&right, JoinType::Inner, &["a"], &["a"])?
@@ -852,10 +852,10 @@ mod tests {
fn filter_join_on_common_dependent() -> Result<()> {
let table_scan = test_table_scan()?;
let left = LogicalPlanBuilder::from(&table_scan)
- .project(vec![col("a"), col("c")])?
+ .project(&[col("a"), col("c")])?
.build()?;
let right = LogicalPlanBuilder::from(&table_scan)
- .project(vec![col("a"), col("b")])?
+ .project(&[col("a"), col("b")])?
.build()?;
let plan = LogicalPlanBuilder::from(&left)
.join(&right, JoinType::Inner, &["a"], &["a"])?
@@ -886,10 +886,10 @@ mod tests {
fn filter_join_on_one_side() -> Result<()> {
let table_scan = test_table_scan()?;
let left = LogicalPlanBuilder::from(&table_scan)
- .project(vec![col("a"), col("b")])?
+ .project(&[col("a"), col("b")])?
.build()?;
let right = LogicalPlanBuilder::from(&table_scan)
- .project(vec![col("a"), col("c")])?
+ .project(&[col("a"), col("c")])?
.build()?;
let plan = LogicalPlanBuilder::from(&left)
.join(&right, JoinType::Inner, &["a"], &["a"])?
diff --git a/rust/datafusion/src/optimizer/projection_push_down.rs
b/rust/datafusion/src/optimizer/projection_push_down.rs
index 948fe71..16076e9 100644
--- a/rust/datafusion/src/optimizer/projection_push_down.rs
+++ b/rust/datafusion/src/optimizer/projection_push_down.rs
@@ -312,7 +312,7 @@ mod tests {
let table_scan = test_table_scan()?;
let plan = LogicalPlanBuilder::from(&table_scan)
- .aggregate(vec![], vec![max(col("b"))])?
+ .aggregate(&[], &[max(col("b"))])?
.build()?;
let expected = "Aggregate: groupBy=[[]], aggr=[[MAX(#b)]]\
@@ -328,7 +328,7 @@ mod tests {
let table_scan = test_table_scan()?;
let plan = LogicalPlanBuilder::from(&table_scan)
- .aggregate(vec![col("c")], vec![max(col("b"))])?
+ .aggregate(&[col("c")], &[max(col("b"))])?
.build()?;
let expected = "Aggregate: groupBy=[[#c]], aggr=[[MAX(#b)]]\
@@ -345,7 +345,7 @@ mod tests {
let plan = LogicalPlanBuilder::from(&table_scan)
.filter(col("c"))?
- .aggregate(vec![], vec![max(col("b"))])?
+ .aggregate(&[], &[max(col("b"))])?
.build()?;
let expected = "Aggregate: groupBy=[[]], aggr=[[MAX(#b)]]\
@@ -362,7 +362,7 @@ mod tests {
let table_scan = test_table_scan()?;
let projection = LogicalPlanBuilder::from(&table_scan)
- .project(vec![Expr::Cast {
+ .project(&[Expr::Cast {
expr: Box::new(col("c")),
data_type: DataType::Float64,
}])?
@@ -383,7 +383,7 @@ mod tests {
assert_fields_eq(&table_scan, vec!["a", "b", "c"]);
let plan = LogicalPlanBuilder::from(&table_scan)
- .project(vec![col("a"), col("b")])?
+ .project(&[col("a"), col("b")])?
.build()?;
assert_fields_eq(&plan, vec!["a", "b"]);
@@ -403,7 +403,7 @@ mod tests {
assert_fields_eq(&table_scan, vec!["a", "b", "c"]);
let plan = LogicalPlanBuilder::from(&table_scan)
- .project(vec![col("c"), col("a")])?
+ .project(&[col("c"), col("a")])?
.limit(5)?
.build()?;
@@ -432,7 +432,7 @@ mod tests {
fn table_scan_with_literal_projection() -> Result<()> {
let table_scan = test_table_scan()?;
let plan = LogicalPlanBuilder::from(&table_scan)
- .project(vec![lit(1_i64), lit(2_i64)])?
+ .project(&[lit(1_i64), lit(2_i64)])?
.build()?;
let expected = "Projection: Int64(1), Int64(2)\
\n TableScan: test projection=Some([0])";
@@ -449,9 +449,9 @@ mod tests {
// we never use "b" in the first projection => remove it
let plan = LogicalPlanBuilder::from(&table_scan)
- .project(vec![col("c"), col("a"), col("b")])?
+ .project(&[col("c"), col("a"), col("b")])?
.filter(col("c").gt(lit(1)))?
- .aggregate(vec![col("c")], vec![max(col("a"))])?
+ .aggregate(&[col("c")], &[max(col("a"))])?
.build()?;
assert_fields_eq(&plan, vec!["c", "MAX(a)"]);
@@ -476,8 +476,8 @@ mod tests {
// there is no need for the first projection
let plan = LogicalPlanBuilder::from(&table_scan)
- .project(vec![col("b")])?
- .project(vec![lit(1).alias("a")])?
+ .project(&[col("b")])?
+ .project(&[lit(1).alias("a")])?
.build()?;
assert_fields_eq(&plan, vec!["a"]);
@@ -500,9 +500,9 @@ mod tests {
// we never use "min(b)" => remove it
let plan = LogicalPlanBuilder::from(&table_scan)
- .aggregate(vec![col("a"), col("c")], vec![max(col("b")),
min(col("b"))])?
+ .aggregate(&[col("a"), col("c")], &[max(col("b")), min(col("b"))])?
.filter(col("c").gt(lit(1)))?
- .project(vec![col("c"), col("a"), col("MAX(b)")])?
+ .project(&[col("c"), col("a"), col("MAX(b)")])?
.build()?;
assert_fields_eq(&plan, vec!["c", "a", "MAX(b)"]);
diff --git a/rust/datafusion/src/physical_plan/planner.rs
b/rust/datafusion/src/physical_plan/planner.rs
index 6ba0d26..6c4aab9 100644
--- a/rust/datafusion/src/physical_plan/planner.rs
+++ b/rust/datafusion/src/physical_plan/planner.rs
@@ -788,9 +788,9 @@ mod tests {
let logical_plan = LogicalPlanBuilder::scan_csv(&path, options, None)?
// filter clause needs the type coercion rule applied
.filter(col("c7").lt(lit(5_u8)))?
- .project(vec![col("c1"), col("c2")])?
- .aggregate(vec![col("c1")], vec![sum(col("c2"))])?
- .sort(vec![col("c1").sort(true, true)])?
+ .project(&[col("c1"), col("c2")])?
+ .aggregate(&[col("c1")], &[sum(col("c2"))])?
+ .sort(&[col("c1").sort(true, true)])?
.limit(10)?
.build()?;
@@ -861,7 +861,7 @@ mod tests {
];
for case in cases {
let logical_plan = LogicalPlanBuilder::scan_csv(&path, options,
None)?
- .project(vec![case.clone()]);
+ .project(&[case.clone()]);
let message = format!(
"Expression {:?} expected to error due to impossible coercion",
case
@@ -953,7 +953,7 @@ mod tests {
let logical_plan = LogicalPlanBuilder::scan_csv(&path, options, None)?
// filter clause needs the type coercion rule applied
.filter(col("c12").lt(lit(0.05)))?
- .project(vec![col("c1").in_list(list, false)])?
+ .project(&[col("c1").in_list(list, false)])?
.build()?;
let execution_plan = plan(&logical_plan)?;
// verify that the plan correctly adds cast from Int64(1) to Utf8
@@ -968,7 +968,7 @@ mod tests {
let logical_plan = LogicalPlanBuilder::scan_csv(&path, options, None)?
// filter clause needs the type coercion rule applied
.filter(col("c12").lt(lit(0.05)))?
- .project(vec![col("c12").lt_eq(lit(0.025)).in_list(list, false)])?
+ .project(&[col("c12").lt_eq(lit(0.025)).in_list(list, false)])?
.build()?;
let execution_plan = plan(&logical_plan);
diff --git a/rust/datafusion/src/sql/planner.rs
b/rust/datafusion/src/sql/planner.rs
index d97b52d..ab7ff13 100644
--- a/rust/datafusion/src/sql/planner.rs
+++ b/rust/datafusion/src/sql/planner.rs
@@ -431,7 +431,7 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> {
(plan, select_exprs)
};
- self.project(&plan, select_exprs_post_aggr, false)
+ self.project(&plan, &select_exprs_post_aggr, false)
}
/// Returns the `Expr`'s corresponding to a SQL query's SELECT expressions.
@@ -462,7 +462,7 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> {
fn project(
&self,
input: &LogicalPlan,
- expr: Vec<Expr>,
+ expr: &[Expr],
force: bool,
) -> Result<LogicalPlan> {
self.validate_schema_satisfies_exprs(&input.schema(), &expr)?;
@@ -484,9 +484,9 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> {
fn aggregate(
&self,
input: &LogicalPlan,
- select_exprs: &Vec<Expr>,
- group_by: &Vec<SQLExpr>,
- aggr_exprs: &Vec<Expr>,
+ select_exprs: &[Expr],
+ group_by: &[SQLExpr],
+ aggr_exprs: &[Expr],
) -> Result<(LogicalPlan, Vec<Expr>)> {
let group_by_exprs = group_by
.iter()
@@ -500,7 +500,7 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> {
.collect::<Vec<Expr>>();
let plan = LogicalPlanBuilder::from(&input)
- .aggregate(group_by_exprs, aggr_exprs.clone())?
+ .aggregate(&group_by_exprs, aggr_exprs)?
.build()?;
// After aggregation, these are all of the columns that will be
@@ -547,7 +547,7 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> {
fn order_by(
&self,
plan: &LogicalPlan,
- order_by: &Vec<OrderByExpr>,
+ order_by: &[OrderByExpr],
) -> Result<LogicalPlan> {
if order_by.is_empty() {
return Ok(plan.clone());
@@ -567,14 +567,16 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> {
})
.collect();
- LogicalPlanBuilder::from(&plan).sort(order_by_rex?)?.build()
+ LogicalPlanBuilder::from(&plan)
+ .sort(&order_by_rex?)?
+ .build()
}
/// Validate the schema provides all of the columns referenced in the
expressions.
fn validate_schema_satisfies_exprs(
&self,
schema: &DFSchema,
- exprs: &Vec<Expr>,
+ exprs: &[Expr],
) -> Result<()> {
find_column_exprs(exprs)
.iter()
@@ -611,7 +613,7 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> {
/// Generate a relational expression from a SQL expression
pub fn sql_to_rex(&self, sql: &SQLExpr, schema: &DFSchema) -> Result<Expr>
{
let expr = self.sql_expr_to_logical_expr(sql)?;
- self.validate_schema_satisfies_exprs(schema, &vec![expr.clone()])?;
+ self.validate_schema_satisfies_exprs(schema, &[expr.clone()])?;
Ok(expr)
}
diff --git a/rust/datafusion/src/sql/utils.rs b/rust/datafusion/src/sql/utils.rs
index 34bd55d..155a1f3 100644
--- a/rust/datafusion/src/sql/utils.rs
+++ b/rust/datafusion/src/sql/utils.rs
@@ -47,14 +47,14 @@ pub(crate) fn find_aggregate_exprs(exprs: &Vec<Expr>) ->
Vec<Expr> {
/// Collect all deeply nested `Expr::Column`'s. They are returned in order of
/// appearance (depth first), with duplicates omitted.
-pub(crate) fn find_column_exprs(exprs: &Vec<Expr>) -> Vec<Expr> {
+pub(crate) fn find_column_exprs(exprs: &[Expr]) -> Vec<Expr> {
find_exprs_in_exprs(exprs, &|nested_expr| matches!(nested_expr,
Expr::Column(_)))
}
/// Search the provided `Expr`'s, and all of their nested `Expr`, for any that
/// pass the provided test. The returned `Expr`'s are deduplicated and returned
/// in order of appearance (depth first).
-fn find_exprs_in_exprs<F>(exprs: &Vec<Expr>, test_fn: &F) -> Vec<Expr>
+fn find_exprs_in_exprs<F>(exprs: &[Expr], test_fn: &F) -> Vec<Expr>
where
F: Fn(&Expr) -> bool,
{
diff --git a/rust/datafusion/tests/custom_sources.rs
b/rust/datafusion/tests/custom_sources.rs
index 979bde4..9778fea 100644
--- a/rust/datafusion/tests/custom_sources.rs
+++ b/rust/datafusion/tests/custom_sources.rs
@@ -161,7 +161,7 @@ async fn custom_source_dataframe() -> Result<()> {
let table = ctx.read_table(Arc::new(CustomTableProvider))?;
let logical_plan = LogicalPlanBuilder::from(&table.to_logical_plan())
- .project(vec![col("c2")])?
+ .project(&[col("c2")])?
.build()?;
let optimized_plan = ctx.optimize(&logical_plan)?;
diff --git a/rust/datafusion/tests/provider_filter_pushdown.rs
b/rust/datafusion/tests/provider_filter_pushdown.rs
index b4f0bfb..d4f8a6b 100644
--- a/rust/datafusion/tests/provider_filter_pushdown.rs
+++ b/rust/datafusion/tests/provider_filter_pushdown.rs
@@ -149,7 +149,7 @@ async fn assert_provider_row_count(value: i64,
expected_count: u64) -> Result<()
let df = ctx
.read_table(Arc::new(provider.clone()))?
.filter(col("flag").eq(lit(value)))?
- .aggregate(vec![], vec![count(col("flag"))])?;
+ .aggregate(&[], &[count(col("flag"))])?;
let results = df.collect().await?;
let result_col: &UInt64Array = as_primitive_array(results[0].column(0));