This is an automated email from the ASF dual-hosted git repository.
jorgecarleitao pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git
The following commit(s) were added to refs/heads/master by this push:
new 437c917 ARROW-11401: [Rust][DataFusion] Pass slices instead of Vec in
DataFrame API
437c917 is described below
commit 437c9173c3e067712eb714c643ca839acc7ed7f6
Author: Heres, Daniel <[email protected]>
AuthorDate: Sun Jan 31 11:02:41 2021 +0100
ARROW-11401: [Rust][DataFusion] Pass slices instead of Vec in DataFrame API
This PR makes the API a bit more ergonomic to use (saving 3 characters per
parameter), avoiding the need to create a `Vec` with a macro or to clone an existing one.
It is also more consistent, for example `DataFrame::join` already was
taking parameters by `&[&str]` instead of `Vec<&str>`.
This is a (big) backwards incompatible change.
Closes #9338 from Dandandan/dataframe_api
Authored-by: Heres, Daniel <[email protected]>
Signed-off-by: Jorge C. Leitao <[email protected]>
---
rust/benchmarks/src/bin/tpch.rs | 2 +-
rust/datafusion/examples/dataframe.rs | 2 +-
rust/datafusion/examples/dataframe_in_memory.rs | 2 +-
rust/datafusion/examples/simple_udaf.rs | 2 +-
rust/datafusion/examples/simple_udf.rs | 2 +-
rust/datafusion/src/dataframe.rs | 24 +++++------
rust/datafusion/src/execution/context.rs | 14 +++----
rust/datafusion/src/execution/dataframe_impl.rs | 32 +++++++--------
rust/datafusion/src/lib.rs | 2 +-
rust/datafusion/src/logical_plan/builder.rs | 31 +++++++-------
rust/datafusion/src/logical_plan/plan.rs | 4 +-
rust/datafusion/src/optimizer/filter_push_down.rs | 48 +++++++++++-----------
.../src/optimizer/projection_push_down.rs | 26 ++++++------
rust/datafusion/src/physical_plan/planner.rs | 12 +++---
rust/datafusion/src/sql/planner.rs | 22 +++++-----
rust/datafusion/src/sql/utils.rs | 4 +-
rust/datafusion/tests/custom_sources.rs | 2 +-
rust/datafusion/tests/provider_filter_pushdown.rs | 2 +-
18 files changed, 116 insertions(+), 117 deletions(-)
diff --git a/rust/benchmarks/src/bin/tpch.rs b/rust/benchmarks/src/bin/tpch.rs
index c3352ef..593d75c 100644
--- a/rust/benchmarks/src/bin/tpch.rs
+++ b/rust/benchmarks/src/bin/tpch.rs
@@ -1575,7 +1575,7 @@ mod tests {
.file_extension(".out");
let df = ctx.read_csv(&format!("{}/answers/q{}.out", path, n),
options)?;
let df = df.select(
- get_answer_schema(n)
+ &get_answer_schema(n)
.fields()
.iter()
.map(|field| {
diff --git a/rust/datafusion/examples/dataframe.rs
b/rust/datafusion/examples/dataframe.rs
index 03cd806..cba4d87 100644
--- a/rust/datafusion/examples/dataframe.rs
+++ b/rust/datafusion/examples/dataframe.rs
@@ -34,7 +34,7 @@ async fn main() -> Result<()> {
// define the query using the DataFrame trait
let df = ctx
.read_parquet(filename)?
- .select_columns(vec!["id", "bool_col", "timestamp_col"])?
+ .select_columns(&["id", "bool_col", "timestamp_col"])?
.filter(col("id").gt(lit(1)))?;
// execute the query
diff --git a/rust/datafusion/examples/dataframe_in_memory.rs
b/rust/datafusion/examples/dataframe_in_memory.rs
index 6923b80..ff35266 100644
--- a/rust/datafusion/examples/dataframe_in_memory.rs
+++ b/rust/datafusion/examples/dataframe_in_memory.rs
@@ -56,7 +56,7 @@ async fn main() -> Result<()> {
// construct an expression corresponding to "SELECT a, b FROM t WHERE b =
10" in SQL
let filter = col("b").eq(lit(10));
- let df = df.select_columns(vec!["a", "b"])?.filter(filter)?;
+ let df = df.select_columns(&["a", "b"])?.filter(filter)?;
// execute
let results = df.collect().await?;
diff --git a/rust/datafusion/examples/simple_udaf.rs
b/rust/datafusion/examples/simple_udaf.rs
index 524a125..f27036e 100644
--- a/rust/datafusion/examples/simple_udaf.rs
+++ b/rust/datafusion/examples/simple_udaf.rs
@@ -148,7 +148,7 @@ async fn main() -> Result<()> {
let df = ctx.table("t")?;
// perform the aggregation
- let df = df.aggregate(vec![], vec![geometric_mean.call(vec![col("a")])])?;
+ let df = df.aggregate(&[], &[geometric_mean.call(vec![col("a")])])?;
// note that "a" is f32, not f64. DataFusion coerces it to match the
UDAF's signature.
diff --git a/rust/datafusion/examples/simple_udf.rs
b/rust/datafusion/examples/simple_udf.rs
index 94df59d..0eef801 100644
--- a/rust/datafusion/examples/simple_udf.rs
+++ b/rust/datafusion/examples/simple_udf.rs
@@ -130,7 +130,7 @@ async fn main() -> Result<()> {
let expr1 = pow.call(vec![col("a"), col("b")]);
// equivalent to `'SELECT pow(a, b), pow(a, b) AS pow1 FROM t'`
- let df = df.select(vec![
+ let df = df.select(&[
expr,
// alias so that they have different column names
expr1.alias("pow1"),
diff --git a/rust/datafusion/src/dataframe.rs b/rust/datafusion/src/dataframe.rs
index 1e3c1f1..04d827c 100644
--- a/rust/datafusion/src/dataframe.rs
+++ b/rust/datafusion/src/dataframe.rs
@@ -44,7 +44,7 @@ use async_trait::async_trait;
/// let mut ctx = ExecutionContext::new();
/// let df = ctx.read_csv("tests/example.csv", CsvReadOptions::new())?;
/// let df = df.filter(col("a").lt_eq(col("b")))?
-/// .aggregate(vec![col("a")], vec![min(col("b"))])?
+/// .aggregate(&[col("a")], &[min(col("b"))])?
/// .limit(100)?;
/// let results = df.collect();
/// # Ok(())
@@ -61,11 +61,11 @@ pub trait DataFrame {
/// # fn main() -> Result<()> {
/// let mut ctx = ExecutionContext::new();
/// let df = ctx.read_csv("tests/example.csv", CsvReadOptions::new())?;
- /// let df = df.select_columns(vec!["a", "b"])?;
+ /// let df = df.select_columns(&["a", "b"])?;
/// # Ok(())
/// # }
/// ```
- fn select_columns(&self, columns: Vec<&str>) -> Result<Arc<dyn DataFrame>>;
+ fn select_columns(&self, columns: &[&str]) -> Result<Arc<dyn DataFrame>>;
/// Create a projection based on arbitrary expressions.
///
@@ -75,11 +75,11 @@ pub trait DataFrame {
/// # fn main() -> Result<()> {
/// let mut ctx = ExecutionContext::new();
/// let df = ctx.read_csv("tests/example.csv", CsvReadOptions::new())?;
- /// let df = df.select(vec![col("a") * col("b"), col("c")])?;
+ /// let df = df.select(&[col("a") * col("b"), col("c")])?;
/// # Ok(())
/// # }
/// ```
- fn select(&self, expr: Vec<Expr>) -> Result<Arc<dyn DataFrame>>;
+ fn select(&self, expr: &[Expr]) -> Result<Arc<dyn DataFrame>>;
/// Filter a DataFrame to only include rows that match the specified
filter expression.
///
@@ -105,17 +105,17 @@ pub trait DataFrame {
/// let df = ctx.read_csv("tests/example.csv", CsvReadOptions::new())?;
///
/// // The following use is the equivalent of "SELECT MIN(b) GROUP BY a"
- /// let _ = df.aggregate(vec![col("a")], vec![min(col("b"))])?;
+ /// let _ = df.aggregate(&[col("a")], &[min(col("b"))])?;
///
/// // The following use is the equivalent of "SELECT MIN(b)"
- /// let _ = df.aggregate(vec![], vec![min(col("b"))])?;
+ /// let _ = df.aggregate(&[], &[min(col("b"))])?;
/// # Ok(())
/// # }
/// ```
fn aggregate(
&self,
- group_expr: Vec<Expr>,
- aggr_expr: Vec<Expr>,
+ group_expr: &[Expr],
+ aggr_expr: &[Expr],
) -> Result<Arc<dyn DataFrame>>;
/// Limit the number of rows returned from this DataFrame.
@@ -141,11 +141,11 @@ pub trait DataFrame {
/// # fn main() -> Result<()> {
/// let mut ctx = ExecutionContext::new();
/// let df = ctx.read_csv("tests/example.csv", CsvReadOptions::new())?;
- /// let df = df.sort(vec![col("a").sort(true, true), col("b").sort(false,
false)])?;
+ /// let df = df.sort(&[col("a").sort(true, true), col("b").sort(false,
false)])?;
/// # Ok(())
/// # }
/// ```
- fn sort(&self, expr: Vec<Expr>) -> Result<Arc<dyn DataFrame>>;
+ fn sort(&self, expr: &[Expr]) -> Result<Arc<dyn DataFrame>>;
/// Join this DataFrame with another DataFrame using the specified columns
as join keys
///
@@ -157,7 +157,7 @@ pub trait DataFrame {
/// let mut ctx = ExecutionContext::new();
/// let left = ctx.read_csv("tests/example.csv", CsvReadOptions::new())?;
/// let right = ctx.read_csv("tests/example.csv", CsvReadOptions::new())?
- /// .select(vec![
+ /// .select(&[
/// col("a").alias("a2"),
/// col("b").alias("b2"),
/// col("c").alias("c2")])?;
diff --git a/rust/datafusion/src/execution/context.rs
b/rust/datafusion/src/execution/context.rs
index 1accff2..e8185a8 100644
--- a/rust/datafusion/src/execution/context.rs
+++ b/rust/datafusion/src/execution/context.rs
@@ -75,7 +75,7 @@ use parquet::file::properties::WriterProperties;
/// let mut ctx = ExecutionContext::new();
/// let df = ctx.read_csv("tests/example.csv", CsvReadOptions::new())?;
/// let df = df.filter(col("a").lt_eq(col("b")))?
-/// .aggregate(vec![col("a")], vec![min(col("b"))])?
+/// .aggregate(&[col("a")], &[min(col("b"))])?
/// .limit(100)?;
/// let results = df.collect();
/// # Ok(())
@@ -787,7 +787,7 @@ mod tests {
let table = ctx.table("test")?;
let logical_plan = LogicalPlanBuilder::from(&table.to_logical_plan())
- .project(vec![col("c2")])?
+ .project(&[col("c2")])?
.build()?;
let optimized_plan = ctx.optimize(&logical_plan)?;
@@ -839,7 +839,7 @@ mod tests {
assert_eq!(schema.field_with_name("c1")?.is_nullable(), false);
let plan = LogicalPlanBuilder::scan_empty("", schema.as_ref(), None)?
- .project(vec![col("c1")])?
+ .project(&[col("c1")])?
.build()?;
let plan = ctx.optimize(&plan)?;
@@ -870,7 +870,7 @@ mod tests {
)?]];
let plan = LogicalPlanBuilder::scan_memory(partitions, schema, None)?
- .project(vec![col("b")])?
+ .project(&[col("b")])?
.build()?;
assert_fields_eq(&plan, vec!["b"]);
@@ -1401,8 +1401,8 @@ mod tests {
]));
let plan = LogicalPlanBuilder::scan_empty("", schema.as_ref(), None)?
- .aggregate(vec![col("c1")], vec![sum(col("c2"))])?
- .project(vec![col("c1"), col("SUM(c2)").alias("total_salary")])?
+ .aggregate(&[col("c1")], &[sum(col("c2"))])?
+ .project(&[col("c1"), col("SUM(c2)").alias("total_salary")])?
.build()?;
let plan = ctx.optimize(&plan)?;
@@ -1609,7 +1609,7 @@ mod tests {
let t = ctx.table("t")?;
let plan = LogicalPlanBuilder::from(&t.to_logical_plan())
- .project(vec![
+ .project(&[
col("a"),
col("b"),
ctx.udf("my_add")?.call(vec![col("a"), col("b")]),
diff --git a/rust/datafusion/src/execution/dataframe_impl.rs
b/rust/datafusion/src/execution/dataframe_impl.rs
index db8b86b..3a40d20 100644
--- a/rust/datafusion/src/execution/dataframe_impl.rs
+++ b/rust/datafusion/src/execution/dataframe_impl.rs
@@ -49,17 +49,17 @@ impl DataFrameImpl {
#[async_trait]
impl DataFrame for DataFrameImpl {
/// Apply a projection based on a list of column names
- fn select_columns(&self, columns: Vec<&str>) -> Result<Arc<dyn DataFrame>>
{
+ fn select_columns(&self, columns: &[&str]) -> Result<Arc<dyn DataFrame>> {
let fields = columns
.iter()
.map(|name| self.plan.schema().field_with_unqualified_name(name))
.collect::<Result<Vec<_>>>()?;
- let expr = fields.iter().map(|f| col(f.name())).collect();
- self.select(expr)
+ let expr: Vec<Expr> = fields.iter().map(|f| col(f.name())).collect();
+ self.select(&expr)
}
/// Create a projection based on arbitrary expressions
- fn select(&self, expr_list: Vec<Expr>) -> Result<Arc<dyn DataFrame>> {
+ fn select(&self, expr_list: &[Expr]) -> Result<Arc<dyn DataFrame>> {
let plan = LogicalPlanBuilder::from(&self.plan)
.project(expr_list)?
.build()?;
@@ -77,8 +77,8 @@ impl DataFrame for DataFrameImpl {
/// Perform an aggregate query
fn aggregate(
&self,
- group_expr: Vec<Expr>,
- aggr_expr: Vec<Expr>,
+ group_expr: &[Expr],
+ aggr_expr: &[Expr],
) -> Result<Arc<dyn DataFrame>> {
let plan = LogicalPlanBuilder::from(&self.plan)
.aggregate(group_expr, aggr_expr)?
@@ -93,7 +93,7 @@ impl DataFrame for DataFrameImpl {
}
/// Sort by specified sorting expressions
- fn sort(&self, expr: Vec<Expr>) -> Result<Arc<dyn DataFrame>> {
+ fn sort(&self, expr: &[Expr]) -> Result<Arc<dyn DataFrame>> {
let plan = LogicalPlanBuilder::from(&self.plan).sort(expr)?.build()?;
Ok(Arc::new(DataFrameImpl::new(self.ctx_state.clone(), &plan)))
}
@@ -168,7 +168,7 @@ mod tests {
fn select_columns() -> Result<()> {
// build plan using Table API
let t = test_table()?;
- let t2 = t.select_columns(vec!["c1", "c2", "c11"])?;
+ let t2 = t.select_columns(&["c1", "c2", "c11"])?;
let plan = t2.to_logical_plan();
// build query using SQL
@@ -184,7 +184,7 @@ mod tests {
fn select_expr() -> Result<()> {
// build plan using Table API
let t = test_table()?;
- let t2 = t.select(vec![col("c1"), col("c2"), col("c11")])?;
+ let t2 = t.select(&[col("c1"), col("c2"), col("c11")])?;
let plan = t2.to_logical_plan();
// build query using SQL
@@ -200,8 +200,8 @@ mod tests {
fn aggregate() -> Result<()> {
// build plan using DataFrame API
let df = test_table()?;
- let group_expr = vec![col("c1")];
- let aggr_expr = vec![
+ let group_expr = &[col("c1")];
+ let aggr_expr = &[
min(col("c12")),
max(col("c12")),
avg(col("c12")),
@@ -228,8 +228,8 @@ mod tests {
#[tokio::test]
async fn join() -> Result<()> {
- let left = test_table()?.select_columns(vec!["c1", "c2"])?;
- let right = test_table()?.select_columns(vec!["c1", "c3"])?;
+ let left = test_table()?.select_columns(&["c1", "c2"])?;
+ let right = test_table()?.select_columns(&["c1", "c3"])?;
let left_rows = left.collect().await?;
let right_rows = right.collect().await?;
let join = left.join(right, JoinType::Inner, &["c1"], &["c1"])?;
@@ -247,7 +247,7 @@ mod tests {
fn limit() -> Result<()> {
// build query using Table API
let t = test_table()?;
- let t2 = t.select_columns(vec!["c1", "c2", "c11"])?.limit(10)?;
+ let t2 = t.select_columns(&["c1", "c2", "c11"])?.limit(10)?;
let plan = t2.to_logical_plan();
// build query using SQL
@@ -265,7 +265,7 @@ mod tests {
// build query using Table API
let df = test_table()?;
let df = df
- .select_columns(vec!["c1", "c2", "c11"])?
+ .select_columns(&["c1", "c2", "c11"])?
.limit(10)?
.explain(false)?;
let plan = df.to_logical_plan();
@@ -302,7 +302,7 @@ mod tests {
let f = df.registry();
- let df = df.select(vec![f.udf("my_fn")?.call(vec![col("c12")])])?;
+ let df = df.select(&[f.udf("my_fn")?.call(vec![col("c12")])])?;
let plan = df.to_logical_plan();
// build query using SQL
diff --git a/rust/datafusion/src/lib.rs b/rust/datafusion/src/lib.rs
index 3dec1d8..b2d3c8b 100644
--- a/rust/datafusion/src/lib.rs
+++ b/rust/datafusion/src/lib.rs
@@ -47,7 +47,7 @@
//!
//! // create a plan
//! let df = df.filter(col("a").lt_eq(col("b")))?
-//! .aggregate(vec![col("a")], vec![min(col("b"))])?
+//! .aggregate(&[col("a")], &[min(col("b"))])?
//! .limit(100)?;
//!
//! // execute the plan
diff --git a/rust/datafusion/src/logical_plan/builder.rs
b/rust/datafusion/src/logical_plan/builder.rs
index 8bb356a..9f5d781 100644
--- a/rust/datafusion/src/logical_plan/builder.rs
+++ b/rust/datafusion/src/logical_plan/builder.rs
@@ -131,7 +131,7 @@ impl LogicalPlanBuilder {
/// This function errors under any of the following conditions:
/// * Two or more expressions have the same name
/// * An invalid expression is used (e.g. a `sort` expression)
- pub fn project(&self, expr: Vec<Expr>) -> Result<Self> {
+ pub fn project(&self, expr: &[Expr]) -> Result<Self> {
let input_schema = self.plan.schema();
let mut projected_expr = vec![];
(0..expr.len()).for_each(|i| match &expr[i] {
@@ -170,9 +170,9 @@ impl LogicalPlanBuilder {
}
/// Apply a sort
- pub fn sort(&self, expr: Vec<Expr>) -> Result<Self> {
+ pub fn sort(&self, expr: &[Expr]) -> Result<Self> {
Ok(Self::from(&LogicalPlan::Sort {
- expr,
+ expr: expr.to_vec(),
input: Arc::new(self.plan.clone()),
}))
}
@@ -220,9 +220,9 @@ impl LogicalPlanBuilder {
}
/// Apply an aggregate
- pub fn aggregate(&self, group_expr: Vec<Expr>, aggr_expr: Vec<Expr>) ->
Result<Self> {
- let mut all_expr: Vec<Expr> = group_expr.clone();
- aggr_expr.iter().for_each(|x| all_expr.push(x.clone()));
+ pub fn aggregate(&self, group_expr: &[Expr], aggr_expr: &[Expr]) ->
Result<Self> {
+ let mut all_expr = group_expr.to_vec();
+ all_expr.extend_from_slice(aggr_expr);
validate_unique_names("Aggregations", &all_expr, self.plan.schema())?;
@@ -231,8 +231,8 @@ impl LogicalPlanBuilder {
Ok(Self::from(&LogicalPlan::Aggregate {
input: Arc::new(self.plan.clone()),
- group_expr,
- aggr_expr,
+ group_expr: group_expr.to_vec(),
+ aggr_expr: aggr_expr.to_vec(),
schema: DFSchemaRef::new(aggr_schema),
}))
}
@@ -351,7 +351,7 @@ mod tests {
Some(vec![0, 3]),
)?
.filter(col("state").eq(lit("CO")))?
- .project(vec![col("id")])?
+ .project(&[col("id")])?
.build()?;
let expected = "Projection: #id\
@@ -370,11 +370,8 @@ mod tests {
&employee_schema(),
Some(vec![3, 4]),
)?
- .aggregate(
- vec![col("state")],
- vec![sum(col("salary")).alias("total_salary")],
- )?
- .project(vec![col("state"), col("total_salary")])?
+ .aggregate(&[col("state")],
&[sum(col("salary")).alias("total_salary")])?
+ .project(&[col("state"), col("total_salary")])?
.build()?;
let expected = "Projection: #state, #total_salary\
@@ -393,7 +390,7 @@ mod tests {
&employee_schema(),
Some(vec![3, 4]),
)?
- .sort(vec![
+ .sort(&[
Expr::Sort {
expr: Box::new(col("state")),
asc: true,
@@ -423,7 +420,7 @@ mod tests {
Some(vec![0, 3]),
)?
// two columns with the same name => error
- .project(vec![col("id"), col("first_name").alias("id")]);
+ .project(&[col("id"), col("first_name").alias("id")]);
match plan {
Err(DataFusionError::Plan(e)) => {
@@ -446,7 +443,7 @@ mod tests {
Some(vec![0, 3]),
)?
// two columns with the same name => error
- .aggregate(vec![col("state")],
vec![sum(col("salary")).alias("state")]);
+ .aggregate(&[col("state")], &[sum(col("salary")).alias("state")]);
match plan {
Err(DataFusionError::Plan(e)) => {
diff --git a/rust/datafusion/src/logical_plan/plan.rs
b/rust/datafusion/src/logical_plan/plan.rs
index 8002d16..2afdefd 100644
--- a/rust/datafusion/src/logical_plan/plan.rs
+++ b/rust/datafusion/src/logical_plan/plan.rs
@@ -661,7 +661,7 @@ mod tests {
.unwrap()
.filter(col("state").eq(lit("CO")))
.unwrap()
- .project(vec![col("id")])
+ .project(&[col("id")])
.unwrap()
.build()
.unwrap()
@@ -962,7 +962,7 @@ mod tests {
.unwrap()
.filter(col("state").eq(lit("CO")))
.unwrap()
- .project(vec![col("id")])
+ .project(&[col("id")])
.unwrap()
.build()
.unwrap()
diff --git a/rust/datafusion/src/optimizer/filter_push_down.rs
b/rust/datafusion/src/optimizer/filter_push_down.rs
index b72ede1..2903f95 100644
--- a/rust/datafusion/src/optimizer/filter_push_down.rs
+++ b/rust/datafusion/src/optimizer/filter_push_down.rs
@@ -448,7 +448,7 @@ mod tests {
fn filter_before_projection() -> Result<()> {
let table_scan = test_table_scan()?;
let plan = LogicalPlanBuilder::from(&table_scan)
- .project(vec![col("a"), col("b")])?
+ .project(&[col("a"), col("b")])?
.filter(col("a").eq(lit(1i64)))?
.build()?;
// filter is before projection
@@ -464,7 +464,7 @@ mod tests {
fn filter_after_limit() -> Result<()> {
let table_scan = test_table_scan()?;
let plan = LogicalPlanBuilder::from(&table_scan)
- .project(vec![col("a"), col("b")])?
+ .project(&[col("a"), col("b")])?
.limit(10)?
.filter(col("a").eq(lit(1i64)))?
.build()?;
@@ -482,8 +482,8 @@ mod tests {
fn filter_jump_2_plans() -> Result<()> {
let table_scan = test_table_scan()?;
let plan = LogicalPlanBuilder::from(&table_scan)
- .project(vec![col("a"), col("b"), col("c")])?
- .project(vec![col("c"), col("b")])?
+ .project(&[col("a"), col("b"), col("c")])?
+ .project(&[col("c"), col("b")])?
.filter(col("a").eq(lit(1i64)))?
.build()?;
// filter is before double projection
@@ -500,7 +500,7 @@ mod tests {
fn filter_move_agg() -> Result<()> {
let table_scan = test_table_scan()?;
let plan = LogicalPlanBuilder::from(&table_scan)
- .aggregate(vec![col("a")],
vec![sum(col("b")).alias("total_salary")])?
+ .aggregate(&[col("a")], &[sum(col("b")).alias("total_salary")])?
.filter(col("a").gt(lit(10i64)))?
.build()?;
// filter of key aggregation is commutative
@@ -516,7 +516,7 @@ mod tests {
fn filter_keep_agg() -> Result<()> {
let table_scan = test_table_scan()?;
let plan = LogicalPlanBuilder::from(&table_scan)
- .aggregate(vec![col("a")], vec![sum(col("b")).alias("b")])?
+ .aggregate(&[col("a")], &[sum(col("b")).alias("b")])?
.filter(col("b").gt(lit(10i64)))?
.build()?;
// filter of aggregate is after aggregation since they are
non-commutative
@@ -533,7 +533,7 @@ mod tests {
fn alias() -> Result<()> {
let table_scan = test_table_scan()?;
let plan = LogicalPlanBuilder::from(&table_scan)
- .project(vec![col("a").alias("b"), col("c")])?
+ .project(&[col("a").alias("b"), col("c")])?
.filter(col("b").eq(lit(1i64)))?
.build()?;
// filter is before projection
@@ -566,7 +566,7 @@ mod tests {
fn complex_expression() -> Result<()> {
let table_scan = test_table_scan()?;
let plan = LogicalPlanBuilder::from(&table_scan)
- .project(vec![
+ .project(&[
add(multiply(col("a"), lit(2)), col("c")).alias("b"),
col("c"),
])?
@@ -596,12 +596,12 @@ mod tests {
fn complex_plan() -> Result<()> {
let table_scan = test_table_scan()?;
let plan = LogicalPlanBuilder::from(&table_scan)
- .project(vec![
+ .project(&[
add(multiply(col("a"), lit(2)), col("c")).alias("b"),
col("c"),
])?
// second projection where we rename columns, just to make it
difficult
- .project(vec![multiply(col("b"), lit(3)).alias("a"), col("c")])?
+ .project(&[multiply(col("b"), lit(3)).alias("a"), col("c")])?
.filter(col("a").eq(lit(1i64)))?
.build()?;
@@ -632,8 +632,8 @@ mod tests {
// the aggregation allows one filter to pass (b), and the other one to
not pass (SUM(c))
let table_scan = test_table_scan()?;
let plan = LogicalPlanBuilder::from(&table_scan)
- .project(vec![col("a").alias("b"), col("c")])?
- .aggregate(vec![col("b")], vec![sum(col("c"))])?
+ .project(&[col("a").alias("b"), col("c")])?
+ .aggregate(&[col("b")], &[sum(col("c"))])?
.filter(col("b").gt(lit(10i64)))?
.filter(col("SUM(c)").gt(lit(10i64)))?
.build()?;
@@ -668,8 +668,8 @@ mod tests {
// the aggregation allows one filter to pass (b), and the other one to
not pass (SUM(c))
let table_scan = test_table_scan()?;
let plan = LogicalPlanBuilder::from(&table_scan)
- .project(vec![col("a").alias("b"), col("c")])?
- .aggregate(vec![col("b")], vec![sum(col("c"))])?
+ .project(&[col("a").alias("b"), col("c")])?
+ .aggregate(&[col("b")], &[sum(col("c"))])?
.filter(and(
col("SUM(c)").gt(lit(10i64)),
and(col("b").gt(lit(10i64)), col("SUM(c)").lt(lit(20i64))),
@@ -703,10 +703,10 @@ mod tests {
fn double_limit() -> Result<()> {
let table_scan = test_table_scan()?;
let plan = LogicalPlanBuilder::from(&table_scan)
- .project(vec![col("a"), col("b")])?
+ .project(&[col("a"), col("b")])?
.limit(20)?
.limit(10)?
- .project(vec![col("a"), col("b")])?
+ .project(&[col("a"), col("b")])?
.filter(col("a").eq(lit(1i64)))?
.build()?;
// filter does not just any of the limits
@@ -726,10 +726,10 @@ mod tests {
fn filter_2_breaks_limits() -> Result<()> {
let table_scan = test_table_scan()?;
let plan = LogicalPlanBuilder::from(&table_scan)
- .project(vec![col("a")])?
+ .project(&[col("a")])?
.filter(col("a").lt_eq(lit(1i64)))?
.limit(1)?
- .project(vec![col("a")])?
+ .project(&[col("a")])?
.filter(col("a").gt_eq(lit(1i64)))?
.build()?;
// Should be able to move both filters below the projections
@@ -765,7 +765,7 @@ mod tests {
.limit(1)?
.filter(col("a").lt_eq(lit(1i64)))?
.filter(col("a").gt_eq(lit(1i64)))?
- .project(vec![col("a")])?
+ .project(&[col("a")])?
.build()?;
// not part of the test
@@ -817,7 +817,7 @@ mod tests {
let table_scan = test_table_scan()?;
let left = LogicalPlanBuilder::from(&table_scan).build()?;
let right = LogicalPlanBuilder::from(&table_scan)
- .project(vec![col("a")])?
+ .project(&[col("a")])?
.build()?;
let plan = LogicalPlanBuilder::from(&left)
.join(&right, JoinType::Inner, &["a"], &["a"])?
@@ -852,10 +852,10 @@ mod tests {
fn filter_join_on_common_dependent() -> Result<()> {
let table_scan = test_table_scan()?;
let left = LogicalPlanBuilder::from(&table_scan)
- .project(vec![col("a"), col("c")])?
+ .project(&[col("a"), col("c")])?
.build()?;
let right = LogicalPlanBuilder::from(&table_scan)
- .project(vec![col("a"), col("b")])?
+ .project(&[col("a"), col("b")])?
.build()?;
let plan = LogicalPlanBuilder::from(&left)
.join(&right, JoinType::Inner, &["a"], &["a"])?
@@ -886,10 +886,10 @@ mod tests {
fn filter_join_on_one_side() -> Result<()> {
let table_scan = test_table_scan()?;
let left = LogicalPlanBuilder::from(&table_scan)
- .project(vec![col("a"), col("b")])?
+ .project(&[col("a"), col("b")])?
.build()?;
let right = LogicalPlanBuilder::from(&table_scan)
- .project(vec![col("a"), col("c")])?
+ .project(&[col("a"), col("c")])?
.build()?;
let plan = LogicalPlanBuilder::from(&left)
.join(&right, JoinType::Inner, &["a"], &["a"])?
diff --git a/rust/datafusion/src/optimizer/projection_push_down.rs
b/rust/datafusion/src/optimizer/projection_push_down.rs
index 948fe71..16076e9 100644
--- a/rust/datafusion/src/optimizer/projection_push_down.rs
+++ b/rust/datafusion/src/optimizer/projection_push_down.rs
@@ -312,7 +312,7 @@ mod tests {
let table_scan = test_table_scan()?;
let plan = LogicalPlanBuilder::from(&table_scan)
- .aggregate(vec![], vec![max(col("b"))])?
+ .aggregate(&[], &[max(col("b"))])?
.build()?;
let expected = "Aggregate: groupBy=[[]], aggr=[[MAX(#b)]]\
@@ -328,7 +328,7 @@ mod tests {
let table_scan = test_table_scan()?;
let plan = LogicalPlanBuilder::from(&table_scan)
- .aggregate(vec![col("c")], vec![max(col("b"))])?
+ .aggregate(&[col("c")], &[max(col("b"))])?
.build()?;
let expected = "Aggregate: groupBy=[[#c]], aggr=[[MAX(#b)]]\
@@ -345,7 +345,7 @@ mod tests {
let plan = LogicalPlanBuilder::from(&table_scan)
.filter(col("c"))?
- .aggregate(vec![], vec![max(col("b"))])?
+ .aggregate(&[], &[max(col("b"))])?
.build()?;
let expected = "Aggregate: groupBy=[[]], aggr=[[MAX(#b)]]\
@@ -362,7 +362,7 @@ mod tests {
let table_scan = test_table_scan()?;
let projection = LogicalPlanBuilder::from(&table_scan)
- .project(vec![Expr::Cast {
+ .project(&[Expr::Cast {
expr: Box::new(col("c")),
data_type: DataType::Float64,
}])?
@@ -383,7 +383,7 @@ mod tests {
assert_fields_eq(&table_scan, vec!["a", "b", "c"]);
let plan = LogicalPlanBuilder::from(&table_scan)
- .project(vec![col("a"), col("b")])?
+ .project(&[col("a"), col("b")])?
.build()?;
assert_fields_eq(&plan, vec!["a", "b"]);
@@ -403,7 +403,7 @@ mod tests {
assert_fields_eq(&table_scan, vec!["a", "b", "c"]);
let plan = LogicalPlanBuilder::from(&table_scan)
- .project(vec![col("c"), col("a")])?
+ .project(&[col("c"), col("a")])?
.limit(5)?
.build()?;
@@ -432,7 +432,7 @@ mod tests {
fn table_scan_with_literal_projection() -> Result<()> {
let table_scan = test_table_scan()?;
let plan = LogicalPlanBuilder::from(&table_scan)
- .project(vec![lit(1_i64), lit(2_i64)])?
+ .project(&[lit(1_i64), lit(2_i64)])?
.build()?;
let expected = "Projection: Int64(1), Int64(2)\
\n TableScan: test projection=Some([0])";
@@ -449,9 +449,9 @@ mod tests {
// we never use "b" in the first projection => remove it
let plan = LogicalPlanBuilder::from(&table_scan)
- .project(vec![col("c"), col("a"), col("b")])?
+ .project(&[col("c"), col("a"), col("b")])?
.filter(col("c").gt(lit(1)))?
- .aggregate(vec![col("c")], vec![max(col("a"))])?
+ .aggregate(&[col("c")], &[max(col("a"))])?
.build()?;
assert_fields_eq(&plan, vec!["c", "MAX(a)"]);
@@ -476,8 +476,8 @@ mod tests {
// there is no need for the first projection
let plan = LogicalPlanBuilder::from(&table_scan)
- .project(vec![col("b")])?
- .project(vec![lit(1).alias("a")])?
+ .project(&[col("b")])?
+ .project(&[lit(1).alias("a")])?
.build()?;
assert_fields_eq(&plan, vec!["a"]);
@@ -500,9 +500,9 @@ mod tests {
// we never use "min(b)" => remove it
let plan = LogicalPlanBuilder::from(&table_scan)
- .aggregate(vec![col("a"), col("c")], vec![max(col("b")),
min(col("b"))])?
+ .aggregate(&[col("a"), col("c")], &[max(col("b")), min(col("b"))])?
.filter(col("c").gt(lit(1)))?
- .project(vec![col("c"), col("a"), col("MAX(b)")])?
+ .project(&[col("c"), col("a"), col("MAX(b)")])?
.build()?;
assert_fields_eq(&plan, vec!["c", "a", "MAX(b)"]);
diff --git a/rust/datafusion/src/physical_plan/planner.rs
b/rust/datafusion/src/physical_plan/planner.rs
index 6ba0d26..6c4aab9 100644
--- a/rust/datafusion/src/physical_plan/planner.rs
+++ b/rust/datafusion/src/physical_plan/planner.rs
@@ -788,9 +788,9 @@ mod tests {
let logical_plan = LogicalPlanBuilder::scan_csv(&path, options, None)?
// filter clause needs the type coercion rule applied
.filter(col("c7").lt(lit(5_u8)))?
- .project(vec![col("c1"), col("c2")])?
- .aggregate(vec![col("c1")], vec![sum(col("c2"))])?
- .sort(vec![col("c1").sort(true, true)])?
+ .project(&[col("c1"), col("c2")])?
+ .aggregate(&[col("c1")], &[sum(col("c2"))])?
+ .sort(&[col("c1").sort(true, true)])?
.limit(10)?
.build()?;
@@ -861,7 +861,7 @@ mod tests {
];
for case in cases {
let logical_plan = LogicalPlanBuilder::scan_csv(&path, options,
None)?
- .project(vec![case.clone()]);
+ .project(&[case.clone()]);
let message = format!(
"Expression {:?} expected to error due to impossible coercion",
case
@@ -953,7 +953,7 @@ mod tests {
let logical_plan = LogicalPlanBuilder::scan_csv(&path, options, None)?
// filter clause needs the type coercion rule applied
.filter(col("c12").lt(lit(0.05)))?
- .project(vec![col("c1").in_list(list, false)])?
+ .project(&[col("c1").in_list(list, false)])?
.build()?;
let execution_plan = plan(&logical_plan)?;
// verify that the plan correctly adds cast from Int64(1) to Utf8
@@ -968,7 +968,7 @@ mod tests {
let logical_plan = LogicalPlanBuilder::scan_csv(&path, options, None)?
// filter clause needs the type coercion rule applied
.filter(col("c12").lt(lit(0.05)))?
- .project(vec![col("c12").lt_eq(lit(0.025)).in_list(list, false)])?
+ .project(&[col("c12").lt_eq(lit(0.025)).in_list(list, false)])?
.build()?;
let execution_plan = plan(&logical_plan);
diff --git a/rust/datafusion/src/sql/planner.rs
b/rust/datafusion/src/sql/planner.rs
index d97b52d..ab7ff13 100644
--- a/rust/datafusion/src/sql/planner.rs
+++ b/rust/datafusion/src/sql/planner.rs
@@ -431,7 +431,7 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> {
(plan, select_exprs)
};
- self.project(&plan, select_exprs_post_aggr, false)
+ self.project(&plan, &select_exprs_post_aggr, false)
}
/// Returns the `Expr`'s corresponding to a SQL query's SELECT expressions.
@@ -462,7 +462,7 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> {
fn project(
&self,
input: &LogicalPlan,
- expr: Vec<Expr>,
+ expr: &[Expr],
force: bool,
) -> Result<LogicalPlan> {
self.validate_schema_satisfies_exprs(&input.schema(), &expr)?;
@@ -484,9 +484,9 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> {
fn aggregate(
&self,
input: &LogicalPlan,
- select_exprs: &Vec<Expr>,
- group_by: &Vec<SQLExpr>,
- aggr_exprs: &Vec<Expr>,
+ select_exprs: &[Expr],
+ group_by: &[SQLExpr],
+ aggr_exprs: &[Expr],
) -> Result<(LogicalPlan, Vec<Expr>)> {
let group_by_exprs = group_by
.iter()
@@ -500,7 +500,7 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> {
.collect::<Vec<Expr>>();
let plan = LogicalPlanBuilder::from(&input)
- .aggregate(group_by_exprs, aggr_exprs.clone())?
+ .aggregate(&group_by_exprs, aggr_exprs)?
.build()?;
// After aggregation, these are all of the columns that will be
@@ -547,7 +547,7 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> {
fn order_by(
&self,
plan: &LogicalPlan,
- order_by: &Vec<OrderByExpr>,
+ order_by: &[OrderByExpr],
) -> Result<LogicalPlan> {
if order_by.is_empty() {
return Ok(plan.clone());
@@ -567,14 +567,16 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> {
})
.collect();
- LogicalPlanBuilder::from(&plan).sort(order_by_rex?)?.build()
+ LogicalPlanBuilder::from(&plan)
+ .sort(&order_by_rex?)?
+ .build()
}
/// Validate the schema provides all of the columns referenced in the
expressions.
fn validate_schema_satisfies_exprs(
&self,
schema: &DFSchema,
- exprs: &Vec<Expr>,
+ exprs: &[Expr],
) -> Result<()> {
find_column_exprs(exprs)
.iter()
@@ -611,7 +613,7 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> {
/// Generate a relational expression from a SQL expression
pub fn sql_to_rex(&self, sql: &SQLExpr, schema: &DFSchema) -> Result<Expr>
{
let expr = self.sql_expr_to_logical_expr(sql)?;
- self.validate_schema_satisfies_exprs(schema, &vec![expr.clone()])?;
+ self.validate_schema_satisfies_exprs(schema, &[expr.clone()])?;
Ok(expr)
}
diff --git a/rust/datafusion/src/sql/utils.rs b/rust/datafusion/src/sql/utils.rs
index 34bd55d..155a1f3 100644
--- a/rust/datafusion/src/sql/utils.rs
+++ b/rust/datafusion/src/sql/utils.rs
@@ -47,14 +47,14 @@ pub(crate) fn find_aggregate_exprs(exprs: &Vec<Expr>) ->
Vec<Expr> {
/// Collect all deeply nested `Expr::Column`'s. They are returned in order of
/// appearance (depth first), with duplicates omitted.
-pub(crate) fn find_column_exprs(exprs: &Vec<Expr>) -> Vec<Expr> {
+pub(crate) fn find_column_exprs(exprs: &[Expr]) -> Vec<Expr> {
find_exprs_in_exprs(exprs, &|nested_expr| matches!(nested_expr,
Expr::Column(_)))
}
/// Search the provided `Expr`'s, and all of their nested `Expr`, for any that
/// pass the provided test. The returned `Expr`'s are deduplicated and returned
/// in order of appearance (depth first).
-fn find_exprs_in_exprs<F>(exprs: &Vec<Expr>, test_fn: &F) -> Vec<Expr>
+fn find_exprs_in_exprs<F>(exprs: &[Expr], test_fn: &F) -> Vec<Expr>
where
F: Fn(&Expr) -> bool,
{
diff --git a/rust/datafusion/tests/custom_sources.rs
b/rust/datafusion/tests/custom_sources.rs
index 979bde4..9778fea 100644
--- a/rust/datafusion/tests/custom_sources.rs
+++ b/rust/datafusion/tests/custom_sources.rs
@@ -161,7 +161,7 @@ async fn custom_source_dataframe() -> Result<()> {
let table = ctx.read_table(Arc::new(CustomTableProvider))?;
let logical_plan = LogicalPlanBuilder::from(&table.to_logical_plan())
- .project(vec![col("c2")])?
+ .project(&[col("c2")])?
.build()?;
let optimized_plan = ctx.optimize(&logical_plan)?;
diff --git a/rust/datafusion/tests/provider_filter_pushdown.rs
b/rust/datafusion/tests/provider_filter_pushdown.rs
index b4f0bfb..d4f8a6b 100644
--- a/rust/datafusion/tests/provider_filter_pushdown.rs
+++ b/rust/datafusion/tests/provider_filter_pushdown.rs
@@ -149,7 +149,7 @@ async fn assert_provider_row_count(value: i64,
expected_count: u64) -> Result<()
let df = ctx
.read_table(Arc::new(provider.clone()))?
.filter(col("flag").eq(lit(value)))?
- .aggregate(vec![], vec![count(col("flag"))])?;
+ .aggregate(&[], &[count(col("flag"))])?;
let results = df.collect().await?;
let result_col: &UInt64Array = as_primitive_array(results[0].column(0));