This is an automated email from the ASF dual-hosted git repository.
alamb pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-datafusion.git
The following commit(s) were added to refs/heads/master by this push:
new 20f6f21 update stale documentation related to window functions (#598)
20f6f21 is described below
commit 20f6f21ec551f066fd9ff228cc5221f1068c8b03
Author: Jiayu Liu <[email protected]>
AuthorDate: Wed Jun 23 21:24:05 2021 +0800
update stale documentation related to window functions (#598)
* update stale documentation related to window functions
* update readme
---
README.md | 7 ++++---
ballista/rust/core/src/serde/logical_plan/from_proto.rs | 2 ++
ballista/rust/core/src/serde/logical_plan/to_proto.rs | 1 -
datafusion/src/logical_plan/builder.rs | 15 +++------------
datafusion/src/logical_plan/plan.rs | 6 ------
datafusion/src/physical_plan/windows.rs | 9 ---------
6 files changed, 9 insertions(+), 31 deletions(-)
diff --git a/README.md b/README.md
index 195d1a7..d316060 100644
--- a/README.md
+++ b/README.md
@@ -225,10 +225,11 @@ DataFusion also includes a simple command-line
interactive SQL utility. See the
- [x] FULL JOIN
- [x] CROSS JOIN
- [ ] Window
- - [x] [Empty window](https://github.com/apache/arrow-datafusion/issues/298)
+ - [x] Empty window
- [x] Common window functions
- - [ ] [Window with ORDER BY
clause](https://github.com/apache/arrow-datafusion/issues/360)
- - [ ] [Window with PARTITION BY
clause](https://github.com/apache/arrow-datafusion/issues/299)
+ - [x] Window with PARTITION BY clause
+ - [x] Window with ORDER BY clause
+ - [ ] Window with FILTER clause
- [ ] [Window with custom WINDOW
FRAME](https://github.com/apache/arrow-datafusion/issues/361)
- [ ] UDF and UDAF for window functions
diff --git a/ballista/rust/core/src/serde/logical_plan/from_proto.rs
b/ballista/rust/core/src/serde/logical_plan/from_proto.rs
index 1b7deb7..418d60d 100644
--- a/ballista/rust/core/src/serde/logical_plan/from_proto.rs
+++ b/ballista/rust/core/src/serde/logical_plan/from_proto.rs
@@ -1178,10 +1178,12 @@ impl TryFrom<protobuf::WindowFrameBound> for
WindowFrameBound {
}
protobuf::WindowFrameBoundType::Preceding => {
// FIXME implement bound value parsing
+ // https://github.com/apache/arrow-datafusion/issues/361
Ok(WindowFrameBound::Preceding(Some(1)))
}
protobuf::WindowFrameBoundType::Following => {
// FIXME implement bound value parsing
+ // https://github.com/apache/arrow-datafusion/issues/361
Ok(WindowFrameBound::Following(Some(1)))
}
}
diff --git a/ballista/rust/core/src/serde/logical_plan/to_proto.rs
b/ballista/rust/core/src/serde/logical_plan/to_proto.rs
index 24e2b56..4049622 100644
--- a/ballista/rust/core/src/serde/logical_plan/to_proto.rs
+++ b/ballista/rust/core/src/serde/logical_plan/to_proto.rs
@@ -1002,7 +1002,6 @@ impl TryInto<protobuf::LogicalExprNode> for &Expr {
ref partition_by,
ref order_by,
ref window_frame,
- ..
} => {
let window_function = match fun {
WindowFunction::AggregateFunction(fun) => {
diff --git a/datafusion/src/logical_plan/builder.rs
b/datafusion/src/logical_plan/builder.rs
index 4b4ed0f..147f832 100644
--- a/datafusion/src/logical_plan/builder.rs
+++ b/datafusion/src/logical_plan/builder.rs
@@ -351,23 +351,14 @@ impl LogicalPlanBuilder {
}))
}
- /// Apply a window
- ///
- /// NOTE: this feature is under development and this API will be changing
- ///
- /// - https://github.com/apache/arrow-datafusion/issues/359 basic structure
- /// - https://github.com/apache/arrow-datafusion/issues/298 empty over
clause
- /// - https://github.com/apache/arrow-datafusion/issues/299 with partition
clause
- /// - https://github.com/apache/arrow-datafusion/issues/360 with order by
- /// - https://github.com/apache/arrow-datafusion/issues/361 with window
frame
- pub fn window(&self, window_expr: Vec<Expr>) -> Result<Self> {
+ /// Apply window functions to extend the schema
+ pub fn window(&self, window_expr: impl IntoIterator<Item = Expr>) ->
Result<Self> {
+ let window_expr = window_expr.into_iter().collect::<Vec<Expr>>();
let all_expr = window_expr.iter();
validate_unique_names("Windows", all_expr.clone(),
self.plan.schema())?;
-
let mut window_fields: Vec<DFField> =
exprlist_to_fields(all_expr, self.plan.schema())?;
window_fields.extend_from_slice(self.plan.schema().fields());
-
Ok(Self::from(&LogicalPlan::Window {
input: Arc::new(self.plan.clone()),
window_expr,
diff --git a/datafusion/src/logical_plan/plan.rs
b/datafusion/src/logical_plan/plan.rs
index 2562472..99f0fa1 100644
--- a/datafusion/src/logical_plan/plan.rs
+++ b/datafusion/src/logical_plan/plan.rs
@@ -95,12 +95,6 @@ pub enum LogicalPlan {
input: Arc<LogicalPlan>,
/// The window function expression
window_expr: Vec<Expr>,
- /// Filter by expressions
- // filter_by_expr: Vec<Expr>,
- /// Partition by expressions
- // partition_by_expr: Vec<Expr>,
- /// Window Frame
- // window_frame: Option<WindowFrame>,
/// The schema description of the window output
schema: DFSchemaRef,
},
diff --git a/datafusion/src/physical_plan/windows.rs
b/datafusion/src/physical_plan/windows.rs
index bb25551..2f53905 100644
--- a/datafusion/src/physical_plan/windows.rs
+++ b/datafusion/src/physical_plan/windows.rs
@@ -171,9 +171,6 @@ impl WindowExpr for BuiltInWindowExpr {
}
fn evaluate(&self, batch: &RecordBatch) -> Result<ArrayRef> {
- // FIXME, for now we assume all the rows belong to the same partition,
which will not be the
- // case when partition_by is supported, in which case we'll
parallelize the calls.
- // See https://github.com/apache/arrow-datafusion/issues/299
let values = self.evaluate_args(batch)?;
let partition_points = self.evaluate_partition_points(
batch.num_rows(),
@@ -309,9 +306,6 @@ impl WindowExpr for AggregateWindowExpr {
/// evaluate the window function values against the batch
fn evaluate(&self, batch: &RecordBatch) -> Result<ArrayRef> {
- // FIXME, for now we assume all the rows belong to the same partition,
which will not be the
- // case when partition_by is supported, in which case we'll
parallelize the calls.
- // See https://github.com/apache/arrow-datafusion/issues/299
match self.evaluation_mode() {
WindowFrameUnits::Range => self.peer_based_evaluate(batch),
WindowFrameUnits::Rows => self.row_based_evaluate(batch),
@@ -477,9 +471,6 @@ fn compute_window_aggregates(
window_expr: Vec<Arc<dyn WindowExpr>>,
batch: &RecordBatch,
) -> Result<Vec<ArrayRef>> {
- // FIXME, for now we assume all the rows belong to the same partition,
which will not be the
- // case when partition_by is supported, in which case we'll parallelize
the calls.
- // See https://github.com/apache/arrow-datafusion/issues/299
window_expr
.iter()
.map(|window_expr| window_expr.evaluate(batch))