mustafasrepo commented on code in PR #6671:
URL: https://github.com/apache/arrow-datafusion/pull/6671#discussion_r1230458156
##########
datafusion/core/tests/user_defined_aggregates.rs:
##########
@@ -79,15 +98,41 @@ async fn test_udaf_as_window() {
];
assert_batches_eq!(expected, &execute(&ctx, sql).await);
// aggregate over the entire window function call update_batch
- assert!(counters.update_batch());
- assert!(!counters.retract_batch());
+ assert!(test_state.update_batch());
+ assert!(!test_state.retract_batch());
}
/// User defined aggregate used as a window function with a window frame
#[tokio::test]
async fn test_udaf_as_window_with_frame() {
- let TestContext { ctx, counters } = TestContext::new();
+ let TestContext { ctx, test_state } = TestContext::new();
+ let sql = "SELECT time_sum(time) OVER(ORDER BY time ROWS BETWEEN 1
PRECEDING AND 1 FOLLOWING) as time_sum from t";
+ let expected = vec![
+ "+----------------------------+",
+ "| time_sum |",
+ "+----------------------------+",
+ "| 1970-01-01T00:00:00.000005 |",
+ "| 1970-01-01T00:00:00.000009 |",
+ "| 1970-01-01T00:00:00.000012 |",
+ "| 1970-01-01T00:00:00.000014 |",
+ "| 1970-01-01T00:00:00.000010 |",
+ "+----------------------------+",
+ ];
+ assert_batches_eq!(expected, &execute(&ctx, sql).await);
+ // user defined aggregates with window frame should be calling retract
batch
+ assert!(test_state.update_batch());
+ assert!(test_state.retract_batch());
+}
+
+/// Ensure that User defined aggregate used as a window function with a window
+/// frame, but that does not implement retract_batch, does not error
+#[tokio::test]
+async fn test_udaf_as_window_with_frame_without_retract_batch() {
+ let test_state = Arc::new(TestState::new().with_error_on_retract_batch());
+
+ let TestContext { ctx, test_state } =
TestContext::new_with_test_state(test_state);
let sql = "SELECT time_sum(time) OVER(ORDER BY time ROWS BETWEEN 1
PRECEDING AND 1 FOLLOWING) as time_sum from t";
+ // TODO: It is not clear why this is a different value than when retract
batch is used
Review Comment:
I think query above shouldn't work if its `retract_batch` method is not
implemented (Since its window frame boundary is `1 PRECEDING AND 1 FOLLOWING`
If the start boundary is different than `UNBOUNDED PRECEDING` `retract_batch`
method is needed to run accumulator.).
If the query were `SELECT time_sum(time) OVER(ORDER BY time ROWS BETWEEN
UNBOUNDED PRECEDING AND 1 FOLLOWING) as time_sum from t`,
`retract_batch` wouldn't have any significance, I would expect that the
result would same even if we implemented it or not.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]