Repository: drill Updated Branches: refs/heads/master 6bba69d48 -> 09b262776
DRILL-4657: Rank() will return wrong results if a frame of data is too big (more than 2 batches) this closes #499 Project: http://git-wip-us.apache.org/repos/asf/drill/repo Commit: http://git-wip-us.apache.org/repos/asf/drill/commit/09b26277 Tree: http://git-wip-us.apache.org/repos/asf/drill/tree/09b26277 Diff: http://git-wip-us.apache.org/repos/asf/drill/diff/09b26277 Branch: refs/heads/master Commit: 09b262776e965ea17a6a863801f7e1ee3e5b3d5a Parents: 6bba69d Author: adeneche <[email protected]> Authored: Wed May 4 12:08:36 2016 -0700 Committer: adeneche <[email protected]> Committed: Fri May 6 17:13:42 2016 -0700 ---------------------------------------------------------------------- .../physical/impl/window/WindowFunction.java | 12 +++- .../physical/impl/window/TestWindowFrame.java | 12 ++++ .../src/test/resources/window/4657.tsv | 60 ++++++++++++++++++++ 3 files changed, 82 insertions(+), 2 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/drill/blob/09b26277/exec/java-exec/src/main/java/org/apache/drill/exec/physical/impl/window/WindowFunction.java ---------------------------------------------------------------------- diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/physical/impl/window/WindowFunction.java b/exec/java-exec/src/main/java/org/apache/drill/exec/physical/impl/window/WindowFunction.java index cd14b8a..5630ccf 100644 --- a/exec/java-exec/src/main/java/org/apache/drill/exec/physical/impl/window/WindowFunction.java +++ b/exec/java-exec/src/main/java/org/apache/drill/exec/physical/impl/window/WindowFunction.java @@ -221,10 +221,18 @@ public abstract class WindowFunction { @Override public boolean canDoWork(int numBatchesAvailable, final WindowPOP pop, boolean frameEndReached, boolean partitionEndReached) { assert numBatchesAvailable > 0 : "canDoWork() should not be called when numBatchesAvailable == 0"; + if (type == Type.ROW_NUMBER) { + // row_number doesn't need to wait for anything + return true; + } + if (type == Type.RANK) { + // rank only works if we know how many rows we have in the current frame + // we could avoid this, but it requires more refactoring + return frameEndReached; + } // for CUME_DIST, PERCENT_RANK and NTILE we need the full partition - // otherwise we can process the first batch immediately - return partitionEndReached || ! requiresFullPartition(pop); + return partitionEndReached; } @Override http://git-wip-us.apache.org/repos/asf/drill/blob/09b26277/exec/java-exec/src/test/java/org/apache/drill/exec/physical/impl/window/TestWindowFrame.java ---------------------------------------------------------------------- diff --git a/exec/java-exec/src/test/java/org/apache/drill/exec/physical/impl/window/TestWindowFrame.java b/exec/java-exec/src/test/java/org/apache/drill/exec/physical/impl/window/TestWindowFrame.java index f5e88d2..76f0935 100644 --- a/exec/java-exec/src/test/java/org/apache/drill/exec/physical/impl/window/TestWindowFrame.java +++ b/exec/java-exec/src/test/java/org/apache/drill/exec/physical/impl/window/TestWindowFrame.java @@ -439,4 +439,16 @@ public class TestWindowFrame extends BaseTestQuery { .baselineValues("EMPTY") .go(); } + + @Test + public void test4657() throws Exception { + testBuilder() + .sqlQuery("select row_number() over(order by position_id) rn, rank() over(order by position_id) rnk from dfs_test.`%s/window/b3.p2`", TEST_RES_PATH) + .ordered() + .csvBaselineFile("window/4657.tsv") + .baselineColumns("rn", "rnk") + .expectsNumBatches(4) // we expect 3 data batches and the fast schema + .go(); + } + } http://git-wip-us.apache.org/repos/asf/drill/blob/09b26277/exec/java-exec/src/test/resources/window/4657.tsv ---------------------------------------------------------------------- diff --git a/exec/java-exec/src/test/resources/window/4657.tsv b/exec/java-exec/src/test/resources/window/4657.tsv new file mode 100644 index 0000000..a7b1973 --- /dev/null +++ b/exec/java-exec/src/test/resources/window/4657.tsv @@ -0,0 +1,60 @@ +1 1 +3 1 +4 1 +5 1 +2 1 +11 6 +6 6 +7 6 +8 6 +9 6 +10 6 +12 6 +13 6 +14 6 +15 6 +16 6 +17 6 +18 6 +19 6 +20 6 +21 6 +22 6 +23 6 +24 6 +25 6 +26 6 +27 6 +28 6 +29 6 +30 6 +31 6 +32 6 +33 6 +34 6 +35 6 +36 6 +37 6 +38 6 +39 6 +40 6 +41 6 +42 6 +43 6 +44 6 +45 6 +46 6 +47 6 +48 6 +49 6 +50 6 +51 6 +52 6 +53 6 +54 6 +55 6 +56 6 +57 6 +58 6 +59 6 +60 6 \ No newline at end of file
