[ https://issues.apache.org/jira/browse/DRILL-4147?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=15402508#comment-15402508 ]
ASF GitHub Bot commented on DRILL-4147: --------------------------------------- Github user sudheeshkatkam commented on a diff in the pull request: https://github.com/apache/drill/pull/555#discussion_r73022589 --- Diff: exec/java-exec/src/test/java/org/apache/drill/TestUnionAll.java --- @@ -1013,4 +1019,97 @@ public void testUnionAllInWith() throws Exception { .build() .run(); } + + @Test // DRILL-4147 // base case + public void testDrill4147_1() throws Exception { + final String l = FileUtils.getResourceAsFile("/multilevel/parquet/1994").toURI().toString(); + final String r = FileUtils.getResourceAsFile("/multilevel/parquet/1995").toURI().toString(); + + final String query = String.format("SELECT o_custkey FROM dfs_test.`%s` \n" + + "Union All SELECT o_custkey FROM dfs_test.`%s`", l, r); + + // Validate the plan + final String[] expectedPlan = {"UnionExchange.*\n", + ".*Project.*\n" + + ".*UnionAll"}; + final String[] excludedPlan = {}; + + test(sliceTargetSmall); + PlanTestBase.testPlanMatchingPatterns(query, expectedPlan, excludedPlan); + + try { + testBuilder() + .optionSettingQueriesForTestQuery(sliceTargetSmall) + .optionSettingQueriesForBaseline(sliceTargetDefault) + .unOrdered() + .sqlQuery(query) + .sqlBaselineQuery(query) + .build() + .run(); + } finally { + test(sliceTargetDefault); + } + } + + @Test // DRILL-4147 // group-by on top of union-all + public void testDrill4147_2() throws Exception { + final String l = FileUtils.getResourceAsFile("/multilevel/parquet/1994").toURI().toString(); + final String r = FileUtils.getResourceAsFile("/multilevel/parquet/1995").toURI().toString(); + + final String query = String.format("Select o_custkey, count(*) as cnt from \n" + + " (SELECT o_custkey FROM dfs_test.`%s` \n" + + "Union All SELECT o_custkey FROM dfs_test.`%s`) \n" + + "group by o_custkey", l, r); + + // Validate the plan + final String[] expectedPlan = {"(?s)UnionExchange.*HashAgg.*HashToRandomExchange.*UnionAll.*"}; + final String[] excludedPlan = {}; + 
+ test(sliceTargetSmall); --- End diff -- These two calls should be inside the try block? > Union All operator runs in a single fragment > -------------------------------------------- > > Key: DRILL-4147 > URL: https://issues.apache.org/jira/browse/DRILL-4147 > Project: Apache Drill > Issue Type: Bug > Reporter: amit hadke > Assignee: Aman Sinha > > A user noticed that running select from a single directory is much faster > than union all on two directories. > (https://drill.apache.org/blog/2014/12/09/running-sql-queries-on-amazon-s3/#comment-2349732267) > > It seems like the UNION ALL operator doesn't parallelize sub scans (it's using > SINGLETON for distribution type). Everything is run in a single fragment. > We may have to use SubsetTransformer in UnionAllPrule. -- This message was sent by Atlassian JIRA (v6.3.4#6332)