LENS-603: Skip expression rewrite in columnar rewriter
Project: http://git-wip-us.apache.org/repos/asf/incubator-lens/repo Commit: http://git-wip-us.apache.org/repos/asf/incubator-lens/commit/6d5f955f Tree: http://git-wip-us.apache.org/repos/asf/incubator-lens/tree/6d5f955f Diff: http://git-wip-us.apache.org/repos/asf/incubator-lens/diff/6d5f955f Branch: refs/heads/current-release-line Commit: 6d5f955fbdb904790506fcb1c8fa5fbe4458aa05 Parents: 1708094 Author: Sushil Mohanty <[email protected]> Authored: Fri Jun 19 12:10:47 2015 +0530 Committer: Rajat Khandelwal <[email protected]> Committed: Fri Jun 19 12:10:47 2015 +0530 ---------------------------------------------------------------------- .../lens/driver/jdbc/ColumnarSQLRewriter.java | 41 +++- .../driver/jdbc/TestColumnarSQLRewriter.java | 201 +++++++++---------- 2 files changed, 132 insertions(+), 110 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/incubator-lens/blob/6d5f955f/lens-driver-jdbc/src/main/java/org/apache/lens/driver/jdbc/ColumnarSQLRewriter.java ---------------------------------------------------------------------- diff --git a/lens-driver-jdbc/src/main/java/org/apache/lens/driver/jdbc/ColumnarSQLRewriter.java b/lens-driver-jdbc/src/main/java/org/apache/lens/driver/jdbc/ColumnarSQLRewriter.java index 9ba136f..17ddbfd 100644 --- a/lens-driver-jdbc/src/main/java/org/apache/lens/driver/jdbc/ColumnarSQLRewriter.java +++ b/lens-driver-jdbc/src/main/java/org/apache/lens/driver/jdbc/ColumnarSQLRewriter.java @@ -333,6 +333,43 @@ public class ColumnarSQLRewriter implements QueryRewriter { return joinCondition; } + /** + * Get the count of columns in a given select expression + * + * @param node + * @return Column count + */ + public int getColumnCount(ASTNode node) { + int count = 0; + for (int i = 0; i < node.getChildCount(); i++) { + ASTNode child = (ASTNode) node.getChild(i); + if (child.getToken().getType() == TOK_TABLE_OR_COL) { + count++; + } else { + count += getColumnCount(child); + } + } + return count; + } + + /** + * Check if expression is used in select + * + * @param node + * @return true if expressions is used + */ + public boolean isExpressionsUsed(ASTNode node) { + for (int i = 0; i < node.getChildCount(); i++) { + if (node.getChild(i).getType() == HiveParser.TOK_SELEXPR) { + int cnt = getColumnCount((ASTNode) node.getChild(i)); + if (cnt >= 2) { + return true; + } + } + } + return false; + } + /* * Get filter conditions if user has specified a join condition for filter pushdown. */ @@ -844,7 +881,9 @@ public class ColumnarSQLRewriter implements QueryRewriter { // Construct the final fact in-line query with keys, // measures and individual sub queries built. - if (whereTree == null || joinTree == null || allSubQueries.length() == 0 || aggColumn.isEmpty()) { + + if (whereTree == null || joinTree == null || allSubQueries.length() == 0 + || aggColumn.isEmpty() || isExpressionsUsed(selectAST)) { LOG.info("@@@Query not eligible for inner subquery rewrite"); // construct query without fact sub query constructQuery(selectTree, whereTree, groupByTree, havingTree, orderByTree, limit); http://git-wip-us.apache.org/repos/asf/incubator-lens/blob/6d5f955f/lens-driver-jdbc/src/test/java/org/apache/lens/driver/jdbc/TestColumnarSQLRewriter.java ---------------------------------------------------------------------- diff --git a/lens-driver-jdbc/src/test/java/org/apache/lens/driver/jdbc/TestColumnarSQLRewriter.java b/lens-driver-jdbc/src/test/java/org/apache/lens/driver/jdbc/TestColumnarSQLRewriter.java index aab7f31..6d35b80 100644 --- a/lens-driver-jdbc/src/test/java/org/apache/lens/driver/jdbc/TestColumnarSQLRewriter.java +++ b/lens-driver-jdbc/src/test/java/org/apache/lens/driver/jdbc/TestColumnarSQLRewriter.java @@ -444,34 +444,21 @@ public class TestColumnarSQLRewriter { String actual = qtest.rewrite(query, conf, hconf); String expected = "select ( sales_fact___fact . time_key ), ( time_dim___time_dim . day_of_week ), " - + "date(( time_dim___time_dim . day )), ( item_dim___item_dim . item_key ), case when " - + "(sum(sum_sales_fact___fact_dollars_sold) = 0 ) then 0.0 else sum(sum_sales_fact___fact_dollars_sold) " - + "end dollars_sold , format(sum(sum_sales_fact___fact_units_sold), 4 ), " - + "format(avg(avg_sales_fact___fact_dollars_sold), " - + "'##################.###' ), min(min_sales_fact___fact_dollars_sold), max(max_sales_fact___fact_dollars_sold) " - + "from (select sales_fact___fact.time_key,sales_fact___fact.location_key,sales_fact___fact.item_key," - + "sum(( sales_fact___fact . dollars_sold )) as sum_sales_fact___fact_dollars_sold, " - + "sum(( sales_fact___fact . units_sold )) as sum_sales_fact___fact_units_sold, " - + "avg(( sales_fact___fact . dollars_sold )) as avg_sales_fact___fact_dollars_sold, " - + "min(( sales_fact___fact . dollars_sold )) as min_sales_fact___fact_dollars_sold, " - + "max(( sales_fact___fact . dollars_sold )) as max_sales_fact___fact_dollars_sold " - + "from sales_fact sales_fact___fact where sales_fact___fact.time_key in ( select time_dim .time_key " - + "from time_dim where ( time_dim. time_key ) between date_add( '2013-01-01' , interval 1 day) " - + "and date_sub( '2013-01-31' , interval 3 day) ) and sales_fact___fact.location_key in " - + "( select location_dim .location_key from location_dim where " - + "(( location_dim. location_name ) = 'test123' ) ) " - + "and sales_fact___fact.item_key in ( select item_dim .item_key from item_dim where " - + "(( item_dim. item_name ) = 'item_1' ) ) group by sales_fact___fact.time_key," - + "sales_fact___fact.location_key,sales_fact___fact.item_key) sales_fact___fact " - + "inner join time_dim time_dim___time_dim on (( sales_fact___fact . time_key ) = " - + "( time_dim___time_dim . time_key )) inner join location_dim location_dim___location_dim " - + "on (( sales_fact___fact . location_key ) = ( location_dim___location_dim . location_key )) " - + "inner join item_dim item_dim___item_dim on ((( sales_fact___fact . item_key ) = " - + "( item_dim___item_dim . item_key )) and (( location_dim___location_dim . location_name ) = " - + " 'test123' )) where (( time_dim___time_dim . time_key ) between date_add( '2013-01-01' , interval 1 day) " - + "and date_sub( '2013-01-31' , interval 3 day) and (( item_dim___item_dim . item_name ) = 'item_1' )) " - + "group by ( sales_fact___fact . time_key ), ( time_dim___time_dim . day_of_week ), " - + "( time_dim___time_dim . day ), ( item_dim___item_dim . item_key ) order by dollars_sold asc"; + + "date(( time_dim___time_dim . day )), ( item_dim___item_dim . item_key ), case when " + + "(sum(( sales_fact___fact . dollars_sold )) = 0 ) then 0.0 else sum(( sales_fact___fact . dollars_sold )) " + + "end dollars_sold , format(sum(( sales_fact___fact . units_sold )), 4 ), " + + "format(avg(( sales_fact___fact . dollars_sold )), '##################.###' ), " + + "min(( sales_fact___fact . dollars_sold )), max(( sales_fact___fact . dollars_sold )) " + + "from sales_fact sales_fact___fact inner join time_dim time_dim___time_dim on " + + "(( sales_fact___fact . time_key ) = ( time_dim___time_dim . time_key )) inner join " + + "location_dim location_dim___location_dim on (( sales_fact___fact . location_key ) = " + + "( location_dim___location_dim . location_key )) inner join item_dim item_dim___item_dim " + + "on ((( sales_fact___fact . item_key ) = ( item_dim___item_dim . item_key )) and " + + "(( location_dim___location_dim . location_name ) = 'test123' )) where " + + "(( time_dim___time_dim . time_key ) between date_add( '2013-01-01' , interval 1 day) " + + "and date_sub( '2013-01-31' , interval 3 day) and (( item_dim___item_dim . item_name ) = 'item_1' )) " + + "group by ( sales_fact___fact . time_key ), ( time_dim___time_dim . day_of_week ), " + + "( time_dim___time_dim . day ), ( item_dim___item_dim . item_key ) order by dollars_sold asc"; compareQueries(expected, actual); } @@ -514,63 +501,33 @@ public class TestColumnarSQLRewriter { String actual = qtest.rewrite(query, conf, hconf); String expected = "select ( sales_fact___fact . time_key ), ( time_dim___time_dim . day_of_week ), " - + "( time_dim___time_dim . day ), case when (sum(sum_sales_fact___fact_dollars_sold) = 0 ) then " - + "0.0 else sum(sum_sales_fact___fact_dollars_sold) end dollars_sold " - + "from (select sales_fact___fact.time_key," - + "sales_fact___fact.location_key,sum(( sales_fact___fact . dollars_sold )) " - + "as sum_sales_fact___fact_dollars_sold " - + "from sales_fact sales_fact___fact where sales_fact___fact.time_key in " - + "( select time_dim .time_key from time_dim " - + "where ( time_dim. time_key ) between '2013-01-01' and '2013-01-05' ) " - + "and sales_fact___fact.location_key in " - + "( select location_dim .location_key from location_dim where " - + "(( location_dim. location_name ) = 'test123' ) ) " - + "group by sales_fact___fact.time_key,sales_fact___fact.location_key) sales_fact___fact inner join time_dim " - + "time_dim___time_dim on (( sales_fact___fact . time_key ) = ( time_dim___time_dim . time_key )) " - + "inner join location_dim location_dim___location_dim on ((( sales_fact___fact . location_key ) = " - + "( location_dim___location_dim . location_key )) " - + "and (( location_dim___location_dim . location_name ) = 'test123' )) " - + " where ( time_dim___time_dim . time_key ) between '2013-01-01' and '2013-01-05' group by " - + "( sales_fact___fact . time_key ), ( time_dim___time_dim . day_of_week ), ( time_dim___time_dim . day ) " - + "order by dollars_sold asc union all select ( sales_fact___fact . time_key ), ( time_dim___time_dim . " - + "day_of_week ), ( time_dim___time_dim . day ), case when (sum(sum_sales_fact___fact_dollars_sold) = 0 ) " - + "then 0.0 else sum(sum_sales_fact___fact_dollars_sold) end dollars_sold " - + "from (select sales_fact___fact.time_key," - + "sales_fact___fact.location_key,sum(( sales_fact___fact . dollars_sold ))" - + " as sum_sales_fact___fact_dollars_sold " - + "from sales_fact sales_fact___fact where sales_fact___fact.time_key in ( select time_dim .time_key " - + "from time_dim where ( time_dim. time_key ) between '2013-02-01' and '2013-02-05' ) and " - + "sales_fact___fact.location_key in ( select location_dim .location_key from location_dim where " - + "(( location_dim. location_name ) = 'test123' ) ) " - + "group by sales_fact___fact.time_key,sales_fact___fact.location_key) " - + "sales_fact___fact inner join time_dim time_dim___time_dim on (( sales_fact___fact . time_key ) = " - + "( time_dim___time_dim . time_key )) inner join location_dim location_dim___location_dim " - + "on ((( sales_fact___fact . " - + "location_key ) = ( location_dim___location_dim . location_key )) " - + "and (( location_dim___location_dim . location_name ) = " - + "'test123' )) where ( time_dim___time_dim . time_key ) between '2013-02-01' and '2013-02-05' group by " - + "( sales_fact___fact . time_key ), ( time_dim___time_dim . day_of_week ), ( time_dim___time_dim . day ) " - + "order by dollars_sold asc union all " - + "select ( sales_fact___fact . time_key ), ( time_dim___time_dim . day_of_week ), " - + "( time_dim___time_dim . day ), case when (sum(sum_sales_fact___fact_dollars_sold) = 0 ) then 0.0 else " - + "sum(sum_sales_fact___fact_dollars_sold) end dollars_sold from (select sales_fact___fact.time_key," - + "sales_fact___fact.location_key,sum(( sales_fact___fact . dollars_sold )) " - + "as sum_sales_fact___fact_dollars_sold " - + "from sales_fact sales_fact___fact where sales_fact___fact.time_key in " - + "( select time_dim .time_key from time_dim " - + "where ( time_dim. time_key ) between '2013-03-01' and '2013-03-05' ) " - + "and sales_fact___fact.location_key in " - + "( select location_dim .location_key from location_dim where " - + "(( location_dim. location_name ) = 'test123' ) ) " - + " group by sales_fact___fact.time_key,sales_fact___fact.location_key) sales_fact___fact " - + "inner join time_dim time_dim___time_dim " - + "on (( sales_fact___fact . time_key ) = ( time_dim___time_dim . time_key )) " - + "inner join location_dim location_dim___location_dim on ((( sales_fact___fact . location_key ) = " - + "( location_dim___location_dim . location_key )) " - + "and (( location_dim___location_dim . location_name ) = 'test123' )) " - + "where ( time_dim___time_dim . time_key ) between '2013-03-01' and '2013-03-05' " - + "group by ( sales_fact___fact . time_key ), " - + "( time_dim___time_dim . day_of_week ), ( time_dim___time_dim . day ) order by dollars_sold asc"; + + "( time_dim___time_dim . day ), case when (sum(( sales_fact___fact . dollars_sold )) = 0 ) " + + "then 0.0 else sum(( sales_fact___fact . dollars_sold )) end dollars_sold from sales_fact s" + + "ales_fact___fact inner join time_dim time_dim___time_dim on (( sales_fact___fact . time_key ) " + + "= ( time_dim___time_dim . time_key )) inner join location_dim location_dim___location_dim on " + + "((( sales_fact___fact . location_key ) = ( location_dim___location_dim . location_key )) and " + + "(( location_dim___location_dim . location_name ) = 'test123' )) where ( time_dim___time_dim . time_key ) " + + "between '2013-01-01' and '2013-01-05' group by ( sales_fact___fact . time_key ), " + + "( time_dim___time_dim . day_of_week ), ( time_dim___time_dim . day ) order by dollars_sold " + + "asc union all select ( sales_fact___fact . time_key ), ( time_dim___time_dim . day_of_week ), " + + "( time_dim___time_dim . day ), case when (sum(( sales_fact___fact . dollars_sold )) = 0 ) then 0.0 " + + "else sum(( sales_fact___fact . dollars_sold )) end dollars_sold from sales_fact sales_fact___fact " + + "inner join time_dim time_dim___time_dim on (( sales_fact___fact . time_key ) = " + + "( time_dim___time_dim . time_key )) inner join location_dim location_dim___location_dim on " + + "((( sales_fact___fact . location_key ) = ( location_dim___location_dim . location_key )) and " + + "(( location_dim___location_dim . location_name ) = 'test123' )) where ( time_dim___time_dim . time_key ) " + + "between '2013-02-01' and '2013-02-05' group by ( sales_fact___fact . time_key ), " + + "( time_dim___time_dim . day_of_week ), ( time_dim___time_dim . day ) order by dollars_sold asc union all " + + "select ( sales_fact___fact . time_key ), ( time_dim___time_dim . day_of_week ), " + + "( time_dim___time_dim . day ), case when (sum(( sales_fact___fact . dollars_sold )) = 0 ) then 0.0 " + + "else sum(( sales_fact___fact . dollars_sold )) end dollars_sold from sales_fact sales_fact___fact " + + "inner join time_dim time_dim___time_dim on (( sales_fact___fact . time_key ) = " + + "( time_dim___time_dim . time_key )) inner join location_dim location_dim___location_dim on " + + "((( sales_fact___fact . location_key ) = ( location_dim___location_dim . location_key )) and " + + "(( location_dim___location_dim . location_name ) = 'test123' )) where " + + "( time_dim___time_dim . time_key ) between '2013-03-01' and '2013-03-05' " + + " group by ( sales_fact___fact . time_key ), ( time_dim___time_dim . day_of_week ), " + + "( time_dim___time_dim . day ) order by dollars_sold asc"; compareQueries(expected, actual); } @@ -595,6 +552,41 @@ public class TestColumnarSQLRewriter { } @Test + public void testSkipExpression() throws ParseException, SemanticException, LensException { + + String query = "select fact.time_key,time_dim.day_of_week,time_dim.day,item_dim.item_key, " + + "sum(case when fact.dollars_sold = 0 then 0.0 else fact.dollars_sold end) dollars_sold, " + + "round(sum(fact.units_sold),2),avg(fact.dollars_sold),min(fact.dollars_sold),max(fact.dollars_sold), " + + "location_name from sales_fact fact " + "inner join time_dim time_dim on fact.time_key = time_dim.time_key " + + "inner join location_dim location_dim on fact.location_key = location_dim.location_key " + + "inner join item_dim item_dim on fact.item_key = item_dim.item_key " + + "and location_dim.location_name = 'test123' " + + "where time_dim.time_key between '2013-01-01' and '2013-01-31' " + "and item_dim.item_name = 'item_1' " + + "group by fact.time_key,time_dim.day_of_week,time_dim.day,item_dim.item_key " + "order by dollars_sold desc "; + + SessionState.start(hconf); + + String actual = qtest.rewrite(query, conf, hconf); + String expected = "select ( sales_fact___fact . time_key ), ( time_dim___time_dim . day_of_week ), " + + "( time_dim___time_dim . day ), ( item_dim___item_dim . item_key ), sum( case when " + + "(( sales_fact___fact . dollars_sold ) = 0 ) then 0.0 else ( sales_fact___fact . dollars_sold ) end ) " + + "dollars_sold , round(sum(( sales_fact___fact . units_sold )), 2 ), " + + "avg(( sales_fact___fact . dollars_sold )), " + + "min(( sales_fact___fact . dollars_sold )), max(( sales_fact___fact . dollars_sold )), location_name , " + + " from sales_fact sales_fact___fact inner join time_dim time_dim___time_dim " + + "on (( sales_fact___fact . time_key ) = ( time_dim___time_dim . time_key )) inner join location_dim " + + "location_dim___location_dim on (( sales_fact___fact . location_key ) = " + + "( location_dim___location_dim . location_key )) inner join item_dim item_dim___item_dim on " + + "((( sales_fact___fact . item_key ) = ( item_dim___item_dim . item_key )) and " + + "(( location_dim___location_dim . location_name ) = 'test123' )) where (( time_dim___time_dim . time_key ) " + + "between '2013-01-01' and '2013-01-31' and (( item_dim___item_dim . item_name ) = 'item_1' )) " + + "group by ( sales_fact___fact . time_key ), ( time_dim___time_dim . day_of_week ), " + + "( time_dim___time_dim . day ), ( item_dim___item_dim . item_key ) order by dollars_sold desc"; + compareQueries(expected, actual); + + } + + @Test public void testFilter() throws ParseException, SemanticException, LensException { String query = "select max(fact.dollars_sold) from sales_fact fact " @@ -659,35 +651,26 @@ public class TestColumnarSQLRewriter { public void testReplaceAlias() throws ParseException, SemanticException, LensException { String query = "select fact.time_key,time_dim.day_of_week,time_dim.day," - + "case when sum(fact.dollars_sold) = 0 then 0.0 else sum(fact.dollars_sold) end dollars_sold " - + "from db.sales_fact as fact " + "inner join time_dim as time_dim on fact.time_key = time_dim.time_key " - + "inner join db.location_dim ld on fact.location_key = ld.location_key " + "and ld.location_name = 'test123' " - + "where time_dim.time_key between '2013-01-01' and '2013-01-31' " - + "group by fact.time_key,time_dim.day_of_week,time_dim.day " + "order by dollars_sold desc "; + + "case when sum(fact.dollars_sold) = 0 then 0.0 else sum(fact.dollars_sold) end dollars_sold " + + "from db.sales_fact as fact " + "inner join time_dim as time_dim on fact.time_key = time_dim.time_key " + + "inner join db.location_dim ld on fact.location_key = ld.location_key " + "and ld.location_name = 'test123' " + + "where time_dim.time_key between '2013-01-01' and '2013-01-31' " + + "group by fact.time_key,time_dim.day_of_week,time_dim.day " + "order by dollars_sold desc "; SessionState.start(hconf); String actual = qtest.rewrite(query, conf, hconf); String expected = "select ( sales_fact__db_sales_fact_fact . time_key ), ( time_dim___time_dim . day_of_week ), " - + "( time_dim___time_dim . day ), case when (sum(sum_sales_fact__db_sales_fact_fact_dollars_sold) = 0 ) " - + "then 0.0 else sum(sum_sales_fact__db_sales_fact_fact_dollars_sold) end dollars_sold from " - + "(select sales_fact__db_sales_fact_fact.location_key, sales_fact__db_sales_fact_fact.time_key," - + "sum(( sales_fact__db_sales_fact_fact . dollars_sold )) as sum_sales_fact__db_sales_fact_fact_dollars_sold " - + "from db.sales_fact sales_fact__db_sales_fact_fact where sales_fact__db_sales_fact_fact.time_key in " - + "( select time_dim .time_key from time_dim " - + "where ( time_dim. time_key ) between '2013-01-01' and '2013-01-31' ) " - + "and sales_fact__db_sales_fact_fact.location_key in ( select location_dim .location_key from " - + "location_dim where (( location_dim. location_name ) = 'test123' ) ) group by " - + "sales_fact__db_sales_fact_fact.location_key, sales_fact__db_sales_fact_fact.time_key) " - + "sales_fact__db_sales_fact_fact inner join time_dim time_dim___time_dim on " - + "(( sales_fact__db_sales_fact_fact . time_key ) = ( time_dim___time_dim . time_key )) " - + "inner join db.location_dim location_dim__db_location_dim_ld " - + "on ((( sales_fact__db_sales_fact_fact . location_key ) " - + "= ( location_dim__db_location_dim_ld . location_key )) and " - + "(( location_dim__db_location_dim_ld . location_name ) = 'test123' )) " - + " where ( time_dim___time_dim . time_key ) " - + "between '2013-01-01' and '2013-01-31' group by ( sales_fact__db_sales_fact_fact . time_key ), " - + "( time_dim___time_dim . day_of_week ), ( time_dim___time_dim . day ) order by dollars_sold desc"; + + "( time_dim___time_dim . day ), case when (sum(( sales_fact__db_sales_fact_fact . dollars_sold )) = 0 ) " + + "then 0.0 else sum(( sales_fact__db_sales_fact_fact . dollars_sold )) end dollars_sold " + + "from db.sales_fact sales_fact__db_sales_fact_fact inner join time_dim time_dim___time_dim " + + "on (( sales_fact__db_sales_fact_fact . time_key ) = ( time_dim___time_dim . time_key )) " + + "inner join db.location_dim location_dim__db_location_dim_ld on " + + "((( sales_fact__db_sales_fact_fact . location_key ) = ( location_dim__db_location_dim_ld . location_key )) " + + "and (( location_dim__db_location_dim_ld . location_name ) = 'test123' )) where " + + "( time_dim___time_dim . time_key ) between '2013-01-01' and '2013-01-31' " + + "group by ( sales_fact__db_sales_fact_fact . time_key ), ( time_dim___time_dim . day_of_week ), " + + "( time_dim___time_dim . day ) order by dollars_sold desc"; compareQueries(expected, actual); }
