Repository: lens Updated Branches: refs/heads/master 3cd03a37a -> 1f885cd3d
LENS-835 : Optimizing fact subquery by removing non aggregated expression columns from group by clause Project: http://git-wip-us.apache.org/repos/asf/lens/repo Commit: http://git-wip-us.apache.org/repos/asf/lens/commit/1f885cd3 Tree: http://git-wip-us.apache.org/repos/asf/lens/tree/1f885cd3 Diff: http://git-wip-us.apache.org/repos/asf/lens/diff/1f885cd3 Branch: refs/heads/master Commit: 1f885cd3d74310ea1cb4e2049692d53d49e5e7cc Parents: 3cd03a3 Author: Sushil Mohanty <sushilmoha...@apache.org> Authored: Sun Oct 25 11:27:20 2015 +0530 Committer: Amareshwari Sriramadasu <amareshw...@apache.org> Committed: Sun Oct 25 11:27:20 2015 +0530 ---------------------------------------------------------------------- .../lens/driver/jdbc/ColumnarSQLRewriter.java | 18 ++- .../driver/jdbc/TestColumnarSQLRewriter.java | 149 +++++++++---------- 2 files changed, 85 insertions(+), 82 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/lens/blob/1f885cd3/lens-driver-jdbc/src/main/java/org/apache/lens/driver/jdbc/ColumnarSQLRewriter.java ---------------------------------------------------------------------- diff --git a/lens-driver-jdbc/src/main/java/org/apache/lens/driver/jdbc/ColumnarSQLRewriter.java b/lens-driver-jdbc/src/main/java/org/apache/lens/driver/jdbc/ColumnarSQLRewriter.java index 2d81ff2..3fdb40e 100644 --- a/lens-driver-jdbc/src/main/java/org/apache/lens/driver/jdbc/ColumnarSQLRewriter.java +++ b/lens-driver-jdbc/src/main/java/org/apache/lens/driver/jdbc/ColumnarSQLRewriter.java @@ -533,15 +533,19 @@ public class ColumnarSQLRewriter implements QueryRewriter { log.debug("AST is null "); return; } - if (node.getToken().getType() == HiveParser.DOT - && node.getParent().getChild(0).getType() != HiveParser.Identifier) { - String table = HQLParser.findNodeByPath(node, TOK_TABLE_OR_COL, Identifier).toString(); - String column = node.getChild(1).toString().toLowerCase(); + if (HQLParser.isAggregateAST(node)) { + return; + } else { + if (node.getToken().getType() == HiveParser.DOT + && node.getParent().getChild(0).getType() != HiveParser.Identifier) { + String table = HQLParser.findNodeByPath(node, TOK_TABLE_OR_COL, Identifier).toString(); + String column = node.getChild(1).toString().toLowerCase(); - String factAlias = getFactAlias(); + String factAlias = getFactAlias(); - if (table.equals(factAlias)) { - factKeys.add(factAlias + "." + column); + if (table.equals(factAlias)) { + factKeys.add(factAlias + "." + column); + } } } http://git-wip-us.apache.org/repos/asf/lens/blob/1f885cd3/lens-driver-jdbc/src/test/java/org/apache/lens/driver/jdbc/TestColumnarSQLRewriter.java ---------------------------------------------------------------------- diff --git a/lens-driver-jdbc/src/test/java/org/apache/lens/driver/jdbc/TestColumnarSQLRewriter.java b/lens-driver-jdbc/src/test/java/org/apache/lens/driver/jdbc/TestColumnarSQLRewriter.java index 77ab302..41c3c85 100644 --- a/lens-driver-jdbc/src/test/java/org/apache/lens/driver/jdbc/TestColumnarSQLRewriter.java +++ b/lens-driver-jdbc/src/test/java/org/apache/lens/driver/jdbc/TestColumnarSQLRewriter.java @@ -578,32 +578,31 @@ public class TestColumnarSQLRewriter { + "( time_dim___time_dim . day ), ( item_dim___item_dim . item_key ), sum(alias1) dollars_sold , " + "round(sum(alias2), 2 ), avg(alias3), min(alias4), max(alias5), location_name " + "from (select sales_fact___fact.time_key, sales_fact___fact.location_key, " - + "sales_fact___fact.item_key, sales_fact___fact.dollars_sold,sum( case " - + "when (( sales_fact___fact . dollars_sold ) = 0 ) then 0.0 else " - + "( sales_fact___fact . dollars_sold ) end ) as alias1, " + + "sales_fact___fact.item_key,sum( case when (( sales_fact___fact . dollars_sold ) = 0 ) " + + "then 0.0 else ( sales_fact___fact . dollars_sold ) end ) as alias1, " + "sum(( sales_fact___fact . units_sold )) as alias2, " + "avg(( sales_fact___fact . dollars_sold )) as alias3, " - + "min(( sales_fact___fact . dollars_sold )) as alias4, " - + "max(( sales_fact___fact . dollars_sold )) as alias5 " - + "from sales_fact sales_fact___fact where sales_fact___fact.time_key " - + "in ( select time_dim .time_key from time_dim where " - + "( time_dim. time_key ) between '2013-01-01' and '2013-01-31' ) " - + "and sales_fact___fact.location_key in ( select location_dim .location_key " + + "min(( sales_fact___fact . dollars_sold )) as alias4, max(( sales_fact___fact . dollars_sold )) " + + "as alias5 from sales_fact sales_fact___fact where sales_fact___fact.time_key in " + + "( select time_dim .time_key from time_dim where ( time_dim. time_key ) " + + "between '2013-01-01' and '2013-01-31' ) and " + + "sales_fact___fact.location_key in ( select location_dim .location_key " + "from location_dim where (( location_dim. location_name ) = 'test123' ) ) " - + "and sales_fact___fact.item_key in ( select item_dim .item_key from item_dim " - + "where (( item_dim. item_name ) = 'item_1' ) ) group by sales_fact___fact.time_key, " - + "sales_fact___fact.location_key, sales_fact___fact.item_key, sales_fact___fact.dollars_sold) " - + "sales_fact___fact inner join (select time_key,day_of_week,day from time_dim) " - + "time_dim___time_dim on (( sales_fact___fact . time_key ) = " - + "( time_dim___time_dim . time_key )) inner join (select location_key," - + "location_name from location_dim) location_dim___location_dim on " - + "(( sales_fact___fact . location_key ) = ( location_dim___location_dim . location_key )) " - + "inner join (select item_key,item_name from item_dim) item_dim___item_dim on " - + "((( sales_fact___fact . item_key ) = ( item_dim___item_dim . item_key )) and " - + "(( location_dim___location_dim . location_name ) = 'test123' )) where " - + "(( time_dim___time_dim . time_key ) between '2013-01-01' and '2013-01-31' " - + "and (( item_dim___item_dim . item_name ) = 'item_1' )) group by " - + "( sales_fact___fact . time_key ), ( time_dim___time_dim . day_of_week ), " + + "and sales_fact___fact.item_key in ( select item_dim .item_key from " + + "item_dim where (( item_dim. item_name ) = 'item_1' ) ) " + + "group by sales_fact___fact.time_key, sales_fact___fact.location_key, " + + "sales_fact___fact.item_key) sales_fact___fact inner join " + + "(select time_key,day_of_week,day from time_dim) time_dim___time_dim on " + + "(( sales_fact___fact . time_key ) = ( time_dim___time_dim . time_key )) " + + "inner join (select location_key,location_name from location_dim) " + + "location_dim___location_dim on (( sales_fact___fact . location_key ) = " + + "( location_dim___location_dim . location_key )) inner join " + + "(select item_key,item_name from item_dim) item_dim___item_dim on " + + "((( sales_fact___fact . item_key ) = ( item_dim___item_dim . item_key )) " + + "and (( location_dim___location_dim . location_name ) = 'test123' )) " + + "where (( time_dim___time_dim . time_key ) between '2013-01-01' and '2013-01-31' " + + "and (( item_dim___item_dim . item_name ) = 'item_1' )) " + + "group by ( sales_fact___fact . time_key ), ( time_dim___time_dim . day_of_week ), " + "( time_dim___time_dim . day ), ( item_dim___item_dim . item_key ) " + "order by dollars_sold desc"; compareQueries(expected, actual); @@ -630,34 +629,35 @@ public class TestColumnarSQLRewriter { String actual = qtest.rewrite(query, conf, hconf); String expected = "select ( sales_fact___fact . time_key ), ( time_dim___time_dim . day_of_week ), " - + "( time_dim___time_dim . day ), ( item_dim___item_dim . item_key ), sum(alias1) dollars_sold , " - + "round(sum(alias2), 2 ), avg(alias6) avg_dollars_sold, min(alias4), max(alias5) max_dollars_sold, " - + "location_name , (avg(alias6) / 1.0 ) " - + "from (select sales_fact___fact.time_key, sales_fact___fact.location_key, sales_fact___fact.item_key," - + "sales_fact___fact.dollars_sold, sum( case when (( sales_fact___fact . dollars_sold ) = 0 ) then 0.0 end )" - + "as alias1, sum(( sales_fact___fact . units_sold )) as alias2, avg(( sales_fact___fact . dollars_sold )) " - + "as alias3, min(( sales_fact___fact . dollars_sold )) as alias4, " - + "max(( sales_fact___fact . dollars_sold )) as alias5, " - + "avg(( sales_fact___fact . dollars_sold )) as alias6 from sales_fact sales_fact___fact " - + "where sales_fact___fact.time_key in ( select time_dim .time_key from time_dim " - + "where ( time_dim. time_key ) between '2013-01-01' and '2013-01-31' ) " - + "and sales_fact___fact.location_key in ( select location_dim .location_key from " - + "location_dim where (( location_dim. location_name ) = 'test123' ) ) and sales_fact___fact.item_key in " - + "( select item_dim .item_key from item_dim where (( item_dim. item_name ) = 'item_1' ) ) " - + "group by sales_fact___fact.time_key, sales_fact___fact.location_key, " - + "sales_fact___fact.item_key, sales_fact___fact.dollars_sold) sales_fact___fact " - + "inner join (select time_key, day_of_week, day from time_dim) " - + "time_dim___time_dim on (( sales_fact___fact . time_key ) = " - + "( time_dim___time_dim . time_key )) inner join (select location_key, " - + "location_name from location_dim) location_dim___location_dim " - + "on (( sales_fact___fact . location_key ) = ( location_dim___location_dim . location_key )) " - + "inner join (select item_key, item_name from item_dim) item_dim___item_dim " - + "on ((( sales_fact___fact . item_key ) = " - + "( item_dim___item_dim . item_key )) and (( location_dim___location_dim . location_name ) = 'test123' )) " - + "where (( time_dim___time_dim . time_key ) between '2013-01-01' and '2013-01-31' " - + "and (( item_dim___item_dim . item_name ) = 'item_1' )) group by ( sales_fact___fact . time_key ), " - + "( time_dim___time_dim . day_of_week ), ( time_dim___time_dim . day ), " - + "( item_dim___item_dim . item_key ) order by dollars_sold desc"; + + "( time_dim___time_dim . day ), ( item_dim___item_dim . item_key ), " + + "sum(alias1) dollars_sold , round(sum(alias2), 2 ), avg(alias6) avg_dollars_sold , min(alias4), " + + "max(alias5) max_dollars_sold , location_name , (avg(alias6) / 1.0 ) from " + + "(select sales_fact___fact.time_key, sales_fact___fact.location_key, " + + "sales_fact___fact.item_key,sum( case when (( sales_fact___fact . dollars_sold ) = 0 ) " + + "then 0.0 end ) as alias1, sum(( sales_fact___fact . units_sold )) as alias2, " + + "avg(( sales_fact___fact . dollars_sold )) as alias3, min(( sales_fact___fact . dollars_sold )) " + + "as alias4, max(( sales_fact___fact . dollars_sold )) as alias5, " + + "avg(( sales_fact___fact . dollars_sold )) as alias6 " + + "from sales_fact sales_fact___fact where sales_fact___fact.time_key in " + + "( select time_dim .time_key from time_dim where ( time_dim. time_key ) " + + "between '2013-01-01' and '2013-01-31' ) and sales_fact___fact.location_key in " + + "( select location_dim .location_key from location_dim where " + + "(( location_dim. location_name ) = 'test123' ) ) and sales_fact___fact.item_key in " + + "( select item_dim .item_key from item_dim where (( item_dim. item_name ) = 'item_1' ) ) " + + "group by sales_fact___fact.time_key, sales_fact___fact.location_key, sales_fact___fact.item_key) " + + "sales_fact___fact inner join (select time_key,day_of_week,day from time_dim) " + + "time_dim___time_dim on (( sales_fact___fact . time_key ) = " + + "( time_dim___time_dim . time_key )) inner join (select location_key,location_name " + + "from location_dim) location_dim___location_dim on (( sales_fact___fact . location_key ) " + + "= ( location_dim___location_dim . location_key )) inner join " + + "(select item_key,item_name from item_dim) item_dim___item_dim on " + + "((( sales_fact___fact . item_key ) = ( item_dim___item_dim . item_key )) " + + "and (( location_dim___location_dim . location_name ) = 'test123' )) " + + "where (( time_dim___time_dim . time_key ) between '2013-01-01' and '2013-01-31' " + + "and (( item_dim___item_dim . item_name ) = 'item_1' )) group by " + + "( sales_fact___fact . time_key ), ( time_dim___time_dim . day_of_week )," + + " ( time_dim___time_dim . day ), ( item_dim___item_dim . item_key ) " + + "order by dollars_sold desc"; compareQueries(expected, actual); } @@ -951,33 +951,32 @@ public class TestColumnarSQLRewriter { String actual = qtest.rewrite(query, conf, hconf); String expected = "select ( sales_fact__db_sales_fact_fact . time_key ), ( time_dim___time_dim . day_of_week ), " + "( time_dim___time_dim . day ), ((sum(alias1) + 0 ) + (sum(alias2) + 0 )) expr1 , " - + "((sum(alias3) * 1000 ) / sum(alias4)) expr2 from (select sales_fact__db_sales_fact_fact.time_key, " - + "sales_fact__db_sales_fact_fact.location_key, sales_fact__db_sales_fact_fact.status_id, " - + "sales_fact__db_sales_fact_fact.item_sold, sales_fact__db_sales_fact_fact.dollars_sold," - + "sum(( sales_fact__db_sales_fact_fact . item_count )) as alias1, sum(( sales_fact__db_sales_fact_fact " - + ". product_count )) as alias2, sum( case when ( sales_fact__db_sales_fact_fact . status_id ) " - + "is null then 0 when (( sales_fact__db_sales_fact_fact . status_id ) = 1 ) then " - + "( sales_fact__db_sales_fact_fact . item_sold ) else 0 end ) as alias3, sum( case " - + "when ( sales_fact__db_sales_fact_fact . status_id ) is null then 0 when " - + "(( sales_fact__db_sales_fact_fact . status_id ) = 2 ) then " - + "( sales_fact__db_sales_fact_fact . dollars_sold ) else 0 end ) as alias4 " - + "from db.sales_fact sales_fact__db_sales_fact_fact where sales_fact__db_sales_fact_fact.time_key in " - + " ( select time_dim .time_key from time_dim where ( time_dim. time_key ) " - + "between '2013-01-01' and '2013-01-31' ) and sales_fact__db_sales_fact_fact.location_key in " - + " ( select location_dim .location_key from location_dim where (( location_dim. location_name ) " - + "= 'test123' ) ) group by sales_fact__db_sales_fact_fact.time_key, " - + "sales_fact__db_sales_fact_fact.location_key, sales_fact__db_sales_fact_fact.status_id, " - + "sales_fact__db_sales_fact_fact.item_sold, sales_fact__db_sales_fact_fact.dollars_sold) " + + "((sum(alias3) * 1000 ) / sum(alias4)) expr2 from " + + "(select sales_fact__db_sales_fact_fact.time_key, sales_fact__db_sales_fact_fact.location_key," + + "sum(( sales_fact__db_sales_fact_fact . item_count )) as alias1, " + + "sum(( sales_fact__db_sales_fact_fact . product_count )) as alias2, " + + "sum( case when ( sales_fact__db_sales_fact_fact . status_id ) is null then 0 " + + "when (( sales_fact__db_sales_fact_fact . status_id ) = 1 ) then " + + "( sales_fact__db_sales_fact_fact . item_sold ) else 0 end ) as alias3, " + + "sum( case when ( sales_fact__db_sales_fact_fact . status_id ) is null " + + "then 0 when (( sales_fact__db_sales_fact_fact . status_id ) = 2 ) " + + "then ( sales_fact__db_sales_fact_fact . dollars_sold ) else 0 end ) as " + + "alias4 from db.sales_fact sales_fact__db_sales_fact_fact where " + + "sales_fact__db_sales_fact_fact.time_key in ( select time_dim .time_key from time_dim " + + "where ( time_dim. time_key ) between '2013-01-01' and '2013-01-31' ) " + + "and sales_fact__db_sales_fact_fact.location_key in ( select location_dim .location_key " + + "from location_dim where (( location_dim. location_name ) = 'test123' ) ) " + + "group by sales_fact__db_sales_fact_fact.time_key, sales_fact__db_sales_fact_fact.location_key) " + "sales_fact__db_sales_fact_fact inner join (select time_key,day_of_week,day from time_dim) " + "time_dim___time_dim on (( sales_fact__db_sales_fact_fact . time_key ) = " - + "( time_dim___time_dim . time_key )) inner join (select location_key,location_name " - + "from db.location_dim) location_dim__db_location_dim_ld on ((( sales_fact__db_sales_fact_fact . " - + "location_key ) = ( location_dim__db_location_dim_ld . location_key )) and " + + "( time_dim___time_dim . time_key )) inner join (select location_key," + + "location_name from db.location_dim) location_dim__db_location_dim_ld on " + + "((( sales_fact__db_sales_fact_fact . location_key ) = " + + "( location_dim__db_location_dim_ld . location_key )) and " + "(( location_dim__db_location_dim_ld . location_name ) = 'test123' )) " - + "where ( time_dim___time_dim . time_key ) between '2013-01-01' and " - + "'2013-01-31' group by ( sales_fact__db_sales_fact_fact . time_key ), " - + "( time_dim___time_dim . day_of_week ), ( time_dim___time_dim . day ) " - + "order by dollars_sold desc"; + + "where ( time_dim___time_dim . time_key ) between '2013-01-01' and '2013-01-31' " + + "group by ( sales_fact__db_sales_fact_fact . time_key ), ( time_dim___time_dim . day_of_week ), " + + "( time_dim___time_dim . day ) order by dollars_sold desc"; compareQueries(expected, actual); }