Repository: lens
Updated Branches:
  refs/heads/master 3cd03a37a -> 1f885cd3d


LENS-835 : Optimizing fact subquery by removing non aggregated expression 
columns from group by clause


Project: http://git-wip-us.apache.org/repos/asf/lens/repo
Commit: http://git-wip-us.apache.org/repos/asf/lens/commit/1f885cd3
Tree: http://git-wip-us.apache.org/repos/asf/lens/tree/1f885cd3
Diff: http://git-wip-us.apache.org/repos/asf/lens/diff/1f885cd3

Branch: refs/heads/master
Commit: 1f885cd3d74310ea1cb4e2049692d53d49e5e7cc
Parents: 3cd03a3
Author: Sushil Mohanty <sushilmoha...@apache.org>
Authored: Sun Oct 25 11:27:20 2015 +0530
Committer: Amareshwari Sriramadasu <amareshw...@apache.org>
Committed: Sun Oct 25 11:27:20 2015 +0530

----------------------------------------------------------------------
 .../lens/driver/jdbc/ColumnarSQLRewriter.java   |  18 ++-
 .../driver/jdbc/TestColumnarSQLRewriter.java    | 149 +++++++++----------
 2 files changed, 85 insertions(+), 82 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/lens/blob/1f885cd3/lens-driver-jdbc/src/main/java/org/apache/lens/driver/jdbc/ColumnarSQLRewriter.java
----------------------------------------------------------------------
diff --git 
a/lens-driver-jdbc/src/main/java/org/apache/lens/driver/jdbc/ColumnarSQLRewriter.java
 
b/lens-driver-jdbc/src/main/java/org/apache/lens/driver/jdbc/ColumnarSQLRewriter.java
index 2d81ff2..3fdb40e 100644
--- 
a/lens-driver-jdbc/src/main/java/org/apache/lens/driver/jdbc/ColumnarSQLRewriter.java
+++ 
b/lens-driver-jdbc/src/main/java/org/apache/lens/driver/jdbc/ColumnarSQLRewriter.java
@@ -533,15 +533,19 @@ public class ColumnarSQLRewriter implements QueryRewriter 
{
       log.debug("AST is null ");
       return;
     }
-    if (node.getToken().getType() == HiveParser.DOT
-      && node.getParent().getChild(0).getType() != HiveParser.Identifier) {
-      String table = HQLParser.findNodeByPath(node, TOK_TABLE_OR_COL, 
Identifier).toString();
-      String column = node.getChild(1).toString().toLowerCase();
+    if (HQLParser.isAggregateAST(node)) {
+      return;
+    } else {
+      if (node.getToken().getType() == HiveParser.DOT
+              && node.getParent().getChild(0).getType() != 
HiveParser.Identifier) {
+        String table = HQLParser.findNodeByPath(node, TOK_TABLE_OR_COL, 
Identifier).toString();
+        String column = node.getChild(1).toString().toLowerCase();
 
-      String factAlias = getFactAlias();
+        String factAlias = getFactAlias();
 
-      if (table.equals(factAlias)) {
-        factKeys.add(factAlias + "." + column);
+        if (table.equals(factAlias)) {
+          factKeys.add(factAlias + "." + column);
+        }
       }
     }
 

http://git-wip-us.apache.org/repos/asf/lens/blob/1f885cd3/lens-driver-jdbc/src/test/java/org/apache/lens/driver/jdbc/TestColumnarSQLRewriter.java
----------------------------------------------------------------------
diff --git 
a/lens-driver-jdbc/src/test/java/org/apache/lens/driver/jdbc/TestColumnarSQLRewriter.java
 
b/lens-driver-jdbc/src/test/java/org/apache/lens/driver/jdbc/TestColumnarSQLRewriter.java
index 77ab302..41c3c85 100644
--- 
a/lens-driver-jdbc/src/test/java/org/apache/lens/driver/jdbc/TestColumnarSQLRewriter.java
+++ 
b/lens-driver-jdbc/src/test/java/org/apache/lens/driver/jdbc/TestColumnarSQLRewriter.java
@@ -578,32 +578,31 @@ public class TestColumnarSQLRewriter {
             + "( time_dim___time_dim . day ), ( item_dim___item_dim . item_key 
), sum(alias1) dollars_sold , "
             + "round(sum(alias2),  2 ), avg(alias3), min(alias4), max(alias5), 
 location_name  "
             + "from  (select sales_fact___fact.time_key, 
sales_fact___fact.location_key, "
-            + "sales_fact___fact.item_key, sales_fact___fact.dollars_sold,sum( 
case  "
-            + "when (( sales_fact___fact . dollars_sold ) =  0 ) then  0.0  
else "
-            + "( sales_fact___fact . dollars_sold ) end ) as alias1, "
+            + "sales_fact___fact.item_key,sum( case  when (( sales_fact___fact 
. dollars_sold ) =  0 ) "
+            + "then  0.0  else ( sales_fact___fact . dollars_sold ) end ) as 
alias1, "
             + "sum(( sales_fact___fact . units_sold )) as alias2, "
             + "avg(( sales_fact___fact . dollars_sold )) as alias3, "
-            + "min(( sales_fact___fact . dollars_sold )) as alias4, "
-            + "max(( sales_fact___fact . dollars_sold )) as alias5 "
-            + "from sales_fact sales_fact___fact where 
sales_fact___fact.time_key "
-            + "in  (  select time_dim .time_key from time_dim where "
-            + "( time_dim. time_key ) between  '2013-01-01'  and  '2013-01-31' 
 ) "
-            + "and sales_fact___fact.location_key in  (  select location_dim 
.location_key "
+            + "min(( sales_fact___fact . dollars_sold )) as alias4, max(( 
sales_fact___fact . dollars_sold )) "
+            + "as alias5 from sales_fact sales_fact___fact where 
sales_fact___fact.time_key in  "
+            + "(  select time_dim .time_key from time_dim where ( time_dim. 
time_key ) "
+            + "between  '2013-01-01'  and  '2013-01-31'  ) and "
+            + "sales_fact___fact.location_key in  (  select location_dim 
.location_key "
             + "from location_dim where (( location_dim. location_name ) =  
'test123' ) ) "
-            + "and sales_fact___fact.item_key in  (  select item_dim .item_key 
from item_dim "
-            + "where (( item_dim. item_name ) =  'item_1' ) )  group by 
sales_fact___fact.time_key, "
-            + "sales_fact___fact.location_key, sales_fact___fact.item_key, 
sales_fact___fact.dollars_sold) "
-            + "sales_fact___fact  inner join (select time_key,day_of_week,day 
from time_dim) "
-            + "time_dim___time_dim on (( sales_fact___fact . time_key ) = "
-            + "( time_dim___time_dim . time_key ))  inner join (select 
location_key,"
-            + "location_name from location_dim) location_dim___location_dim on 
"
-            + "(( sales_fact___fact . location_key ) = ( 
location_dim___location_dim . location_key ))  "
-            + "inner join (select item_key,item_name from item_dim) 
item_dim___item_dim on "
-            + "((( sales_fact___fact . item_key ) = ( item_dim___item_dim . 
item_key )) and "
-            + "(( location_dim___location_dim . location_name ) =  'test123' 
))  where "
-            + "(( time_dim___time_dim . time_key ) between  '2013-01-01'  and  
'2013-01-31'  "
-            + "and (( item_dim___item_dim . item_name ) =  'item_1' )) group 
by "
-            + "( sales_fact___fact . time_key ), ( time_dim___time_dim . 
day_of_week ), "
+            + "and sales_fact___fact.item_key in  (  select item_dim .item_key 
from "
+            + "item_dim where (( item_dim. item_name ) =  'item_1' ) )  "
+            + "group by sales_fact___fact.time_key, 
sales_fact___fact.location_key, "
+            + "sales_fact___fact.item_key) sales_fact___fact  inner join "
+            + "(select time_key,day_of_week,day from time_dim) 
time_dim___time_dim on "
+            + "(( sales_fact___fact . time_key ) = ( time_dim___time_dim . 
time_key ))  "
+            + "inner join (select location_key,location_name from 
location_dim) "
+            + "location_dim___location_dim on (( sales_fact___fact . 
location_key ) = "
+            + "( location_dim___location_dim . location_key ))  inner join "
+            + "(select item_key,item_name from item_dim) item_dim___item_dim 
on "
+            + "((( sales_fact___fact . item_key ) = ( item_dim___item_dim . 
item_key )) "
+            + "and (( location_dim___location_dim . location_name ) =  
'test123' ))  "
+            + "where (( time_dim___time_dim . time_key ) between  '2013-01-01' 
 and  '2013-01-31'  "
+            + "and (( item_dim___item_dim . item_name ) =  'item_1' )) "
+            + "group by ( sales_fact___fact . time_key ), ( 
time_dim___time_dim . day_of_week ), "
             + "( time_dim___time_dim . day ), ( item_dim___item_dim . item_key 
) "
             + "order by dollars_sold  desc";
     compareQueries(expected, actual);
@@ -630,34 +629,35 @@ public class TestColumnarSQLRewriter {
 
     String actual = qtest.rewrite(query, conf, hconf);
     String expected = "select ( sales_fact___fact . time_key ), ( 
time_dim___time_dim . day_of_week ), "
-        + "( time_dim___time_dim . day ), ( item_dim___item_dim . item_key ), 
sum(alias1) dollars_sold , "
-        + "round(sum(alias2),  2 ), avg(alias6) avg_dollars_sold, min(alias4), 
max(alias5) max_dollars_sold,  "
-        + "location_name , (avg(alias6) /  1.0 ) "
-        + "from  (select sales_fact___fact.time_key, 
sales_fact___fact.location_key, sales_fact___fact.item_key,"
-        + "sales_fact___fact.dollars_sold, sum( case  when (( 
sales_fact___fact . dollars_sold ) =  0 ) then 0.0  end )"
-        + "as alias1, sum(( sales_fact___fact . units_sold )) as alias2, avg(( 
sales_fact___fact . dollars_sold )) "
-        + "as alias3, min(( sales_fact___fact . dollars_sold )) as alias4, "
-        + "max(( sales_fact___fact . dollars_sold )) as alias5, "
-        + "avg(( sales_fact___fact . dollars_sold )) as alias6 from sales_fact 
sales_fact___fact "
-        + "where sales_fact___fact.time_key in  (  select time_dim .time_key 
from time_dim "
-        + "where ( time_dim. time_key ) between  '2013-01-01'  and  
'2013-01-31'  ) "
-        + "and sales_fact___fact.location_key in  (  select location_dim 
.location_key from "
-        + "location_dim where (( location_dim. location_name ) =  'test123' ) 
) and sales_fact___fact.item_key in  "
-        + "(  select item_dim .item_key from item_dim where (( item_dim. 
item_name ) =  'item_1' ) )  "
-        + "group by sales_fact___fact.time_key, 
sales_fact___fact.location_key, "
-        + "sales_fact___fact.item_key, sales_fact___fact.dollars_sold) 
sales_fact___fact  "
-        + "inner join (select time_key, day_of_week, day from time_dim) "
-        + "time_dim___time_dim on (( sales_fact___fact . time_key ) = "
-        + "( time_dim___time_dim . time_key ))  inner join (select 
location_key, "
-        + "location_name from location_dim) location_dim___location_dim "
-        + "on (( sales_fact___fact . location_key ) = ( 
location_dim___location_dim . location_key ))  "
-        + "inner join (select item_key, item_name from item_dim) 
item_dim___item_dim "
-        + "on ((( sales_fact___fact . item_key ) = "
-        + "( item_dim___item_dim . item_key )) and (( 
location_dim___location_dim . location_name ) =  'test123' ))  "
-        + "where (( time_dim___time_dim . time_key ) between  '2013-01-01'  
and  '2013-01-31'  "
-        + "and (( item_dim___item_dim . item_name ) =  'item_1' )) group by ( 
sales_fact___fact . time_key ), "
-        + "( time_dim___time_dim . day_of_week ), ( time_dim___time_dim . day 
), "
-        + "( item_dim___item_dim . item_key ) order by dollars_sold  desc";
+            + "( time_dim___time_dim . day ), ( item_dim___item_dim . item_key 
), "
+            + "sum(alias1) dollars_sold , round(sum(alias2),  2 ), avg(alias6) 
avg_dollars_sold , min(alias4), "
+            + "max(alias5) max_dollars_sold ,  location_name , (avg(alias6) /  
1.0 ) from  "
+            + "(select sales_fact___fact.time_key, 
sales_fact___fact.location_key, "
+            + "sales_fact___fact.item_key,sum( case  when (( sales_fact___fact 
. dollars_sold ) =  0 ) "
+            + "then  0.0  end ) as alias1, sum(( sales_fact___fact . 
units_sold )) as alias2, "
+            + "avg(( sales_fact___fact . dollars_sold )) as alias3, min(( 
sales_fact___fact . dollars_sold )) "
+            + "as alias4, max(( sales_fact___fact . dollars_sold )) as alias5, 
"
+            + "avg(( sales_fact___fact . dollars_sold )) as alias6 "
+            + "from sales_fact sales_fact___fact where 
sales_fact___fact.time_key in  "
+            + "(  select time_dim .time_key from time_dim where ( time_dim. 
time_key ) "
+            + "between  '2013-01-01'  and  '2013-01-31'  ) and 
sales_fact___fact.location_key in  "
+            + "(  select location_dim .location_key from location_dim where "
+            + "(( location_dim. location_name ) =  'test123' ) ) and 
sales_fact___fact.item_key in  "
+            + "(  select item_dim .item_key from item_dim where (( item_dim. 
item_name ) =  'item_1' ) )  "
+            + "group by sales_fact___fact.time_key, 
sales_fact___fact.location_key, sales_fact___fact.item_key) "
+            + "sales_fact___fact  inner join (select time_key,day_of_week,day 
from time_dim) "
+            + "time_dim___time_dim on (( sales_fact___fact . time_key ) = "
+            + "( time_dim___time_dim . time_key ))  inner join (select 
location_key,location_name "
+            + "from location_dim) location_dim___location_dim on (( 
sales_fact___fact . location_key ) "
+            + "= ( location_dim___location_dim . location_key ))  inner join "
+            + "(select item_key,item_name from item_dim) item_dim___item_dim 
on "
+            + "((( sales_fact___fact . item_key ) = ( item_dim___item_dim . 
item_key )) "
+            + "and (( location_dim___location_dim . location_name ) =  
'test123' ))  "
+            + "where (( time_dim___time_dim . time_key ) between  '2013-01-01' 
 and  '2013-01-31'  "
+            + "and (( item_dim___item_dim . item_name ) =  'item_1' )) group 
by "
+            + "( sales_fact___fact . time_key ), ( time_dim___time_dim . 
day_of_week ),"
+            + " ( time_dim___time_dim . day ), ( item_dim___item_dim . 
item_key ) "
+            + "order by dollars_sold  desc";
     compareQueries(expected, actual);
 
   }
@@ -951,33 +951,32 @@ public class TestColumnarSQLRewriter {
     String actual = qtest.rewrite(query, conf, hconf);
     String expected = "select ( sales_fact__db_sales_fact_fact . time_key ), ( 
time_dim___time_dim . day_of_week ), "
             + "( time_dim___time_dim . day ), ((sum(alias1) +  0 ) + 
(sum(alias2) +  0 )) expr1 , "
-            + "((sum(alias3) *  1000 ) / sum(alias4)) expr2  from  (select 
sales_fact__db_sales_fact_fact.time_key, "
-            + "sales_fact__db_sales_fact_fact.location_key, 
sales_fact__db_sales_fact_fact.status_id, "
-            + "sales_fact__db_sales_fact_fact.item_sold, 
sales_fact__db_sales_fact_fact.dollars_sold,"
-            + "sum(( sales_fact__db_sales_fact_fact . item_count )) as alias1, 
sum(( sales_fact__db_sales_fact_fact "
-            + ". product_count )) as alias2, sum( case  when ( 
sales_fact__db_sales_fact_fact . status_id ) "
-            + "is null  then  0  when (( sales_fact__db_sales_fact_fact . 
status_id ) =  1 ) then "
-            + "( sales_fact__db_sales_fact_fact . item_sold ) else  0  end ) 
as alias3, sum( case  "
-            + "when ( sales_fact__db_sales_fact_fact . status_id ) is null  
then  0  when "
-            + "(( sales_fact__db_sales_fact_fact . status_id ) =  2 ) then "
-            + "( sales_fact__db_sales_fact_fact . dollars_sold ) else  0  end 
) as alias4 "
-            + "from db.sales_fact sales_fact__db_sales_fact_fact where 
sales_fact__db_sales_fact_fact.time_key in "
-            + " (  select time_dim .time_key from time_dim where ( time_dim. 
time_key ) "
-            + "between  '2013-01-01'  and  '2013-01-31'  ) and 
sales_fact__db_sales_fact_fact.location_key in "
-            + " (  select location_dim .location_key from location_dim where 
(( location_dim. location_name ) "
-            + "=  'test123' ) )  group by 
sales_fact__db_sales_fact_fact.time_key, "
-            + "sales_fact__db_sales_fact_fact.location_key, 
sales_fact__db_sales_fact_fact.status_id, "
-            + "sales_fact__db_sales_fact_fact.item_sold, 
sales_fact__db_sales_fact_fact.dollars_sold) "
+            + "((sum(alias3) *  1000 ) / sum(alias4)) expr2  from  "
+            + "(select sales_fact__db_sales_fact_fact.time_key, 
sales_fact__db_sales_fact_fact.location_key,"
+            + "sum(( sales_fact__db_sales_fact_fact . item_count )) as alias1, 
"
+            + "sum(( sales_fact__db_sales_fact_fact . product_count )) as 
alias2, "
+            + "sum( case  when ( sales_fact__db_sales_fact_fact . status_id ) 
is null  then  0  "
+            + "when (( sales_fact__db_sales_fact_fact . status_id ) =  1 ) 
then "
+            + "( sales_fact__db_sales_fact_fact . item_sold ) else  0  end ) 
as alias3, "
+            + "sum( case  when ( sales_fact__db_sales_fact_fact . status_id ) 
is null  "
+            + "then  0  when (( sales_fact__db_sales_fact_fact . status_id ) = 
 2 ) "
+            + "then ( sales_fact__db_sales_fact_fact . dollars_sold ) else  0  
end ) as "
+            + "alias4 from db.sales_fact sales_fact__db_sales_fact_fact where "
+            + "sales_fact__db_sales_fact_fact.time_key in  (  select time_dim 
.time_key from time_dim "
+            + "where ( time_dim. time_key ) between  '2013-01-01'  and  
'2013-01-31'  ) "
+            + "and sales_fact__db_sales_fact_fact.location_key in  (  select 
location_dim .location_key "
+            + "from location_dim where (( location_dim. location_name ) =  
'test123' ) )  "
+            + "group by sales_fact__db_sales_fact_fact.time_key, 
sales_fact__db_sales_fact_fact.location_key) "
             + "sales_fact__db_sales_fact_fact  inner join (select 
time_key,day_of_week,day from time_dim) "
             + "time_dim___time_dim on (( sales_fact__db_sales_fact_fact . 
time_key ) = "
-            + "( time_dim___time_dim . time_key ))  inner join (select 
location_key,location_name "
-            + "from db.location_dim) location_dim__db_location_dim_ld on ((( 
sales_fact__db_sales_fact_fact . "
-            + "location_key ) = ( location_dim__db_location_dim_ld . 
location_key )) and "
+            + "( time_dim___time_dim . time_key ))  inner join (select 
location_key,"
+            + "location_name from db.location_dim) 
location_dim__db_location_dim_ld on "
+            + "((( sales_fact__db_sales_fact_fact . location_key ) = "
+            + "( location_dim__db_location_dim_ld . location_key )) and "
             + "(( location_dim__db_location_dim_ld . location_name ) =  
'test123' ))  "
-            + "where ( time_dim___time_dim . time_key ) between  '2013-01-01'  
and  "
-            + "'2013-01-31'  group by ( sales_fact__db_sales_fact_fact . 
time_key ), "
-            + "( time_dim___time_dim . day_of_week ), ( time_dim___time_dim . 
day ) "
-            + "order by dollars_sold  desc";
+            + "where ( time_dim___time_dim . time_key ) between  '2013-01-01'  
and  '2013-01-31'  "
+            + "group by ( sales_fact__db_sales_fact_fact . time_key ), ( 
time_dim___time_dim . day_of_week ), "
+            + "( time_dim___time_dim . day ) order by dollars_sold  desc";
 
     compareQueries(expected, actual);
   }

Reply via email to