Repository: lens Updated Branches: refs/heads/master cb2529672 -> 89b17e640
LENS-1430 : Expressions are not getting resolved correctly in union query Project: http://git-wip-us.apache.org/repos/asf/lens/repo Commit: http://git-wip-us.apache.org/repos/asf/lens/commit/89b17e64 Tree: http://git-wip-us.apache.org/repos/asf/lens/tree/89b17e64 Diff: http://git-wip-us.apache.org/repos/asf/lens/diff/89b17e64 Branch: refs/heads/master Commit: 89b17e640b47ff9e071f8df53c05ae0feaf25181 Parents: cb25296 Author: Sushil Mohanty <sushil.k.moha...@gmail.com> Authored: Tue Jun 6 16:42:10 2017 +0530 Committer: sushilmohanty <sushilmoha...@apache.org> Committed: Tue Jun 6 16:42:10 2017 +0530 ---------------------------------------------------------------------- .../lens/cube/parse/ExpressionResolver.java | 36 +++++++++++- .../cube/parse/TestUnionAndJoinCandidates.java | 60 +++++++++++++++++++- .../resources/schema/cubes/base/basecube.xml | 12 ++++ .../resources/schema/cubes/base/testcube.xml | 9 +++ .../cubes/derived/union_join_ctx_der1.xml | 2 + .../schema/facts/union_join_ctx_fact1.xml | 2 +- .../schema/facts/union_join_ctx_fact3.xml | 1 + .../schema/facts/union_join_ctx_fact4.xml | 59 +++++++++++++++++++ 8 files changed, 177 insertions(+), 4 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/lens/blob/89b17e64/lens-cube/src/main/java/org/apache/lens/cube/parse/ExpressionResolver.java ---------------------------------------------------------------------- diff --git a/lens-cube/src/main/java/org/apache/lens/cube/parse/ExpressionResolver.java b/lens-cube/src/main/java/org/apache/lens/cube/parse/ExpressionResolver.java index f38aa54..ea6d5c7 100644 --- a/lens-cube/src/main/java/org/apache/lens/cube/parse/ExpressionResolver.java +++ b/lens-cube/src/main/java/org/apache/lens/cube/parse/ExpressionResolver.java @@ -285,6 +285,7 @@ class ExpressionResolver implements ContextRewriter { @Getter private Map<String, Set<ExpressionContext>> allExprsQueried = new HashMap<String, Set<ExpressionContext>>(); private Map<String, Set<PickedExpression>> pickedExpressions = new HashMap<String, Set<PickedExpression>>(); + private Map<String, ASTNode> nonPickedExpressionsForCandidate = new HashMap<String, ASTNode>(); private final CubeQueryContext cubeql; ExpressionResolverContext(CubeQueryContext cubeql) { @@ -407,6 +408,7 @@ class ExpressionResolver implements ContextRewriter { } pickedExpressions.clear(); + nonPickedExpressionsForCandidate.clear(); return exprDims; } @@ -426,7 +428,7 @@ class ExpressionResolver implements ContextRewriter { // Having ast is not copied now, it's maintained in cubeQueryContext, each fact processes that serially. if (queryAST.getHavingAST() != null) { replaceAST(cubeql, queryAST.getHavingAST()); - } else if (cubeql.getHavingAST() != null) { + } else if (cubeql.getHavingAST() != null && nonPickedExpressionsForCandidate.isEmpty()) { replaceAST(cubeql, cubeql.getHavingAST()); queryAST.setHavingAST(MetastoreUtil.copyAST(cubeql.getHavingAST())); } @@ -459,6 +461,9 @@ class ExpressionResolver implements ContextRewriter { if (expr != null) { node1.setChild(i, replaceAlias(expr.getRewrittenAST(), cubeql)); } + } else if (nonPickedExpressionsForCandidate.containsKey(column)) { + node1.setChild(i, nonPickedExpressionsForCandidate.get(column)); + } } } @@ -477,7 +482,7 @@ class ExpressionResolver implements ContextRewriter { return null; } - private void pickExpressionsForTable(CandidateTable cTable) { + private void pickExpressionsForTable(CandidateTable cTable) throws LensException { for (Map.Entry<String, Set<ExpressionContext>> ecEntry : allExprsQueried.entrySet()) { Set<ExpressionContext> ecSet = ecEntry.getValue(); for (ExpressionContext ec : ecSet) { @@ -488,6 +493,8 @@ class ExpressionResolver implements ContextRewriter { // pick first evaluable expression pickedExpressions.computeIfAbsent(ecEntry.getKey(), k -> new HashSet<>()) .add(new PickedExpression(ec.srcAlias, ec.evaluableExpressions.get(cTable).iterator().next())); + } else { + nonPickedExpressionsForCandidate.put(ecEntry.getKey(), getDefaultExpr(getExprAst(ec))); } } } @@ -495,6 +502,31 @@ class ExpressionResolver implements ContextRewriter { } } + private ASTNode getExprAst(ExpressionContext ec) { + Set<StorageCandidate> scSet = CandidateUtil.getStorageCandidates(cubeql.getCandidates()); + Set<String> storageTableNames = new HashSet<String>(); + for (StorageCandidate sc : scSet) { + storageTableNames.add(sc.getStorageTable()); + } + for (CandidateTable table : ec.evaluableExpressions.keySet()) { + if (storageTableNames.contains(table.getStorageTable())) { + return MetastoreUtil.copyAST(ec.evaluableExpressions.get(table).iterator().next().finalAST); + } + } + return null; + } + + private ASTNode getDefaultExpr(ASTNode node) { + if (HQLParser.isAggregateAST(node)) { + node.setChild(1, new ASTNode(new CommonToken(HiveParser.Identifier, "0.0"))); + } + for (int i = 0; i < node.getChildCount(); i++) { + ASTNode child = (ASTNode) node.getChild(i); + getDefaultExpr(child); + } + return node; + } + void pruneExpressions() { for (Set<ExpressionContext> ecSet : allExprsQueried.values()) { for (ExpressionContext ec : ecSet) { http://git-wip-us.apache.org/repos/asf/lens/blob/89b17e64/lens-cube/src/test/java/org/apache/lens/cube/parse/TestUnionAndJoinCandidates.java ---------------------------------------------------------------------- diff --git a/lens-cube/src/test/java/org/apache/lens/cube/parse/TestUnionAndJoinCandidates.java b/lens-cube/src/test/java/org/apache/lens/cube/parse/TestUnionAndJoinCandidates.java index 9422a5c..429e1c6 100644 --- a/lens-cube/src/test/java/org/apache/lens/cube/parse/TestUnionAndJoinCandidates.java +++ b/lens-cube/src/test/java/org/apache/lens/cube/parse/TestUnionAndJoinCandidates.java @@ -49,10 +49,12 @@ public class TestUnionAndJoinCandidates extends TestQueryRewrite { getValidStorageTablesKey("union_join_ctx_fact1"), "C1_union_join_ctx_fact1", getValidStorageTablesKey("union_join_ctx_fact2"), "C1_union_join_ctx_fact2", getValidStorageTablesKey("union_join_ctx_fact3"), "C1_union_join_ctx_fact3", + getValidStorageTablesKey("union_join_ctx_fact4"), "C1_union_join_ctx_fact4", // Update periods getValidUpdatePeriodsKey("union_join_ctx_fact1", "C1"), "DAILY", getValidUpdatePeriodsKey("union_join_ctx_fact2", "C1"), "DAILY", - getValidUpdatePeriodsKey("union_join_ctx_fact3", "C1"), "DAILY"); + getValidUpdatePeriodsKey("union_join_ctx_fact3", "C1"), "DAILY", + getValidUpdatePeriodsKey("union_join_ctx_fact4", "C1"), "DAILY"); conf.setBoolean(DISABLE_AUTO_JOINS, false); conf.setBoolean(ENABLE_SELECT_TO_GROUPBY, true); conf.setBoolean(ENABLE_GROUP_BY_TO_SELECT, true); @@ -90,6 +92,62 @@ public class TestUnionAndJoinCandidates extends TestQueryRewrite { } @Test + public void testCustomExpressionForJoinCandidate() throws ParseException, LensException { + // Expr : (case when union_join_ctx_msr2_expr = 0 then 0 else + // union_join_ctx_msr4_expr * 100 / union_join_ctx_msr2_expr end) is completely answered by + // c1_union_join_ctx_fact4 and partly answered by c1_union_join_ctx_fact3 + String colsSelected = " union_join_ctx_notnullcityid, union_join_ctx_msr22 , " + + "case when union_join_ctx_msr2_expr = 0 then 0 else " + + " union_join_ctx_msr4_expr * 100 / union_join_ctx_msr2_expr end"; + String whereCond = "(" + TWO_MONTHS_RANGE_UPTO_DAYS + ")"; + String rewrittenQuery = rewrite("select " + colsSelected + " from basecube where " + whereCond, conf); + assertTrue(rewrittenQuery.contains("UNION ALL")); + String expectedInnerSelect1 = "SELECT case when (basecube.union_join_ctx_cityid) is null then 0 " + + "else (basecube.union_join_ctx_cityid) end as `alias0`, 0.0 as `alias1`, " + + "sum((basecube.union_join_ctx_msr2)) as `alias2`, sum((basecube.union_join_ctx_msr4)) as `alias3` " + + "FROM TestQueryRewrite.c1_union_join_ctx_fact4 basecube"; + String expectedInnerSelect2 = "SELECT case when (basecube.union_join_ctx_cityid) is null then 0 " + + "else (basecube.union_join_ctx_cityid) end as `alias0`, (basecube.union_join_ctx_msr22) as `alias1`, " + + "0.0 as `alias2`, 0.0 as `alias3` FROM TestQueryRewrite.c1_union_join_ctx_fact3 basecube"; + String outerSelect = "SELECT (basecube.alias0) as `union_join_ctx_notnullcityid`, (basecube.alias1) " + + "as `union_join_ctx_msr22`, case when ((sum((basecube.alias2)) + 0) = 0) then 0 else " + + "(((sum((basecube.alias3)) + 0) * 100) / (sum((basecube.alias2)) + 0)) end as " + + "`case when (union_join_ctx_msr2_expr = 0) then 0 " + + "else ((union_join_ctx_msr4_expr * 100) / union_join_ctx_msr2_expr) end`"; + String outerGroupBy = "GROUP BY (basecube.alias0)"; + compareContains(expectedInnerSelect1, rewrittenQuery); + compareContains(expectedInnerSelect2, rewrittenQuery); + compareContains(outerSelect, rewrittenQuery); + compareContains(outerGroupBy, rewrittenQuery); + } + + @Test + public void testDuplicateMeasureProjectionInJoinCandidate() throws ParseException, LensException { + // union_join_ctx_msr2 is common between two storage candidates and it should be answered from one + // and the other fact will have it replaced with 0.0 + String colsSelected = " union_join_ctx_notnullcityid, sum(union_join_ctx_msr22) , " + + "sum(union_join_ctx_msr2), sum(union_join_ctx_msr4) "; + String whereCond = "(" + TWO_MONTHS_RANGE_UPTO_DAYS + ")"; + String rewrittenQuery = rewrite("select " + colsSelected + " from basecube where " + whereCond, conf); + assertTrue(rewrittenQuery.contains("UNION ALL")); + String expectedInnerSelect1 = "SELECT case when (basecube.union_join_ctx_cityid) is null then 0 " + + "else (basecube.union_join_ctx_cityid) end as `alias0`, 0.0 as `alias1`, " + + "sum((basecube.union_join_ctx_msr2)) as `alias2`, sum((basecube.union_join_ctx_msr4)) " + + "as `alias3` FROM TestQueryRewrite.c1_union_join_ctx_fact4 basecube"; + String expectedInnerSelect2 = "SELECT case when (basecube.union_join_ctx_cityid) is null then 0 else " + + "(basecube.union_join_ctx_cityid) end as `alias0`, sum((basecube.union_join_ctx_msr22)) as `alias1`, " + + "0.0 as `alias2`, 0.0 as `alias3` FROM TestQueryRewrite.c1_union_join_ctx_fact3 basecube"; + String outerSelect = "SELECT (basecube.alias0) as `union_join_ctx_notnullcityid`, sum((basecube.alias1)) " + + "as `sum(union_join_ctx_msr22)`, sum((basecube.alias2)) as `sum(union_join_ctx_msr2)`, " + + "sum((basecube.alias3)) as `sum(union_join_ctx_msr4)` FROM"; + String outerGroupBy = "GROUP BY (basecube.alias0)"; + compareContains(expectedInnerSelect1, rewrittenQuery); + compareContains(expectedInnerSelect2, rewrittenQuery); + compareContains(outerSelect, rewrittenQuery); + compareContains(outerGroupBy, rewrittenQuery); + } + + @Test(invocationCount = 100) public void testFinalCandidateRewrittenQuery() throws ParseException, LensException { try { // Query with non projected measure in having clause. http://git-wip-us.apache.org/repos/asf/lens/blob/89b17e64/lens-cube/src/test/resources/schema/cubes/base/basecube.xml ---------------------------------------------------------------------- diff --git a/lens-cube/src/test/resources/schema/cubes/base/basecube.xml b/lens-cube/src/test/resources/schema/cubes/base/basecube.xml index 55099c8..708b510 100644 --- a/lens-cube/src/test/resources/schema/cubes/base/basecube.xml +++ b/lens-cube/src/test/resources/schema/cubes/base/basecube.xml @@ -46,6 +46,10 @@ </measure> <measure _type="INT" name="union_join_ctx_msr2" description="union_join_ctx_second measure"> </measure> + <measure _type="INT" name="union_join_ctx_msr22" description="union_join_ctx_second measure"> + </measure> + <measure _type="INT" name="union_join_ctx_msr4" description="union_join_ctx_fourth measure"> + </measure> <measure _type="FLOAT" default_aggr="SUM" unit="RS" name="msr2" display_string="Measure2" description="second measure"> </measure> @@ -249,6 +253,14 @@ description="union_join_ctx_non zero msr2 sum"> <expr_spec expr="sum(case when union_join_ctx_msr2 > 0 then union_join_ctx_msr2 else 0 end)"/> </expression> + <expression _type="int" name="union_join_ctx_msr2_expr" display_string="union_join_ctx_msr2_expr" + description="union_join_ctx_msr2_expr"> + <expr_spec expr="sum(union_join_ctx_msr2) + 0"/> + </expression> + <expression _type="int" name="union_join_ctx_msr4_expr" display_string="union_join_ctx_msr4_expr" + description="union_join_ctx_msr4_expr"> + <expr_spec expr="sum(union_join_ctx_msr4) + 0"/> + </expression> <expression _type="double" name="flooredmsr12" display_string="Floored msr12" description="floored measure12"> <expr_spec expr="floor(msr12)"/> </expression> http://git-wip-us.apache.org/repos/asf/lens/blob/89b17e64/lens-cube/src/test/resources/schema/cubes/base/testcube.xml ---------------------------------------------------------------------- diff --git a/lens-cube/src/test/resources/schema/cubes/base/testcube.xml b/lens-cube/src/test/resources/schema/cubes/base/testcube.xml index ef13700..088c8de 100644 --- a/lens-cube/src/test/resources/schema/cubes/base/testcube.xml +++ b/lens-cube/src/test/resources/schema/cubes/base/testcube.xml @@ -39,6 +39,7 @@ description="fifteenth measure"/> <measure _type="INT" name="union_join_ctx_msr3" description="union_join_ctx_third measure"/> <measure _type="INT" name="union_join_ctx_msr2" description="union_join_ctx_second measure"/> + <measure _type="INT" name="union_join_ctx_msr4" description="union_join_ctx_fourth measure"/> <measure _type="FLOAT" default_aggr="SUM" unit="RS" name="msr2" display_string="Measure2" description="second measure"/> <measure _type="DOUBLE" default_aggr="MAX" name="msr3" display_string="Measure3" description="third measure"/> @@ -267,6 +268,14 @@ description="union_join_ctx_Not null cityid"> <expr_spec expr="case when union_join_ctx_cityid is null then 0 else union_join_ctx_cityid end"/> </expression> + <expression _type="int" name="union_join_ctx_msr2_expr" display_string="union_join_ctx_msr2_expr" + description="union_join_ctx_msr2_expr"> + <expr_spec expr="sum(union_join_ctx_msr2) + 0"/> + </expression> + <expression _type="int" name="union_join_ctx_msr4_expr" display_string="union_join_ctx_msr4_expr" + description="union_join_ctx_msr4_expr"> + <expr_spec expr="sum(union_join_ctx_msr4) + 0"/> + </expression> <expression _type="String" name="isindia" display_string="Is Indian City/state" description="is indian city/state"> <expr_spec expr="cubecity.name == 'DELHI' OR cubestate.name == 'KARNATAKA' OR cubestate.name == 'MAHARASHTRA'"/> </expression> http://git-wip-us.apache.org/repos/asf/lens/blob/89b17e64/lens-cube/src/test/resources/schema/cubes/derived/union_join_ctx_der1.xml ---------------------------------------------------------------------- diff --git a/lens-cube/src/test/resources/schema/cubes/derived/union_join_ctx_der1.xml b/lens-cube/src/test/resources/schema/cubes/derived/union_join_ctx_der1.xml index 69eda6d..c23a029 100644 --- a/lens-cube/src/test/resources/schema/cubes/derived/union_join_ctx_der1.xml +++ b/lens-cube/src/test/resources/schema/cubes/derived/union_join_ctx_der1.xml @@ -35,6 +35,8 @@ <measure_name>union_join_ctx_msr2</measure_name> <measure_name>union_join_ctx_msr1</measure_name> <measure_name>union_join_ctx_msr3</measure_name> + <measure_name>union_join_ctx_msr4</measure_name> + <measure_name>union_join_ctx_msr22</measure_name> <measure_name>segmsr1</measure_name> </measure_names> <dim_attr_names> http://git-wip-us.apache.org/repos/asf/lens/blob/89b17e64/lens-cube/src/test/resources/schema/facts/union_join_ctx_fact1.xml ---------------------------------------------------------------------- diff --git a/lens-cube/src/test/resources/schema/facts/union_join_ctx_fact1.xml b/lens-cube/src/test/resources/schema/facts/union_join_ctx_fact1.xml index d5ac70a..b1f868b 100644 --- a/lens-cube/src/test/resources/schema/facts/union_join_ctx_fact1.xml +++ b/lens-cube/src/test/resources/schema/facts/union_join_ctx_fact1.xml @@ -21,7 +21,7 @@ --> <x_fact_table name="union_join_ctx_fact1" cube_name="baseCube" weight="5.0" xmlns="uri:lens:cube:0.1"> <columns> - <column name="union_join_ctx_msr1" _type="int" comment="first measure"/> + <column name="union_join_ctx_msr1" _type="int" comment="second measure"/> <column name="d_time" _type="timestamp" comment="event time"/> <column name="union_join_ctx_zipcode" _type="int" comment="zip"/> <column name="union_join_ctx_cityid" _type="int" comment="city id"/> http://git-wip-us.apache.org/repos/asf/lens/blob/89b17e64/lens-cube/src/test/resources/schema/facts/union_join_ctx_fact3.xml ---------------------------------------------------------------------- diff --git a/lens-cube/src/test/resources/schema/facts/union_join_ctx_fact3.xml b/lens-cube/src/test/resources/schema/facts/union_join_ctx_fact3.xml index 27e859e..b9557a7 100644 --- a/lens-cube/src/test/resources/schema/facts/union_join_ctx_fact3.xml +++ b/lens-cube/src/test/resources/schema/facts/union_join_ctx_fact3.xml @@ -22,6 +22,7 @@ <x_fact_table name="union_join_ctx_fact3" cube_name="baseCube" weight="5.0" xmlns="uri:lens:cube:0.1"> <columns> <column name="union_join_ctx_msr2" _type="int" comment="second measure"/> + <column name="union_join_ctx_msr22" _type="int" comment="second measure"/> <column name="d_time" _type="timestamp" comment="event time"/> <column name="union_join_ctx_zipcode" _type="int" comment="zip"/> <column name="union_join_ctx_cityid" _type="int" comment="city id"/> http://git-wip-us.apache.org/repos/asf/lens/blob/89b17e64/lens-cube/src/test/resources/schema/facts/union_join_ctx_fact4.xml ---------------------------------------------------------------------- diff --git a/lens-cube/src/test/resources/schema/facts/union_join_ctx_fact4.xml b/lens-cube/src/test/resources/schema/facts/union_join_ctx_fact4.xml new file mode 100644 index 0000000..2150304 --- /dev/null +++ b/lens-cube/src/test/resources/schema/facts/union_join_ctx_fact4.xml @@ -0,0 +1,59 @@ +<?xml version="1.0" encoding="UTF-8" standalone="yes"?> +<!-- + + Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, + software distributed under the License is distributed on an + "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + KIND, either express or implied. See the License for the + specific language governing permissions and limitations + under the License. + +--> +<x_fact_table name="union_join_ctx_fact4" cube_name="baseCube" weight="6.0" xmlns="uri:lens:cube:0.1"> + <columns> + <column name="union_join_ctx_msr4" _type="int" comment="fourth measure"/> + <column name="union_join_ctx_msr2" _type="int" comment="second measure"/> + <column name="d_time" _type="timestamp" comment="event time"/> + <column name="union_join_ctx_zipcode" _type="int" comment="zip"/> + <column name="union_join_ctx_cityid" _type="int" comment="city id"/> + </columns> + <properties> + <property name="cube.fact.union_join_ctx_fact4.cubename" value="baseCube"/> + <property name="cube.fact.absolute.start.time" value="$absolute{now.day - 90 days}"/> + <property name="cube.fact.union_join_ctx_fact4.c1.updateperiods" value="DAILY"/> + <property name="cube.fact.absolute.end.time" value="$absolute{now.day + 7 days}"/> + <property name="cube.fact.is.aggregated" value="false"/> + <property name="cube.fact.union_join_ctx_fact4.storages" value="C1"/> + <property name="cube.table.union_join_ctx_fact4.weight" value="5.0"/> + </properties> + <storage_tables> + <storage_table> + <update_periods> + <update_period>DAILY</update_period> + </update_periods> + <storage_name>C1</storage_name> + <table_desc external="false"> + <part_cols> + <column name="dt" _type="string" comment="date partition"/> + </part_cols> + <table_parameters> + <property name="cube.storagetable.time.partcols" value="dt"/> + </table_parameters> + <serde_parameters> + <property name="serialization.format" value="1"/> + </serde_parameters> + <time_part_cols>dt</time_part_cols> + </table_desc> + </storage_table> + </storage_tables> +</x_fact_table> \ No newline at end of file