LENS-611: Query does not fail for conflicting time dimension and measure (Himanshu)
Project: http://git-wip-us.apache.org/repos/asf/incubator-lens/repo Commit: http://git-wip-us.apache.org/repos/asf/incubator-lens/commit/ede537e6 Tree: http://git-wip-us.apache.org/repos/asf/incubator-lens/tree/ede537e6 Diff: http://git-wip-us.apache.org/repos/asf/incubator-lens/diff/ede537e6 Branch: refs/heads/current-release-line Commit: ede537e6bf22176dd689d5d727605e8c6442dae5 Parents: c823ec1 Author: Himanshu Gahlaut <[email protected]> Authored: Wed Jun 17 14:45:47 2015 +0530 Committer: Himanshu Gahlaut <[email protected]> Committed: Wed Jun 17 14:45:47 2015 +0530 ---------------------------------------------------------------------- .../apache/lens/cube/parse/ColumnResolver.java | 47 +++++++--- .../lens/cube/parse/CubeQueryContext.java | 23 +++++ .../apache/lens/cube/parse/FieldValidator.java | 17 ++-- .../apache/lens/cube/parse/CubeTestSetup.java | 10 ++- .../FieldsCannotBeQueriedTogetherTest.java | 90 +++++++++++++++----- 5 files changed, 149 insertions(+), 38 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/incubator-lens/blob/ede537e6/lens-cube/src/main/java/org/apache/lens/cube/parse/ColumnResolver.java ---------------------------------------------------------------------- diff --git a/lens-cube/src/main/java/org/apache/lens/cube/parse/ColumnResolver.java b/lens-cube/src/main/java/org/apache/lens/cube/parse/ColumnResolver.java index 0849381..2ff5959 100644 --- a/lens-cube/src/main/java/org/apache/lens/cube/parse/ColumnResolver.java +++ b/lens-cube/src/main/java/org/apache/lens/cube/parse/ColumnResolver.java @@ -31,6 +31,8 @@ import org.apache.hadoop.hive.ql.ErrorMsg; import org.apache.hadoop.hive.ql.parse.ASTNode; import org.apache.hadoop.hive.ql.parse.SemanticException; +import com.google.common.base.Optional; + class ColumnResolver implements ContextRewriter { public ColumnResolver(Configuration conf) { @@ -111,7 +113,6 @@ class ColumnResolver implements ContextRewriter { String column = colIdent.getText().toLowerCase(); String table = tabident.getText().toLowerCase(); - tqc.addColumnsQueried(table, column); } } @@ -188,7 +189,9 @@ class ColumnResolver implements ContextRewriter { // column is an existing alias return; } - cubeql.addColumnsQueried(CubeQueryContext.DEFAULT_TABLE, column); + + addColumnQueriedWithTimeRangeFuncCheck(cubeql, parent, CubeQueryContext.DEFAULT_TABLE, column); + } else if (node.getToken().getType() == DOT) { // This is for the case where column name is prefixed by table name // or table alias @@ -200,15 +203,12 @@ class ColumnResolver implements ContextRewriter { String column = colIdent.getText().toLowerCase(); String table = tabident.getText().toLowerCase(); - cubeql.addColumnsQueried(table, column); + addColumnQueriedWithTimeRangeFuncCheck(cubeql, parent, table, column); + } else if (node.getToken().getType() == TOK_FUNCTION) { ASTNode fname = HQLParser.findNodeByPath(node, Identifier); if (fname != null && CubeQueryContext.TIME_RANGE_FUNC.equalsIgnoreCase(fname.getText())) { - if (!cubeql.shouldReplaceTimeDimWithPart()) { - // Skip columns in timerange clause if replace timedimension with part - // column is on - addColumnsForWhere(cubeql, (ASTNode) node.getChild(1), node); - } + addColumnsForWhere(cubeql, (ASTNode) node.getChild(1), node); } else { for (int i = 0; i < node.getChildCount(); i++) { addColumnsForWhere(cubeql, (ASTNode) node.getChild(i), node); @@ -221,6 +221,35 @@ class ColumnResolver implements ContextRewriter { } } + private static void addColumnQueriedWithTimeRangeFuncCheck(final CubeQueryContext cubeql, final ASTNode parent, + final String table, final String column) { + + if (isTimeRangeFunc(parent)) { + cubeql.addQueriedTimeDimensionCols(column); + cubeql.addColumnsQueriedWithTimeDimCheck(CubeQueryContext.DEFAULT_TABLE, column); + } else { + cubeql.addColumnsQueried(table, column); + } + } + + private static boolean isTimeRangeFunc(final ASTNode node) { + + Optional<String> funcNameOp = getNameIfFunc(node); + final String funcName = funcNameOp.isPresent() ? funcNameOp.get() : null; + return CubeQueryContext.TIME_RANGE_FUNC.equalsIgnoreCase(funcName); + } + + private static Optional<String> getNameIfFunc(final ASTNode node) { + + String funcName = null; + if (node.getToken().getType() == TOK_FUNCTION) { + ASTNode foundNode = HQLParser.findNodeByPath(node, Identifier); + if (foundNode != null) { + funcName = foundNode.getText(); + } + } + return Optional.fromNullable(funcName); + } private static void addColumnsForSelectExpr(final CubeQueryContext cubeql, ASTNode node, ASTNode parent, Set<String> cols) { if (node.getToken().getType() == TOK_TABLE_OR_COL && (parent != null && parent.getToken().getType() != DOT)) { @@ -243,7 +272,6 @@ class ColumnResolver implements ContextRewriter { String column = colIdent.getText().toLowerCase(); String table = tabident.getText().toLowerCase(); - cubeql.addColumnsQueried(table, column); cols.add(column); } else { @@ -252,5 +280,4 @@ class ColumnResolver implements ContextRewriter { } } } - } http://git-wip-us.apache.org/repos/asf/incubator-lens/blob/ede537e6/lens-cube/src/main/java/org/apache/lens/cube/parse/CubeQueryContext.java ---------------------------------------------------------------------- diff --git a/lens-cube/src/main/java/org/apache/lens/cube/parse/CubeQueryContext.java b/lens-cube/src/main/java/org/apache/lens/cube/parse/CubeQueryContext.java index b7900be..ae65287 100644 --- a/lens-cube/src/main/java/org/apache/lens/cube/parse/CubeQueryContext.java +++ b/lens-cube/src/main/java/org/apache/lens/cube/parse/CubeQueryContext.java @@ -23,6 +23,8 @@ import static org.apache.hadoop.hive.ql.parse.HiveParser.Identifier; import static org.apache.hadoop.hive.ql.parse.HiveParser.TOK_TABLE_OR_COL; import static org.apache.hadoop.hive.ql.parse.HiveParser.TOK_TMP_FILE; +import static com.google.common.base.Preconditions.checkArgument; + import java.io.ByteArrayOutputStream; import java.io.IOException; import java.util.*; @@ -42,6 +44,7 @@ import org.apache.hadoop.hive.ql.parse.*; import org.codehaus.jackson.map.ObjectMapper; +import com.google.common.collect.ImmutableSet; import com.google.common.collect.Sets; import lombok.Getter; import lombok.Setter; @@ -83,6 +86,8 @@ public class CubeQueryContext implements TrackQueriedColumns { @Getter private final Set<String> queriedExprs = new HashSet<String>(); + private final Set<String> queriedTimeDimCols = new LinkedHashSet<String>(); + @Getter private final Set<String> queriedExprsWithMeasures = new HashSet<String>(); @@ -902,7 +907,15 @@ public class CubeQueryContext implements TrackQueriedColumns { addColumnsQueried(getAliasForTableName(table.getName()), column); } + public void addColumnsQueriedWithTimeDimCheck(String alias, String timeDimColumn) { + + if (!shouldReplaceTimeDimWithPart()) { + addColumnsQueried(alias, timeDimColumn); + } + } + public void addColumnsQueried(String alias, String column) { + Set<String> cols = tblAliasToColumns.get(alias.toLowerCase()); if (cols == null) { cols = new LinkedHashSet<String>(); @@ -1154,4 +1167,14 @@ public class CubeQueryContext implements TrackQueriedColumns { } } } + + public void addQueriedTimeDimensionCols(final String timeDimColName) { + + checkArgument(StringUtils.isNotBlank(timeDimColName)); + this.queriedTimeDimCols.add(timeDimColName); + } + + public ImmutableSet<String> getQueriedTimeDimCols() { + return ImmutableSet.copyOf(this.queriedTimeDimCols); + } } http://git-wip-us.apache.org/repos/asf/incubator-lens/blob/ede537e6/lens-cube/src/main/java/org/apache/lens/cube/parse/FieldValidator.java ---------------------------------------------------------------------- diff --git a/lens-cube/src/main/java/org/apache/lens/cube/parse/FieldValidator.java b/lens-cube/src/main/java/org/apache/lens/cube/parse/FieldValidator.java index 60a767d..03377dd 100644 --- a/lens-cube/src/main/java/org/apache/lens/cube/parse/FieldValidator.java +++ b/lens-cube/src/main/java/org/apache/lens/cube/parse/FieldValidator.java @@ -32,6 +32,8 @@ import org.apache.hadoop.hive.ql.parse.ASTNode; import org.apache.hadoop.hive.ql.parse.HiveParser; import org.apache.hadoop.hive.ql.parse.SemanticException; +import com.google.common.collect.ImmutableSet; + /** * Validate fields based on cube queryability */ @@ -57,23 +59,28 @@ public class FieldValidator implements ContextRewriter { throw new SemanticException(e); } - // dim attributes and chained source columns should only come from WHERE and GROUP BY ASTs - Set<String> queriedDimAttrs = new LinkedHashSet<String>(); + ImmutableSet<String> queriedTimeDimCols = cubeql.getQueriedTimeDimCols(); + + Set<String> queriedDimAttrs = new LinkedHashSet<String>(queriedTimeDimCols); + Set<String> nonQueryableFields = new LinkedHashSet<String>(queriedTimeDimCols); + Set<String> queriedMsrs = new LinkedHashSet<String>(cubeql.getQueriedMsrs()); queriedMsrs.addAll(getMeasuresFromExprMeasures(cubeql)); Set<String> chainedSrcColumns = new HashSet<String>(); - Set<String> nonQueryableFields = new LinkedHashSet<String>(); - findDimAttrsAndChainSourceColumns(cubeql, cubeql.getGroupByAST(), queriedDimAttrs, - chainedSrcColumns, nonQueryableFields); + // dim attributes and chained source columns should only come from WHERE and GROUP BY ASTs + findDimAttrsAndChainSourceColumns(cubeql, cubeql.getGroupByAST(), queriedDimAttrs, chainedSrcColumns, + nonQueryableFields); findDimAttrsAndChainSourceColumns(cubeql, cubeql.getWhereAST(), queriedDimAttrs, chainedSrcColumns, nonQueryableFields); // do validation // Find atleast one derived cube which contains all the dimensions // queried. + boolean derivedCubeFound = false; for (DerivedCube dcube : dcubes) { + if (dcube.getDimAttributeNames().containsAll(chainedSrcColumns) && dcube.getDimAttributeNames().containsAll(queriedDimAttrs)) { // remove all the measures that are covered http://git-wip-us.apache.org/repos/asf/incubator-lens/blob/ede537e6/lens-cube/src/test/java/org/apache/lens/cube/parse/CubeTestSetup.java ---------------------------------------------------------------------- diff --git a/lens-cube/src/test/java/org/apache/lens/cube/parse/CubeTestSetup.java b/lens-cube/src/test/java/org/apache/lens/cube/parse/CubeTestSetup.java index 49fabff..432c5f4 100644 --- a/lens-cube/src/test/java/org/apache/lens/cube/parse/CubeTestSetup.java +++ b/lens-cube/src/test/java/org/apache/lens/cube/parse/CubeTestSetup.java @@ -833,9 +833,9 @@ public class CubeTestSetup { }; // add ref dim through chain - cubeDimensions2.add(new ReferencedDimAtrribute( - new FieldSchema("cityStateCapital", "string", "State's capital thru city"), "State's capital thru city", - "cityState", "capital", null, null, null)); + cubeDimensions2.add( + new ReferencedDimAtrribute(new FieldSchema("cityStateCapital", "string", "State's capital thru city"), + "State's capital thru city", "cityState", "capital", null, null, null)); client.createCube(BASE_CUBE_NAME, cubeMeasures2, cubeDimensions2, exprs, joinchains, cubeProperties); Map<String, String> derivedProperties = new HashMap<String, String>(); @@ -846,6 +846,7 @@ public class CubeTestSetup { Set<String> dimensions = new HashSet<String>(); dimensions.add("dim1"); dimensions.add("dim11"); + dimensions.add("d_time"); client.createDerivedCube(BASE_CUBE_NAME, DERIVED_CUBE_NAME1, measures, dimensions, derivedProperties, 5L); measures = new HashSet<String>(); @@ -860,6 +861,8 @@ public class CubeTestSetup { dimensions.add("dim2"); dimensions.add("dim11"); dimensions.add("dim12"); + dimensions.add("d_time"); + dimensions.add("test_time_dim"); client.createDerivedCube(BASE_CUBE_NAME, DERIVED_CUBE_NAME2, measures, dimensions, derivedProperties, 10L); measures = new HashSet<String>(); measures.add("msr3"); @@ -867,6 +870,7 @@ public class CubeTestSetup { dimensions = new HashSet<String>(); dimensions.add("dim1"); dimensions.add("location"); + dimensions.add("d_time"); client.createDerivedCube(BASE_CUBE_NAME, DERIVED_CUBE_NAME3, measures, dimensions, derivedProperties, 20L); // create base cube facts http://git-wip-us.apache.org/repos/asf/incubator-lens/blob/ede537e6/lens-cube/src/test/java/org/apache/lens/cube/parse/FieldsCannotBeQueriedTogetherTest.java ---------------------------------------------------------------------- diff --git a/lens-cube/src/test/java/org/apache/lens/cube/parse/FieldsCannotBeQueriedTogetherTest.java b/lens-cube/src/test/java/org/apache/lens/cube/parse/FieldsCannotBeQueriedTogetherTest.java index 0a37c3d..398c45e 100644 --- a/lens-cube/src/test/java/org/apache/lens/cube/parse/FieldsCannotBeQueriedTogetherTest.java +++ b/lens-cube/src/test/java/org/apache/lens/cube/parse/FieldsCannotBeQueriedTogetherTest.java @@ -18,7 +18,7 @@ */ package org.apache.lens.cube.parse; -import static org.apache.lens.cube.parse.CubeTestSetup.TWO_DAYS_RANGE; +import static org.apache.lens.cube.parse.CubeTestSetup.*; import static org.testng.Assert.assertEquals; import static org.testng.Assert.fail; @@ -59,7 +59,7 @@ public class FieldsCannotBeQueriedTogetherTest extends TestQueryRewrite { exception. */ testFieldsCannotBeQueriedTogetherError("select dim2, SUM(msr1) from basecube where " + TWO_DAYS_RANGE, - Arrays.asList("dim2", "msr1")); + Arrays.asList("dim2", "d_time", "msr1")); } @Test @@ -72,7 +72,7 @@ public class FieldsCannotBeQueriedTogetherTest extends TestQueryRewrite { disallowed with appropriate exception. */ testFieldsCannotBeQueriedTogetherError("select dim2, sum(roundedmsr1) from basecube where " + TWO_DAYS_RANGE, - Arrays.asList("dim2", "msr1")); + Arrays.asList("dim2", "d_time", "msr1")); } @Test @@ -85,7 +85,7 @@ public class FieldsCannotBeQueriedTogetherTest extends TestQueryRewrite { disallowed with appropriate exception. */ testFieldsCannotBeQueriedTogetherError("select substrexprdim2, SUM(msr1) from basecube where " + TWO_DAYS_RANGE, - Arrays.asList("dim2", "msr1")); + Arrays.asList("dim2", "d_time", "msr1")); } @Test @@ -98,7 +98,7 @@ public class FieldsCannotBeQueriedTogetherTest extends TestQueryRewrite { derived cube, hence query shall be disallowed with appropriate exception. */ testFieldsCannotBeQueriedTogetherError("select substrexprdim2, sum(roundedmsr1) from basecube where " - + TWO_DAYS_RANGE, Arrays.asList("dim2", "msr1")); + + TWO_DAYS_RANGE, Arrays.asList("dim2", "d_time", "msr1")); } @Test @@ -114,7 +114,7 @@ public class FieldsCannotBeQueriedTogetherTest extends TestQueryRewrite { query shall be disallowed with appropriate exception. */ testFieldsCannotBeQueriedTogetherError("select citystate.name, SUM(msr1) from basecube where " + TWO_DAYS_RANGE, - Arrays.asList("citystate.name", "msr1")); + Arrays.asList("citystate.name", "d_time", "msr1")); } @Test @@ -130,7 +130,7 @@ public class FieldsCannotBeQueriedTogetherTest extends TestQueryRewrite { in the same derived cube, hence query shall be disallowed with appropriate exception. */ testFieldsCannotBeQueriedTogetherError("select citystate.name, sum(roundedmsr1) from basecube where " - + TWO_DAYS_RANGE, Arrays.asList("citystate.name", "msr1")); + + TWO_DAYS_RANGE, Arrays.asList("citystate.name", "d_time", "msr1")); } @Test @@ -146,7 +146,7 @@ public class FieldsCannotBeQueriedTogetherTest extends TestQueryRewrite { in the same derived cube, hence query shall be disallowed with appropriate exception. */ testFieldsCannotBeQueriedTogetherError("select cubestateName, sum(roundedmsr1) from basecube where " - + TWO_DAYS_RANGE, Arrays.asList("cubestate.name", "msr1")); + + TWO_DAYS_RANGE, Arrays.asList("cubestate.name", "d_time", "msr1")); } @Test @@ -162,7 +162,7 @@ public class FieldsCannotBeQueriedTogetherTest extends TestQueryRewrite { shall be disallowed with appropriate exception. */ testFieldsCannotBeQueriedTogetherError("select SUM(msr1) from basecube where cityState.name = 'foo' and " - + TWO_DAYS_RANGE, Arrays.asList("citystate.name", "msr1")); + + TWO_DAYS_RANGE, Arrays.asList("citystate.name", "d_time", "msr1")); } @Test @@ -178,7 +178,7 @@ public class FieldsCannotBeQueriedTogetherTest extends TestQueryRewrite { same derived cube, hence query shall be disallowed with appropriate exception. */ testFieldsCannotBeQueriedTogetherError("select sum(roundedmsr1) from basecube where cityState.name = 'foo' and " - + TWO_DAYS_RANGE, Arrays.asList("citystate.name", "msr1")); + + TWO_DAYS_RANGE, Arrays.asList("citystate.name", "d_time", "msr1")); } @Test @@ -193,8 +193,9 @@ public class FieldsCannotBeQueriedTogetherTest extends TestQueryRewrite { queryable through chain source column cityid. cityid and roundedmsr1( expression over msr1) are not present in the same derived cube, hence query shall be disallowed with appropriate exception. */ - testFieldsCannotBeQueriedTogetherError("select sum(roundedmsr1) from basecube where cubestatename = 'foo' and " - + TWO_DAYS_RANGE, Arrays.asList("cubestate.name", "msr1")); + testFieldsCannotBeQueriedTogetherError( + "select sum(roundedmsr1) from basecube where cubestatename = 'foo' and " + TWO_DAYS_RANGE, + Arrays.asList("cubestate.name", "d_time", "msr1")); } @Test @@ -237,7 +238,7 @@ public class FieldsCannotBeQueriedTogetherTest extends TestQueryRewrite { /* dim2 and countryid are not present in the same derived cube, hence query should be disallowed */ testFieldsCannotBeQueriedTogetherError("select dim2, countryid, SUM(msr2) from basecube where " + TWO_DAYS_RANGE, - Arrays.asList("countryid", "dim2")); + Arrays.asList("countryid", "d_time", "dim2")); } @Test @@ -248,7 +249,7 @@ public class FieldsCannotBeQueriedTogetherTest extends TestQueryRewrite { * disallowed */ testFieldsCannotBeQueriedTogetherError("select substrexprdim2, cubeStateName, countryid, SUM(msr2) from basecube" - + " where " + TWO_DAYS_RANGE, Arrays.asList("countryid", "dim2", "cubestate.name")); + + " where " + TWO_DAYS_RANGE, Arrays.asList("countryid", "dim2", "cubestate.name", "d_time")); } @Test @@ -256,8 +257,8 @@ public class FieldsCannotBeQueriedTogetherTest extends TestQueryRewrite { /* newmeasure is not present in any derived cube, hence the query should be disallowed. */ - testFieldsCannotBeQueriedTogetherError("select newmeasure from basecube where " - + TWO_DAYS_RANGE, Arrays.asList("newmeasure")); + testFieldsCannotBeQueriedTogetherError("select newmeasure from basecube where " + TWO_DAYS_RANGE, + Arrays.asList( "d_time", "newmeasure")); } @Test @@ -266,7 +267,7 @@ public class FieldsCannotBeQueriedTogetherTest extends TestQueryRewrite { /* newexpr : expression over newmeasure is not present in any derived cube, hence the query should be disallowed. */ testFieldsCannotBeQueriedTogetherError("select newexpr from basecube where " - + TWO_DAYS_RANGE, Arrays.asList("newmeasure")); + + TWO_DAYS_RANGE, Arrays.asList("d_time", "newmeasure")); } @Test @@ -283,16 +284,65 @@ public class FieldsCannotBeQueriedTogetherTest extends TestQueryRewrite { testFieldsCannotBeQueriedTogetherError( "select citystatecapital, SUM(msr1) from basecube where " + TWO_DAYS_RANGE, - Arrays.asList("citystatecapital", "msr1")); + Arrays.asList("citystatecapital", "d_time", "msr1")); + } + + @Test + public void testQueryWtihTimeDimAndReplaceTimeDimSwitchTrue() throws ParseException, SemanticException, + LensException { + + /* If a time dimension and measure are not present in the same derived cube, then query shall be disallowed. + + The testQuery in this test uses d_time in time range func. d_time is a time dimension in basecube. + d_time is present as a dimension in derived cube where as msr4 is not present in the same derived cube, hence + the query shall be disallowed. + + The testQuery in this test uses its own queryConf which has CubeQueryConfUtil.REPLACE_TIMEDIM_WITH_PART_COL + set to true. */ + + Configuration queryConf = new Configuration(conf); + queryConf.setBoolean(CubeQueryConfUtil.REPLACE_TIMEDIM_WITH_PART_COL, true); + + testFieldsCannotBeQueriedTogetherError("select msr4 from basecube where " + "time_range_in(d_time, '" + + getDateUptoHours(TWODAYS_BACK) + "','" + getDateUptoHours(CubeTestSetup.NOW) + "')", + Arrays.asList("d_time", "msr4"), queryConf); + } + + @Test + public void testQueryWtihTimeDimAndReplaceTimeDimSwitchFalse() throws ParseException, SemanticException, + LensException { + + /* If a time dimension and measure are not present in the same derived cube, then query shall be disallowed. + + The testQuery in this test uses d_time in time range func. d_time is a time dimension in basecube. + d_time is present as a dimension in derived cube where as msr4 is not present in the same derived cube, hence + the query shall be disallowed. + + The testQuery in this test uses its own queryConf which has CubeQueryConfUtil.REPLACE_TIMEDIM_WITH_PART_COL + set to false */ + + Configuration queryConf = new Configuration(conf); + queryConf.setBoolean(CubeQueryConfUtil.REPLACE_TIMEDIM_WITH_PART_COL, false); + + testFieldsCannotBeQueriedTogetherError("select msr4 from basecube where " + "time_range_in(d_time, '" + + getDateUptoHours(TWODAYS_BACK) + "','" + getDateUptoHours(CubeTestSetup.NOW) + "')", + Arrays.asList("d_time", "msr4"), queryConf); } private void testFieldsCannotBeQueriedTogetherError(final String testQuery, final List<String> conflictingFields) throws ParseException, SemanticException, LensException { + testFieldsCannotBeQueriedTogetherError(testQuery, conflictingFields, conf); + } + + private void testFieldsCannotBeQueriedTogetherError(final String testQuery, final List<String> conflictingFields, + final Configuration queryConf) + throws ParseException, SemanticException, LensException { try { - String hqlQuery = rewrite(testQuery, conf); - fail("Expected FieldsCannotBeQueriedTogetherException but it didn't come, rewrittenQuery:" + hqlQuery); + String hqlQuery = rewrite(testQuery, queryConf); + fail("Expected Query Rewrite to fail with FieldsCannotBeQueriedTogetherException, however it didn't happen. " + + "Query got re-written to:" + hqlQuery); } catch(FieldsCannotBeQueriedTogetherException actualException) { SortedSet<String> expectedFields = new TreeSet<String>(conflictingFields);
