LENS-174: Cube rewriter changes to support multiple expressions
Project: http://git-wip-us.apache.org/repos/asf/incubator-lens/repo Commit: http://git-wip-us.apache.org/repos/asf/incubator-lens/commit/82553db3 Tree: http://git-wip-us.apache.org/repos/asf/incubator-lens/tree/82553db3 Diff: http://git-wip-us.apache.org/repos/asf/incubator-lens/diff/82553db3 Branch: refs/heads/master Commit: 82553db30abbc981c7e7bbc3788faa7db3270156 Parents: ca30cf8 Author: Amareshwari Sriramadasu <[email protected]> Authored: Tue May 26 18:06:41 2015 +0530 Committer: Rajat Khandelwal <[email protected]> Committed: Tue May 26 18:06:41 2015 +0530 ---------------------------------------------------------------------- .../apache/lens/cube/metadata/Dimension.java | 6 +- .../apache/lens/cube/metadata/ExprColumn.java | 2 +- .../lens/cube/parse/AggregateResolver.java | 113 ++- .../apache/lens/cube/parse/AliasReplacer.java | 46 +- .../apache/lens/cube/parse/CandidateFact.java | 12 +- .../cube/parse/CandidateTablePruneCause.java | 28 +- .../lens/cube/parse/CandidateTableResolver.java | 73 +- .../apache/lens/cube/parse/ColumnResolver.java | 26 +- .../lens/cube/parse/CubeQueryContext.java | 98 ++- .../lens/cube/parse/CubeQueryRewriter.java | 15 +- .../cube/parse/DenormalizationResolver.java | 60 +- .../lens/cube/parse/ExpressionResolver.java | 744 +++++++++++++++++-- .../apache/lens/cube/parse/FieldValidator.java | 57 +- .../apache/lens/cube/parse/JoinResolver.java | 4 +- .../lens/cube/parse/TimerangeResolver.java | 1 - .../lens/cube/parse/TrackQueriedColumns.java | 27 + .../apache/lens/cube/parse/CubeTestSetup.java | 46 +- .../FieldsCannotBeQueriedTogetherTest.java | 138 +++- .../lens/cube/parse/TestBaseCubeQueries.java | 112 +-- .../cube/parse/TestDenormalizationResolver.java | 45 +- .../lens/cube/parse/TestExpressionContext.java | 111 +++ .../lens/cube/parse/TestExpressionResolver.java | 306 ++++++-- .../lens/cube/parse/TestJoinResolver.java | 54 +- .../lens/cube/parse/TestQueryMetrics.java | 14 +- lens-cube/src/test/resources/log4j.properties | 8 +- 25 
files changed, 1726 insertions(+), 420 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/incubator-lens/blob/82553db3/lens-cube/src/main/java/org/apache/lens/cube/metadata/Dimension.java ---------------------------------------------------------------------- diff --git a/lens-cube/src/main/java/org/apache/lens/cube/metadata/Dimension.java b/lens-cube/src/main/java/org/apache/lens/cube/metadata/Dimension.java index 1a4b581..9cba3a7 100644 --- a/lens-cube/src/main/java/org/apache/lens/cube/metadata/Dimension.java +++ b/lens-cube/src/main/java/org/apache/lens/cube/metadata/Dimension.java @@ -171,7 +171,11 @@ public class Dimension extends AbstractBaseTable { } public CubeColumn getColumnByName(String column) { - return getAttributeByName(column); + CubeColumn cubeCol = super.getExpressionByName(column); + if (cubeCol == null) { + cubeCol = getAttributeByName(column); + } + return cubeCol; } /** http://git-wip-us.apache.org/repos/asf/incubator-lens/blob/82553db3/lens-cube/src/main/java/org/apache/lens/cube/metadata/ExprColumn.java ---------------------------------------------------------------------- diff --git a/lens-cube/src/main/java/org/apache/lens/cube/metadata/ExprColumn.java b/lens-cube/src/main/java/org/apache/lens/cube/metadata/ExprColumn.java index a7f711f..3fc661f 100644 --- a/lens-cube/src/main/java/org/apache/lens/cube/metadata/ExprColumn.java +++ b/lens-cube/src/main/java/org/apache/lens/cube/metadata/ExprColumn.java @@ -142,7 +142,7 @@ public class ExprColumn extends CubeColumn { this.endTime = endTime; } - synchronized ASTNode getASTNode() { + public synchronized ASTNode getASTNode() { if (astNode == null) { try { if (StringUtils.isNotBlank(expr)) { http://git-wip-us.apache.org/repos/asf/incubator-lens/blob/82553db3/lens-cube/src/main/java/org/apache/lens/cube/parse/AggregateResolver.java ---------------------------------------------------------------------- diff --git 
a/lens-cube/src/main/java/org/apache/lens/cube/parse/AggregateResolver.java b/lens-cube/src/main/java/org/apache/lens/cube/parse/AggregateResolver.java index 76b5729..2ad2b7c 100644 --- a/lens-cube/src/main/java/org/apache/lens/cube/parse/AggregateResolver.java +++ b/lens-cube/src/main/java/org/apache/lens/cube/parse/AggregateResolver.java @@ -125,7 +125,7 @@ class AggregateResolver implements ContextRewriter { } private void transform(CubeQueryContext cubeql, ASTNode parent, ASTNode node, int nodePos) throws SemanticException { - if (parent == null || node == null) { + if (node == null) { return; } int nodeType = node.getToken().getType(); @@ -135,16 +135,18 @@ class AggregateResolver implements ContextRewriter { // Leaf node ASTNode wrapped = wrapAggregate(cubeql, node); if (wrapped != node) { - parent.setChild(nodePos, wrapped); - // Check if this node has an alias - ASTNode sibling = HQLParser.findNodeByPath(parent, Identifier); - String expr; - if (sibling != null) { - expr = HQLParser.getString(parent); - } else { - expr = HQLParser.getString(wrapped); + if (parent != null) { + parent.setChild(nodePos, wrapped); + // Check if this node has an alias + ASTNode sibling = HQLParser.findNodeByPath(parent, Identifier); + String expr; + if (sibling != null) { + expr = HQLParser.getString(parent); + } else { + expr = HQLParser.getString(wrapped); + } + cubeql.addAggregateExpr(expr.trim()); } - cubeql.addAggregateExpr(expr.trim()); } } else { // Dig deeper in non-leaf nodes @@ -176,23 +178,31 @@ class AggregateResolver implements ContextRewriter { String msrname = StringUtils.isBlank(tabname) ? colname : tabname + "." 
+ colname; if (cubeql.isCubeMeasure(msrname)) { - CubeMeasure measure = cubeql.getCube().getMeasureByName(colname); - String aggregateFn = measure.getAggregate(); + if (cubeql.getQueriedExprs().contains(colname)) { + String alias = cubeql.getAliasForTableName(cubeql.getCube().getName()); + for (ASTNode exprNode : cubeql.getExprCtx().getExpressionContext(colname, alias).getAllASTNodes()) { + transform(cubeql, null, exprNode, 0); + } + return node; + } else { + CubeMeasure measure = cubeql.getCube().getMeasureByName(colname); + String aggregateFn = measure.getAggregate(); - if (StringUtils.isBlank(aggregateFn)) { - throw new SemanticException(ErrorMsg.NO_DEFAULT_AGGREGATE, colname); - } - ASTNode fnroot = new ASTNode(new CommonToken(HiveParser.TOK_FUNCTION)); - fnroot.setParent(node.getParent()); + if (StringUtils.isBlank(aggregateFn)) { + throw new SemanticException(ErrorMsg.NO_DEFAULT_AGGREGATE, colname); + } + ASTNode fnroot = new ASTNode(new CommonToken(HiveParser.TOK_FUNCTION)); + fnroot.setParent(node.getParent()); - ASTNode fnIdentNode = new ASTNode(new CommonToken(HiveParser.Identifier, aggregateFn)); - fnIdentNode.setParent(fnroot); - fnroot.addChild(fnIdentNode); + ASTNode fnIdentNode = new ASTNode(new CommonToken(HiveParser.Identifier, aggregateFn)); + fnIdentNode.setParent(fnroot); + fnroot.addChild(fnIdentNode); - node.setParent(fnroot); - fnroot.addChild(node); + node.setParent(fnroot); + fnroot.addChild(node); - return fnroot; + return fnroot; + } } else { return node; } @@ -208,7 +218,7 @@ class AggregateResolver implements ContextRewriter { if (node.getChild(0).getType() == HiveParser.Identifier) { function = BaseSemanticAnalyzer.unescapeIdentifier(node.getChild(0).getText()); } - } else if (isMeasure(cubeql, node)) { + } else if (cubeql.isMeasure(node)) { // Exit for the recursion String colname; @@ -219,16 +229,27 @@ class AggregateResolver implements ContextRewriter { ASTNode colIdent = (ASTNode) node.getChild(1); colname = colIdent.getText(); } - 
CubeMeasure measure = cubeql.getCube().getMeasureByName(colname); - if (function != null && !function.isEmpty()) { - // Get the cube measure object and check if the passed function is the - // default one set for this measure - return !function.equalsIgnoreCase(measure.getAggregate()); - } else if (!aggregateResolverDisabled && measure.getAggregate() != null) { - // not inside any aggregate, but default aggregate exists + colname = colname.toLowerCase(); + if (cubeql.getQueriedExprs().contains(colname)) { + String cubeAlias = cubeql.getAliasForTableName(cubeql.getCube().getName()); + for (ASTNode exprNode : cubeql.getExprCtx().getExpressionContext(colname, cubeAlias).getAllASTNodes()) { + if (hasMeasuresNotInDefaultAggregates(cubeql, exprNode, function, aggregateResolverDisabled)) { + return true; + } + } return false; + } else { + CubeMeasure measure = cubeql.getCube().getMeasureByName(colname); + if (function != null && !function.isEmpty()) { + // Get the cube measure object and check if the passed function is the + // default one set for this measure + return !function.equalsIgnoreCase(measure.getAggregate()); + } else if (!aggregateResolverDisabled && measure.getAggregate() != null) { + // not inside any aggregate, but default aggregate exists + return false; + } + return true; } - return true; } for (int i = 0; i < node.getChildCount(); i++) { @@ -270,7 +291,7 @@ class AggregateResolver implements ContextRewriter { boolean isDistinct = hasDistinct; if (exprTokenType == HiveParser.TOK_FUNCTIONDI || exprTokenType == HiveParser.TOK_SELECTDI) { isDistinct = true; - } else if (isMeasure(cubeql, node) && isDistinct) { + } else if (cubeql.isMeasure(node) && isDistinct) { // Exit for the recursion return true; } @@ -289,7 +310,7 @@ class AggregateResolver implements ContextRewriter { return false; } - if (isMeasure(cubeql, node)) { + if (cubeql.isMeasure(node)) { return true; } @@ -302,30 +323,6 @@ class AggregateResolver implements ContextRewriter { return false; } - 
private boolean isMeasure(CubeQueryContext cubeql, ASTNode node) { - String tabname = null; - String colname; - int nodeType = node.getToken().getType(); - if (!(nodeType == HiveParser.TOK_TABLE_OR_COL || nodeType == HiveParser.DOT)) { - return false; - } - - if (nodeType == HiveParser.TOK_TABLE_OR_COL) { - colname = ((ASTNode) node.getChild(0)).getText(); - } else { - // node in 'alias.column' format - ASTNode tabident = HQLParser.findNodeByPath(node, TOK_TABLE_OR_COL, Identifier); - ASTNode colIdent = (ASTNode) node.getChild(1); - - colname = colIdent.getText(); - tabname = tabident.getText(); - } - - String msrname = StringUtils.isBlank(tabname) ? colname : tabname + "." + colname; - - return cubeql.isCubeMeasure(msrname); - } - static void updateAggregates(ASTNode root, CubeQueryContext cubeql) { if (root == null) { return; http://git-wip-us.apache.org/repos/asf/incubator-lens/blob/82553db3/lens-cube/src/main/java/org/apache/lens/cube/parse/AliasReplacer.java ---------------------------------------------------------------------- diff --git a/lens-cube/src/main/java/org/apache/lens/cube/parse/AliasReplacer.java b/lens-cube/src/main/java/org/apache/lens/cube/parse/AliasReplacer.java index 9d367c3..4d3443c 100644 --- a/lens-cube/src/main/java/org/apache/lens/cube/parse/AliasReplacer.java +++ b/lens-cube/src/main/java/org/apache/lens/cube/parse/AliasReplacer.java @@ -21,18 +21,14 @@ package org.apache.lens.cube.parse; import static org.apache.hadoop.hive.ql.parse.HiveParser.Identifier; import static org.apache.hadoop.hive.ql.parse.HiveParser.TOK_SELEXPR; -import java.util.HashMap; import java.util.HashSet; import java.util.Map; import java.util.Set; -import org.apache.lens.cube.metadata.AbstractCubeTable; import org.apache.lens.cube.metadata.CubeInterface; import org.apache.lens.cube.metadata.Dimension; import org.apache.commons.lang.StringUtils; -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; import 
org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hive.ql.ErrorMsg; import org.apache.hadoop.hive.ql.parse.ASTNode; @@ -50,21 +46,20 @@ import org.antlr.runtime.CommonToken; */ class AliasReplacer implements ContextRewriter { - private static final Log LOG = LogFactory.getLog(AliasReplacer.class.getName()); - - // Mapping of a qualified column name to its table alias - private Map<String, String> colToTableAlias; - public AliasReplacer(Configuration conf) { } @Override public void rewriteContext(CubeQueryContext cubeql) throws SemanticException { - colToTableAlias = new HashMap<String, String>(); + Map<String, String> colToTableAlias = cubeql.getColToTableAlias(); extractTabAliasForCol(cubeql); findDimAttributesAndMeasures(cubeql); + if (colToTableAlias.isEmpty()) { + return; + } + // Rewrite the all the columns in the query with table alias prefixed. // If col1 of table tab1 is accessed, it would be changed as tab1.col1. // If tab1 is already aliased say with t1, col1 is changed as t1.col1 @@ -76,10 +71,6 @@ class AliasReplacer implements ContextRewriter { // 2: (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL src) key)) // (TOK_SELEXPR (TOK_FUNCTION count (. (TOK_TABLE_OR_COL src) value)))) // 3: (TOK_SELECT (TOK_SELEXPR (. 
(TOK_TABLE_OR_COL src) key) srckey)))) - if (colToTableAlias == null) { - return; - } - replaceAliases(cubeql.getSelectAST(), 0, colToTableAlias); replaceAliases(cubeql.getHavingAST(), 0, colToTableAlias); @@ -92,13 +83,11 @@ class AliasReplacer implements ContextRewriter { replaceAliases(cubeql.getJoinTree(), 0, colToTableAlias); - // Update the aggregate expression set AggregateResolver.updateAggregates(cubeql.getSelectAST(), cubeql); AggregateResolver.updateAggregates(cubeql.getHavingAST(), cubeql); // Update alias map as well updateAliasMap(cubeql.getSelectAST(), cubeql); - } /** @@ -112,22 +101,31 @@ class AliasReplacer implements ContextRewriter { Set<String> cubeColsQueried = cubeql.getColumnsQueried(cube.getName()); Set<String> queriedDimAttrs = new HashSet<String>(); Set<String> queriedMsrs = new HashSet<String>(); + Set<String> queriedExprs = new HashSet<String>(); if (cubeColsQueried != null && !cubeColsQueried.isEmpty()) { for (String col : cubeColsQueried) { if (cube.getMeasureNames().contains(col)) { queriedMsrs.add(col); } else if (cube.getDimAttributeNames().contains(col)) { queriedDimAttrs.add(col); + } else if (cube.getExpressionNames().contains(col)) { + queriedExprs.add(col); } } } cubeql.addQueriedDimAttrs(queriedDimAttrs); cubeql.addQueriedMsrs(queriedMsrs); + cubeql.addQueriedExprs(queriedExprs); } } private void extractTabAliasForCol(CubeQueryContext cubeql) throws SemanticException { - Set<String> columns = cubeql.getTblAliasToColumns().get(CubeQueryContext.DEFAULT_TABLE); + extractTabAliasForCol(cubeql.getColToTableAlias(), cubeql, cubeql); + } + + static void extractTabAliasForCol(Map<String, String> colToTableAlias, CubeQueryContext cubeql, + TrackQueriedColumns tqc) throws SemanticException { + Set<String> columns = tqc.getTblAliasToColumns().get(CubeQueryContext.DEFAULT_TABLE); if (columns == null) { return; } @@ -136,8 +134,9 @@ class AliasReplacer implements ContextRewriter { if (cubeql.getCube() != null) { Set<String> cols = 
cubeql.getCube().getAllFieldNames(); if (cols.contains(col.toLowerCase())) { - colToTableAlias.put(col.toLowerCase(), cubeql.getAliasForTableName(cubeql.getCube().getName())); - cubeql.addColumnsQueried((AbstractCubeTable) cubeql.getCube(), col.toLowerCase()); + String cubeAlias = cubeql.getAliasForTableName(cubeql.getCube().getName()); + colToTableAlias.put(col.toLowerCase(), cubeAlias); + tqc.addColumnsQueried(cubeAlias, col.toLowerCase()); inCube = true; } } @@ -148,8 +147,9 @@ class AliasReplacer implements ContextRewriter { if (prevDim != null && !prevDim.equals(dim.getName())) { throw new SemanticException(ErrorMsg.AMBIGOUS_DIM_COLUMN, col, prevDim, dim.getName()); } - colToTableAlias.put(col.toLowerCase(), cubeql.getAliasForTableName(dim.getName())); - cubeql.addColumnsQueried(dim, col.toLowerCase()); + String dimAlias = cubeql.getAliasForTableName(dim.getName()); + colToTableAlias.put(col.toLowerCase(), dimAlias); + tqc.addColumnsQueried(dimAlias, col.toLowerCase()); } else { // throw error because column is in both cube and dimension table throw new SemanticException(ErrorMsg.AMBIGOUS_CUBE_COLUMN, col, cubeql.getCube().getName(), dim.getName()); @@ -162,7 +162,7 @@ class AliasReplacer implements ContextRewriter { } } - private void replaceAliases(ASTNode node, int nodePos, Map<String, String> colToTableAlias) { + static void replaceAliases(ASTNode node, int nodePos, Map<String, String> colToTableAlias) { if (node == null) { return; } @@ -211,7 +211,7 @@ class AliasReplacer implements ContextRewriter { } } - private void updateAliasMap(ASTNode root, CubeQueryContext cubeql) { + static void updateAliasMap(ASTNode root, CubeQueryContext cubeql) { if (root == null) { return; } http://git-wip-us.apache.org/repos/asf/incubator-lens/blob/82553db3/lens-cube/src/main/java/org/apache/lens/cube/parse/CandidateFact.java ---------------------------------------------------------------------- diff --git 
a/lens-cube/src/main/java/org/apache/lens/cube/parse/CandidateFact.java b/lens-cube/src/main/java/org/apache/lens/cube/parse/CandidateFact.java index 52bf9aa..63d2508 100644 --- a/lens-cube/src/main/java/org/apache/lens/cube/parse/CandidateFact.java +++ b/lens-cube/src/main/java/org/apache/lens/cube/parse/CandidateFact.java @@ -67,6 +67,7 @@ public class CandidateFact implements CandidateTable { private ASTNode whereAST; private ASTNode groupbyAST; private ASTNode havingAST; + private ASTNode joinTree; private List<TimeRangeNode> timenodes = Lists.newArrayList(); private final List<Integer> selectIndices = Lists.newArrayList(); private final List<Integer> dimFieldIndices = Lists.newArrayList(); @@ -140,6 +141,9 @@ public class CandidateFact implements CandidateTable { public void copyASTs(CubeQueryContext cubeql) throws SemanticException { this.selectAST = HQLParser.copyAST(cubeql.getSelectAST()); this.whereAST = HQLParser.copyAST(cubeql.getWhereAST()); + if (cubeql.getJoinTree() != null) { + this.joinTree = HQLParser.copyAST(cubeql.getJoinTree()); + } if (cubeql.getGroupByAST() != null) { this.groupbyAST = HQLParser.copyAST(cubeql.getGroupByAST()); } @@ -180,13 +184,13 @@ public class CandidateFact implements CandidateTable { * @throws SemanticException */ public void updateASTs(CubeQueryContext cubeql) throws SemanticException { - Set<String> cubeColsQueried = cubeql.getColumnsQueried(cubeql.getCube().getName()); + Set<String> cubeCols = cubeql.getCube().getAllFieldNames(); // update select AST with selected fields int currentChild = 0; for (int i = 0; i < cubeql.getSelectAST().getChildCount(); i++) { ASTNode selectExpr = (ASTNode) this.selectAST.getChild(currentChild); - Set<String> exprCols = getColsInExpr(cubeColsQueried, selectExpr); + Set<String> exprCols = getColsInExpr(cubeCols, selectExpr); if (getColumns().containsAll(exprCols)) { selectIndices.add(i); if (cubeql.getQueriedDimAttrs().containsAll(exprCols)) { @@ -419,4 +423,8 @@ public class CandidateFact 
implements CandidateTable { } return timePartDimensions; } + + public ASTNode getJoinTree() { + return joinTree; + } } http://git-wip-us.apache.org/repos/asf/incubator-lens/blob/82553db3/lens-cube/src/main/java/org/apache/lens/cube/parse/CandidateTablePruneCause.java ---------------------------------------------------------------------- diff --git a/lens-cube/src/main/java/org/apache/lens/cube/parse/CandidateTablePruneCause.java b/lens-cube/src/main/java/org/apache/lens/cube/parse/CandidateTablePruneCause.java index 8c009b2..5a1f8f9 100644 --- a/lens-cube/src/main/java/org/apache/lens/cube/parse/CandidateTablePruneCause.java +++ b/lens-cube/src/main/java/org/apache/lens/cube/parse/CandidateTablePruneCause.java @@ -58,6 +58,16 @@ public class CandidateTablePruneCause { MORE_PARTITIONS("Picked table has more partitions than minimum"), // invalid cube table INVALID("Invalid cube table provided in query"), + // expression is not evaluable in the candidate + EXPRESSION_NOT_EVALUABLE("%s expressions not evaluable") { + Object[] getFormatPlaceholders(Set<CandidateTablePruneCause> causes) { + List<String> columns = new ArrayList<String>(); + for (CandidateTablePruneCause cause : causes) { + columns.addAll(cause.getMissingExpressions()); + } + return new String[]{columns.toString()}; + } + }, // column not valid in cube table COLUMN_NOT_VALID("Column not valid in cube table"), // column not found in cube table @@ -206,6 +216,8 @@ public class CandidateTablePruneCause { private List<String> missingUpdatePeriods; // populated in case of missing columns private List<String> missingColumns; + // populated in case of expressions not evaluable + private List<String> missingExpressions; // populated in case of no column part of a join path private List<String> joinColumns; // the columns that are missing default aggregate. 
only set in case of MISSING_DEFAULT_AGGREGATE @@ -227,9 +239,11 @@ public class CandidateTablePruneCause { return cause; } - public static CandidateTablePruneCause columnNotFound(Collection<String> missingColumns) { + public static CandidateTablePruneCause columnNotFound(Collection<String>... missingColumns) { List<String> colList = new ArrayList<String>(); - colList.addAll(missingColumns); + for (Collection<String> missing : missingColumns) { + colList.addAll(missing); + } CandidateTablePruneCause cause = new CandidateTablePruneCause(CandidateTablePruneCode.COLUMN_NOT_FOUND); cause.setMissingColumns(colList); return cause; @@ -243,6 +257,16 @@ public class CandidateTablePruneCause { return columnNotFound(colList); } + public static CandidateTablePruneCause expressionNotEvaluable(String... exprs) { + List<String> colList = new ArrayList<String>(); + for (String column : exprs) { + colList.add(column); + } + CandidateTablePruneCause cause = new CandidateTablePruneCause(CandidateTablePruneCode.EXPRESSION_NOT_EVALUABLE); + cause.setMissingExpressions(colList); + return cause; + } + public static CandidateTablePruneCause missingPartitions(Set<String> nonExistingParts) { CandidateTablePruneCause cause = new CandidateTablePruneCause(CandidateTablePruneCode.MISSING_PARTITIONS); http://git-wip-us.apache.org/repos/asf/incubator-lens/blob/82553db3/lens-cube/src/main/java/org/apache/lens/cube/parse/CandidateTableResolver.java ---------------------------------------------------------------------- diff --git a/lens-cube/src/main/java/org/apache/lens/cube/parse/CandidateTableResolver.java b/lens-cube/src/main/java/org/apache/lens/cube/parse/CandidateTableResolver.java index 40561ad..d56fb80 100644 --- a/lens-cube/src/main/java/org/apache/lens/cube/parse/CandidateTableResolver.java +++ b/lens-cube/src/main/java/org/apache/lens/cube/parse/CandidateTableResolver.java @@ -248,44 +248,76 @@ class CandidateTableResolver implements ContextRewriter { } } } + // go over expressions 
queried + // if expression has no measures, prune facts which cannot evaluate expression + // if expression has measures, they should be considered along with other measures and see if the fact can be + // part of measure covering set + for (String expr : cubeql.getQueriedExprs()) { + cubeql.getExprCtx().updateEvaluables(expr, cfact); + if (!cubeql.getQueriedExprsWithMeasures().contains(expr) && !cubeql.getExprCtx().isEvaluable(expr, cfact)) { + // if expression has no measures, prune facts which cannot evaluate expression + LOG.info("Not considering fact table:" + cfact + " as expression " + expr + " is not evaluatable"); + cubeql.addFactPruningMsgs(cfact.fact, CandidateTablePruneCause.expressionNotEvaluable(expr)); + toRemove = true; + break; + } + } // check if the candidate fact has atleast one measure queried - if (!checkForColumnExists(cfact, queriedMsrs)) { + // if expression has measures, they should be considered along with other measures and see if the fact can be + // part of measure covering set + if (!checkForColumnExists(cfact, queriedMsrs) + && (cubeql.getQueriedExprsWithMeasures().isEmpty() + || cubeql.getExprCtx().allNotEvaluable(cubeql.getQueriedExprsWithMeasures(), cfact))) { LOG.info("Not considering fact table:" + cfact + " as columns " + queriedMsrs + " is not available"); - cubeql.addFactPruningMsgs(cfact.fact, CandidateTablePruneCause.columnNotFound(queriedMsrs)); + cubeql.addFactPruningMsgs(cfact.fact, CandidateTablePruneCause.columnNotFound(queriedMsrs, + cubeql.getQueriedExprsWithMeasures())); toRemove = true; } if (toRemove) { i.remove(); } } + Set<String> dimExprs = new HashSet<String>(cubeql.getQueriedExprs()); + dimExprs.removeAll(cubeql.getQueriedExprsWithMeasures()); + if (cubeql.getCandidateFacts().size() == 0) { + throw new SemanticException(ErrorMsg.NO_FACT_HAS_COLUMN, + (!queriedDimAttrs.isEmpty() ? queriedDimAttrs.toString() : "") + + (!dimExprs.isEmpty() ? 
dimExprs.toString() : "")); + } // Find out candidate fact table sets which contain all the measures // queried List<CandidateFact> cfacts = new ArrayList<CandidateFact>(cubeql.getCandidateFacts()); - Set<Set<CandidateFact>> cfactset = findCoveringSets(cfacts, queriedMsrs); + Set<Set<CandidateFact>> cfactset = findCoveringSets(cubeql, cfacts, queriedMsrs, + cubeql.getQueriedExprsWithMeasures()); LOG.info("Measure covering fact sets :" + cfactset); + String msrString = (!queriedMsrs.isEmpty() ? queriedMsrs.toString() : "") + + (!cubeql.getQueriedExprsWithMeasures().isEmpty() ? cubeql.getQueriedExprsWithMeasures().toString() : ""); if (cfactset.isEmpty()) { - throw new SemanticException(ErrorMsg.NO_FACT_HAS_COLUMN, queriedMsrs.toString()); + throw new SemanticException(ErrorMsg.NO_FACT_HAS_COLUMN, msrString); } cubeql.getCandidateFactSets().addAll(cfactset); - cubeql.pruneCandidateFactWithCandidateSet(CandidateTablePruneCode.COLUMN_NOT_FOUND); + cubeql.pruneCandidateFactWithCandidateSet(CandidateTablePruneCause.columnNotFound(queriedMsrs, + cubeql.getQueriedExprsWithMeasures())); if (cubeql.getCandidateFacts().size() == 0) { - throw new SemanticException(ErrorMsg.NO_FACT_HAS_COLUMN, queriedDimAttrs.toString()); + throw new SemanticException(ErrorMsg.NO_FACT_HAS_COLUMN, msrString); } } } - static Set<Set<CandidateFact>> findCoveringSets(List<CandidateFact> cfactsPassed, Set<String> msrs) { + static Set<Set<CandidateFact>> findCoveringSets(CubeQueryContext cubeql, List<CandidateFact> cfactsPassed, + Set<String> msrs, Set<String> exprsWithMeasures) { Set<Set<CandidateFact>> cfactset = new HashSet<Set<CandidateFact>>(); List<CandidateFact> cfacts = new ArrayList<CandidateFact>(cfactsPassed); for (Iterator<CandidateFact> i = cfacts.iterator(); i.hasNext();) { CandidateFact cfact = i.next(); i.remove(); - if (!checkForColumnExists(cfact, msrs)) { + if (!checkForColumnExists(cfact, msrs) + && (exprsWithMeasures.isEmpty() || 
cubeql.getExprCtx().allNotEvaluable(exprsWithMeasures, cfact))) { // check if fact contains any of the maeasures // if not ignore the fact continue; - } else if (cfact.getColumns().containsAll(msrs)) { + } else if (cfact.getColumns().containsAll(msrs) && cubeql.getExprCtx().allEvaluable(cfact, exprsWithMeasures)) { // return single set Set<CandidateFact> one = new LinkedHashSet<CandidateFact>(); one.add(cfact); @@ -293,15 +325,17 @@ class CandidateTableResolver implements ContextRewriter { } else { // find the remaining measures in other facts Set<String> remainingMsrs = new HashSet<String>(msrs); + Set<String> remainingExprs = new HashSet<String>(exprsWithMeasures); remainingMsrs.removeAll(cfact.getColumns()); - Set<Set<CandidateFact>> coveringSets = findCoveringSets(cfacts, remainingMsrs); + remainingExprs.removeAll(cubeql.getExprCtx().coveringExpressions(exprsWithMeasures, cfact)); + Set<Set<CandidateFact>> coveringSets = findCoveringSets(cubeql, cfacts, remainingMsrs, remainingExprs); if (!coveringSets.isEmpty()) { for (Set<CandidateFact> set : coveringSets) { set.add(cfact); cfactset.add(set); } } else { - LOG.info("Couldnt find any set containing remaining measures:" + remainingMsrs); + LOG.info("Couldnt find any set containing remaining measures:" + remainingMsrs + " " + remainingExprs); } } } @@ -503,9 +537,20 @@ class CandidateTableResolver implements ContextRewriter { if (cubeql.getColumnsQueried(dim.getName()) != null) { for (String col : cubeql.getColumnsQueried(dim.getName())) { if (!cdim.getColumns().contains(col.toLowerCase())) { - // check if it available as reference, if not remove the - // candidate - if (!cubeql.getDeNormCtx().addRefUsage(cdim, col, dim.getName())) { + // check if the column is an expression + if (cdim.getBaseTable().getExpressionNames().contains(col)) { + cubeql.getExprCtx().updateEvaluables(col, cdim); + // check if the expression is evaluatable + if (!cubeql.getExprCtx().isEvaluable(col, cdim)) { + LOG.info("Not considering 
dimtable:" + cdim + " as expression " + col + " is not evaluatable"); + cubeql.addDimPruningMsgs(dim, cdim.getTable(), CandidateTablePruneCause.expressionNotEvaluable( + col)); + i.remove(); + break; + } + } else if (!cubeql.getDeNormCtx().addRefUsage(cdim, col, dim.getName())) { + // check if it available as reference, if not remove the + // candidate LOG.info("Not considering dimtable:" + cdim + " as column " + col + " is not available"); cubeql.addDimPruningMsgs(dim, cdim.getTable(), CandidateTablePruneCause.columnNotFound(col)); i.remove(); http://git-wip-us.apache.org/repos/asf/incubator-lens/blob/82553db3/lens-cube/src/main/java/org/apache/lens/cube/parse/ColumnResolver.java ---------------------------------------------------------------------- diff --git a/lens-cube/src/main/java/org/apache/lens/cube/parse/ColumnResolver.java b/lens-cube/src/main/java/org/apache/lens/cube/parse/ColumnResolver.java index 1aa33db..0849381 100644 --- a/lens-cube/src/main/java/org/apache/lens/cube/parse/ColumnResolver.java +++ b/lens-cube/src/main/java/org/apache/lens/cube/parse/ColumnResolver.java @@ -28,7 +28,6 @@ import org.apache.lens.cube.parse.HQLParser.TreeNode; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hive.ql.ErrorMsg; -import org.apache.hadoop.hive.ql.metadata.HiveException; import org.apache.hadoop.hive.ql.parse.ASTNode; import org.apache.hadoop.hive.ql.parse.SemanticException; @@ -60,27 +59,24 @@ class ColumnResolver implements ContextRewriter { } getColsForSelectTree(cubeql); getColsForWhereTree(cubeql); - getColsForTree(cubeql, cubeql.getJoinTree()); - getColsForTree(cubeql, cubeql.getGroupByAST()); - getColsForTree(cubeql, cubeql.getHavingAST()); - getColsForTree(cubeql, cubeql.getOrderByAST()); + getColsForTree(cubeql, cubeql.getJoinTree(), cubeql); + getColsForTree(cubeql, cubeql.getGroupByAST(), cubeql); + getColsForTree(cubeql, cubeql.getHavingAST(), cubeql); + getColsForTree(cubeql, cubeql.getOrderByAST(), cubeql); // Update join 
dimension tables for (String table : cubeql.getTblAliasToColumns().keySet()) { - try { - if (!CubeQueryContext.DEFAULT_TABLE.equalsIgnoreCase(table)) { - if (!cubeql.addQueriedTable(table)) { - throw new SemanticException(ErrorMsg.NEITHER_CUBE_NOR_DIMENSION); - } + if (!CubeQueryContext.DEFAULT_TABLE.equalsIgnoreCase(table)) { + if (!cubeql.addQueriedTable(table)) { + throw new SemanticException(ErrorMsg.NEITHER_CUBE_NOR_DIMENSION); } - } catch (HiveException e) { - throw new SemanticException(e); } } } // finds columns in AST passed. - private void getColsForTree(final CubeQueryContext cubeql, ASTNode tree) throws SemanticException { + static void getColsForTree(final CubeQueryContext cubeql, ASTNode tree, final TrackQueriedColumns tqc) + throws SemanticException { if (tree == null) { return; } @@ -104,7 +100,7 @@ class ColumnResolver implements ContextRewriter { // column is an existing alias return; } - cubeql.addColumnsQueried(CubeQueryContext.DEFAULT_TABLE, column); + tqc.addColumnsQueried(CubeQueryContext.DEFAULT_TABLE, column); } else if (node.getToken().getType() == DOT) { // This is for the case where column name is prefixed by table name // or table alias @@ -116,7 +112,7 @@ class ColumnResolver implements ContextRewriter { String column = colIdent.getText().toLowerCase(); String table = tabident.getText().toLowerCase(); - cubeql.addColumnsQueried(table, column); + tqc.addColumnsQueried(table, column); } } }); http://git-wip-us.apache.org/repos/asf/incubator-lens/blob/82553db3/lens-cube/src/main/java/org/apache/lens/cube/parse/CubeQueryContext.java ---------------------------------------------------------------------- diff --git a/lens-cube/src/main/java/org/apache/lens/cube/parse/CubeQueryContext.java b/lens-cube/src/main/java/org/apache/lens/cube/parse/CubeQueryContext.java index 3964c1a..0409af3 100644 --- a/lens-cube/src/main/java/org/apache/lens/cube/parse/CubeQueryContext.java +++ 
b/lens-cube/src/main/java/org/apache/lens/cube/parse/CubeQueryContext.java @@ -19,6 +19,9 @@ package org.apache.lens.cube.parse; +import static org.apache.hadoop.hive.ql.parse.HiveParser.Identifier; +import static org.apache.hadoop.hive.ql.parse.HiveParser.TOK_TABLE_OR_COL; + import static org.apache.hadoop.hive.ql.parse.HiveParser.TOK_TMP_FILE; import java.io.ByteArrayOutputStream; @@ -45,7 +48,7 @@ import lombok.Getter; import lombok.Setter; import lombok.ToString; -public class CubeQueryContext { +public class CubeQueryContext implements TrackQueriedColumns { public static final String TIME_RANGE_FUNC = "time_range_in"; public static final String NOW = "now"; public static final String DEFAULT_TABLE = "_default_"; @@ -78,6 +81,16 @@ public class CubeQueryContext { @Getter private final Set<String> queriedMsrs = new HashSet<String>(); + @Getter + private final Set<String> queriedExprs = new HashSet<String>(); + + @Getter + private final Set<String> queriedExprsWithMeasures = new HashSet<String>(); + + @Getter + // Mapping of a qualified column name to its table alias + private final Map<String, String> colToTableAlias = new HashMap<String, String>(); + @Getter() private final Set<Set<CandidateFact>> candidateFactSets = new HashSet<Set<CandidateFact>>(); @@ -134,6 +147,9 @@ public class CubeQueryContext { private JoinResolver.AutoJoinContext autoJoinCtx; @Getter @Setter + private ExpressionResolver.ExpressionResolverContext exprCtx; + @Getter + @Setter private DenormalizationResolver.DenormalizationContext deNormCtx; @Getter private PruneCauses<CubeFactTable> factPruningMsgs = @@ -341,7 +357,7 @@ public class CubeQueryContext { boolean isRequiredInJoinChain = false; } - public void addOptionalDimTable(String alias, String col, CandidateTable candidate, boolean isRequiredInJoin) + public void addOptionalDimTable(String alias, CandidateTable candidate, boolean isRequiredInJoin, String... 
cols) throws SemanticException { alias = alias.toLowerCase(); try { @@ -354,8 +370,10 @@ public class CubeQueryContext { optDim = new OptionalDimCtx(); optionalDimensions.put(dim, optDim); } - if (col != null && candidate != null) { - optDim.colQueried.add(col); + if (cols != null && candidate != null) { + for (String col : cols) { + optDim.colQueried.add(col); + } optDim.requiredForCandidates.add(candidate); } if (!optDim.isRequiredInJoinChain) { @@ -413,7 +431,7 @@ public class CubeQueryContext { return cubeTblEntry.getKey(); } } - return tableName; + return tableName.toLowerCase(); } public void print() { @@ -784,6 +802,22 @@ public class CubeQueryContext { } } + // pick dimension tables required during expression expansion for the picked fact and dimensions + Set<Dimension> exprDimTables = new HashSet<Dimension>(); + if (cfacts != null) { + for (CandidateFact cfact : cfacts) { + Set<Dimension> factExprDimTables = exprCtx.rewriteExprCtx(cfact, dimsToQuery, cfacts.size() > 1); + exprDimTables.addAll(factExprDimTables); + if (cfacts.size() > 1) { + factDimMap.get(cfact).addAll(factExprDimTables); + } + } + } else { + // dim only query + exprDimTables.addAll(exprCtx.rewriteExprCtx(null, dimsToQuery, false)); + } + dimsToQuery.putAll(pickCandidateDimsToQuery(exprDimTables)); + // pick denorm tables for the picked fact and dimensions Set<Dimension> denormTables = new HashSet<Dimension>(); if (cfacts != null) { @@ -891,25 +925,57 @@ public class CubeQueryContext { String[] split = StringUtils.split(col, "."); if (split.length <= 1) { - return cube.getMeasureNames().contains(col.trim().toLowerCase()); + col = col.trim().toLowerCase(); + if (queriedExprs.contains(col)) { + return exprCtx.getExpressionContext(col, getAliasForTableName(cube.getName())).isHasMeasures(); + } else { + return cube.getMeasureNames().contains(col); + } } else { - String cubeName = split[0].trim(); - String colName = split[1].trim(); - if (cubeName.equalsIgnoreCase(cube.getName()) - || 
cubeName.equalsIgnoreCase(getAliasForTableName(cube.getName()))) { - return cube.getMeasureNames().contains(colName.toLowerCase()); + String cubeName = split[0].trim().toLowerCase(); + String colName = split[1].trim().toLowerCase(); + if (cubeName.equalsIgnoreCase(cube.getName()) || cubeName.equals(getAliasForTableName(cube.getName()))) { + if (queriedExprs.contains(colName)) { + return exprCtx.getExpressionContext(colName, cubeName).isHasMeasures(); + } else { + return cube.getMeasureNames().contains(colName.toLowerCase()); + } } else { return false; } } } + boolean isMeasure(ASTNode node) { + String tabname = null; + String colname; + int nodeType = node.getToken().getType(); + if (!(nodeType == HiveParser.TOK_TABLE_OR_COL || nodeType == HiveParser.DOT)) { + return false; + } + + if (nodeType == HiveParser.TOK_TABLE_OR_COL) { + colname = ((ASTNode) node.getChild(0)).getText(); + } else { + // node in 'alias.column' format + ASTNode tabident = HQLParser.findNodeByPath(node, TOK_TABLE_OR_COL, Identifier); + ASTNode colIdent = (ASTNode) node.getChild(1); + + colname = colIdent.getText(); + tabname = tabident.getText(); + } + + String msrname = StringUtils.isBlank(tabname) ? colname : tabname + "." + colname; + + return isCubeMeasure(msrname); + } + public boolean isAggregateExpr(String expr) { return aggregateExprs.contains(expr == null ? null : expr.toLowerCase()); } public boolean hasAggregates() { - return !aggregateExprs.isEmpty(); + return !aggregateExprs.isEmpty() || getExprCtx().hasAggregates(); } public String getAlias(String expr) { @@ -1035,6 +1101,14 @@ public class CubeQueryContext { queriedMsrs.addAll(msrs); } + public void addQueriedExprs(Set<String> exprs) { + queriedExprs.addAll(exprs); + } + + public void addQueriedExprsWithMeasures(Set<String> exprs) { + queriedExprsWithMeasures.addAll(exprs); + } + /** * Prune candidate fact sets with respect to available candidate facts. 
* <p/> http://git-wip-us.apache.org/repos/asf/incubator-lens/blob/82553db3/lens-cube/src/main/java/org/apache/lens/cube/parse/CubeQueryRewriter.java ---------------------------------------------------------------------- diff --git a/lens-cube/src/main/java/org/apache/lens/cube/parse/CubeQueryRewriter.java b/lens-cube/src/main/java/org/apache/lens/cube/parse/CubeQueryRewriter.java index b7a92e7..57c91e7 100644 --- a/lens-cube/src/main/java/org/apache/lens/cube/parse/CubeQueryRewriter.java +++ b/lens-cube/src/main/java/org/apache/lens/cube/parse/CubeQueryRewriter.java @@ -132,15 +132,15 @@ public class CubeQueryRewriter { * removed. */ private void setupRewriters() { - // Rewrite base trees with expressions expanded - rewriters.add(new ExpressionResolver(conf)); // Resolve columns - the column alias and table alias rewriters.add(new ColumnResolver(conf)); // Rewrite base trees (groupby, having, orderby, limit) using aliases rewriters.add(new AliasReplacer(conf)); + ExpressionResolver exprResolver = new ExpressionResolver(conf); DenormalizationResolver denormResolver = new DenormalizationResolver(conf); CandidateTableResolver candidateTblResolver = new CandidateTableResolver(conf); StorageTableResolver storageTableResolver = new StorageTableResolver(conf); + rewriters.add(exprResolver); // De-normalized columns resolved rewriters.add(denormResolver); // Resolve candidate fact tables and dimension tables for columns queried @@ -160,18 +160,21 @@ public class CubeQueryRewriter { // Phase 1: resolve fact tables. rewriters.add(storageTableResolver); if (lightFactFirst) { + // Prune candidate tables for which denorm column references do not exist + rewriters.add(denormResolver); + // Prune candidate facts without any valid expressions + rewriters.add(exprResolver); rewriters.add(new LightestFactResolver(conf)); } // Phase 2: resolve fact table partitions. 
rewriters.add(storageTableResolver); rewriters.add(new MaxCoveringFactResolver(conf)); - if (!lightFactFirst) { - rewriters.add(new LightestFactResolver(conf)); - } // Phase 3: resolve dimension tables and partitions. rewriters.add(storageTableResolver); - // Check for candidate tables using de-normalized columns + // Prune candidate tables for which denorm column references do not exist rewriters.add(denormResolver); + // Prune candidate facts without any valid expressions + rewriters.add(exprResolver); rewriters.add(new LeastPartitionResolver(conf)); if (!lightFactFirst) { rewriters.add(new LightestFactResolver(conf)); http://git-wip-us.apache.org/repos/asf/incubator-lens/blob/82553db3/lens-cube/src/main/java/org/apache/lens/cube/parse/DenormalizationResolver.java ---------------------------------------------------------------------- diff --git a/lens-cube/src/main/java/org/apache/lens/cube/parse/DenormalizationResolver.java b/lens-cube/src/main/java/org/apache/lens/cube/parse/DenormalizationResolver.java index e0f7bea..1a7b6b4 100644 --- a/lens-cube/src/main/java/org/apache/lens/cube/parse/DenormalizationResolver.java +++ b/lens-cube/src/main/java/org/apache/lens/cube/parse/DenormalizationResolver.java @@ -25,6 +25,8 @@ import java.util.*; import org.apache.lens.cube.metadata.*; import org.apache.lens.cube.parse.CandidateTablePruneCause.CandidateTablePruneCode; +import org.apache.lens.cube.parse.ExpressionResolver.ExprSpecContext; +import org.apache.lens.cube.parse.ExpressionResolver.ExpressionContext; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; @@ -150,11 +152,11 @@ public class DenormalizationResolver implements ContextRewriter { refCols.add(refer); // Add to optional tables if (refer.col.isChainedColumn()) { - cubeql.addOptionalDimTable(refer.col.getChainName(), refer.col.getRefColumn(), table, false); + cubeql.addOptionalDimTable(refer.col.getChainName(), table, false, refer.col.getRefColumn()); } else { for 
(TableReference reference : refer.col.getReferences()) { - cubeql.addOptionalDimTable(reference.getDestTable(), reference.getDestColumn(), table, false); + cubeql.addOptionalDimTable(reference.getDestTable(), table, false, reference.getDestColumn()); } } return true; @@ -317,6 +319,32 @@ public class DenormalizationResolver implements ContextRewriter { } } + private void addRefColsQueried(CubeQueryContext cubeql, TrackQueriedColumns tqc, DenormalizationContext denormCtx) { + for (Map.Entry<String, Set<String>> entry : tqc.getTblAliasToColumns().entrySet()) { + // skip default alias + if (entry.getKey() == CubeQueryContext.DEFAULT_TABLE) { + continue; + } + // skip join chain aliases + if (cubeql.getJoinchains().keySet().contains(entry.getKey().toLowerCase())) { + continue; + } + AbstractCubeTable tbl = cubeql.getCubeTableForAlias(entry.getKey()); + Set<String> columns = entry.getValue(); + for (String column : columns) { + CubeColumn col; + if (tbl instanceof CubeInterface) { + col = ((CubeInterface) tbl).getColumnByName(column); + } else { + col = ((Dimension) tbl).getColumnByName(column); + } + if (col instanceof ReferencedDimAtrribute) { + // considering all referenced dimensions to be denormalized columns + denormCtx.addReferencedCol(column, new ReferencedQueriedColumn((ReferencedDimAtrribute) col, tbl)); + } + } + } + } /** * Find all de-normalized columns, if these columns are not directly available in candidate tables, query will be * replaced with the corresponding table reference @@ -328,27 +356,13 @@ public class DenormalizationResolver implements ContextRewriter { // Adds all the reference dimensions as eligible for denorm fields denormCtx = new DenormalizationContext(cubeql); cubeql.setDeNormCtx(denormCtx); - for (Map.Entry<String, Set<String>> entry : cubeql.getTblAliasToColumns().entrySet()) { - // skip default alias - if (entry.getKey() == CubeQueryContext.DEFAULT_TABLE) { - continue; - } - // skip join chain aliases - if 
(cubeql.getJoinchains().keySet().contains(entry.getKey().toLowerCase())) { - continue; - } - AbstractCubeTable tbl = cubeql.getCubeTableForAlias(entry.getKey()); - Set<String> columns = entry.getValue(); - for (String column : columns) { - CubeColumn col; - if (tbl instanceof CubeInterface) { - col = ((CubeInterface) tbl).getColumnByName(column); - } else { - col = ((Dimension) tbl).getColumnByName(column); - } - if (col instanceof ReferencedDimAtrribute) { - // considering all referenced dimensions to be denormalized columns - denormCtx.addReferencedCol(column, new ReferencedQueriedColumn((ReferencedDimAtrribute) col, tbl)); + // add ref columns in cube + addRefColsQueried(cubeql, cubeql, denormCtx); + // add ref columns from expressions + for (Set<ExpressionContext> ecSet : cubeql.getExprCtx().getAllExprsQueried().values()) { + for (ExpressionContext ec : ecSet) { + for (ExprSpecContext esc : ec.getAllExprs()) { + addRefColsQueried(cubeql, esc, denormCtx); } } } http://git-wip-us.apache.org/repos/asf/incubator-lens/blob/82553db3/lens-cube/src/main/java/org/apache/lens/cube/parse/ExpressionResolver.java ---------------------------------------------------------------------- diff --git a/lens-cube/src/main/java/org/apache/lens/cube/parse/ExpressionResolver.java b/lens-cube/src/main/java/org/apache/lens/cube/parse/ExpressionResolver.java index 5355049..f2a7039 100644 --- a/lens-cube/src/main/java/org/apache/lens/cube/parse/ExpressionResolver.java +++ b/lens-cube/src/main/java/org/apache/lens/cube/parse/ExpressionResolver.java @@ -19,129 +19,692 @@ package org.apache.lens.cube.parse; -import static org.apache.hadoop.hive.ql.parse.HiveParser.*; +import static org.apache.hadoop.hive.ql.parse.HiveParser.DOT; +import static org.apache.hadoop.hive.ql.parse.HiveParser.Identifier; +import static org.apache.hadoop.hive.ql.parse.HiveParser.TOK_TABLE_OR_COL; + +import java.util.*; import org.apache.lens.cube.metadata.AbstractBaseTable; -import 
org.apache.lens.cube.metadata.DerivedCube; +import org.apache.lens.cube.metadata.AbstractCubeTable; +import org.apache.lens.cube.metadata.CubeColumn; +import org.apache.lens.cube.metadata.CubeInterface; import org.apache.lens.cube.metadata.Dimension; import org.apache.lens.cube.metadata.ExprColumn; +import org.apache.lens.cube.metadata.ExprColumn.ExprSpec; import org.apache.lens.cube.parse.HQLParser.ASTNodeVisitor; import org.apache.lens.cube.parse.HQLParser.TreeNode; import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.hive.ql.ErrorMsg; +import org.apache.hadoop.hive.ql.metadata.HiveException; import org.apache.hadoop.hive.ql.parse.ASTNode; import org.apache.hadoop.hive.ql.parse.HiveParser; import org.apache.hadoop.hive.ql.parse.SemanticException; import org.antlr.runtime.CommonToken; +import lombok.AllArgsConstructor; +import lombok.Getter; +import lombok.NonNull; +import lombok.ToString; +import lombok.extern.slf4j.Slf4j; + /** * Replaces expression with its AST in all query ASTs */ +@Slf4j class ExpressionResolver implements ContextRewriter { public ExpressionResolver(Configuration conf) { } - @Override - public void rewriteContext(CubeQueryContext cubeql) throws SemanticException { - resolveClause(cubeql, cubeql.getSelectAST()); - resolveClause(cubeql, cubeql.getWhereAST()); - resolveClause(cubeql, cubeql.getJoinTree()); - resolveClause(cubeql, cubeql.getGroupByAST()); - resolveClause(cubeql, cubeql.getHavingAST()); - resolveClause(cubeql, cubeql.getOrderByAST()); - - AggregateResolver.updateAggregates(cubeql.getSelectAST(), cubeql); - AggregateResolver.updateAggregates(cubeql.getHavingAST(), cubeql); - } + static class ExpressionContext { + @Getter + private final ExprColumn exprCol; + @Getter + private final AbstractBaseTable srcTable; + @Getter + private final String srcAlias; + @Getter + private Set<ExprSpecContext> allExprs = new LinkedHashSet<ExprSpecContext>(); + private Set<CandidateTable> directlyAvailableIn = new 
HashSet<CandidateTable>(); + private Map<CandidateTable, Set<ExprSpecContext>> evaluableExpressions = + new HashMap<CandidateTable, Set<ExprSpecContext>>(); + @Getter + private boolean hasMeasures = false; - private void resolveClause(final CubeQueryContext cubeql, ASTNode clause) throws SemanticException { - if (clause == null) { - return; + ExpressionContext(CubeQueryContext cubeql, ExprColumn exprCol, AbstractBaseTable srcTable, String srcAlias) + throws SemanticException { + this.srcTable = srcTable; + this.exprCol = exprCol; + this.srcAlias = srcAlias; + for (ExprSpec es : exprCol.getExpressionSpecs()) { + allExprs.add(new ExprSpecContext(es, cubeql)); + } + resolveColumnsAndAlias(cubeql); + } + private void resolveColumnsAndAlias(CubeQueryContext cubeql) throws SemanticException { + for (ExprSpecContext esc : allExprs) { + esc.resolveColumns(cubeql); + esc.replaceAliasInAST(cubeql, cubeql.getColToTableAlias()); + } + for (ExprSpecContext esc : allExprs) { + for (String table : esc.getTblAliasToColumns().keySet()) { + try { + if (!CubeQueryContext.DEFAULT_TABLE.equalsIgnoreCase(table) && !srcAlias.equals(table)) { + cubeql.addOptionalDimTable(table, null, + false, esc.getTblAliasToColumns().get(table).toArray(new String[0])); + esc.exprDims.add((Dimension) cubeql.getCubeTableForAlias(table)); + } + } catch (HiveException e) { + throw new SemanticException(e); + } + } + } + resolveColumnsAndReplaceAlias(cubeql, allExprs, cubeql.getColToTableAlias()); } - // Traverse the tree and resolve expression columns - HQLParser.bft(clause, new ASTNodeVisitor() { - @Override - public void visit(TreeNode visited) throws SemanticException { - ASTNode node = visited.getNode(); - int childcount = node.getChildCount(); - for (int i = 0; i < childcount; i++) { - ASTNode current = (ASTNode) node.getChild(i); - if (current.getToken().getType() == TOK_TABLE_OR_COL && (node != null && node.getToken().getType() != DOT)) { - // Take child ident.totext - ASTNode ident = (ASTNode) 
current.getChild(0); - String column = ident.getText().toLowerCase(); - ASTNode childExpr = getExprAST(cubeql, column); - if (childExpr != null) { - node.setChild(i, replaceAlias(childExpr, cubeql)); + private void resolveColumnsAndReplaceAlias(CubeQueryContext cubeql, Set<ExprSpecContext> exprs, + Map<String, String> colToTableAlias) throws SemanticException { + Set<ExprSpecContext> nestedExpressions = new LinkedHashSet<ExprSpecContext>(); + for (ExprSpecContext esc : exprs) { + for (Map.Entry<String, Set<String>> entry : esc.tblAliasToColumns.entrySet()) { + if (entry.getKey().equals(CubeQueryContext.DEFAULT_TABLE)) { + continue; + } + AbstractBaseTable baseTable = (AbstractBaseTable)cubeql.getCubeTableForAlias(entry.getKey()); + // if (baseTable == null) { + // continue; + // } + Set<String> exprCols = new HashSet<String>(); + for (String col : entry.getValue()) { + // col is an expression + if (baseTable.getExpressionNames().contains(col)) { + exprCols.add(col); } - } else if (current.getToken().getType() == DOT) { - // This is for the case where column name is prefixed by table name - // or table alias - // For example 'select fact.id, dim2.id ...' - // Right child is the column name, left child.ident is table name - ASTNode tabident = HQLParser.findNodeByPath(current, TOK_TABLE_OR_COL, Identifier); - ASTNode colIdent = (ASTNode) current.getChild(1); + } + // get all combinations of expression replaced with inner exprs AST. 
+ addAllNestedExpressions(cubeql, esc, baseTable, nestedExpressions, exprCols); + } + } + for (ExprSpecContext esc : nestedExpressions) { + esc.resolveColumns(cubeql); + esc.replaceAliasInAST(cubeql, colToTableAlias); + for (String table : esc.getTblAliasToColumns().keySet()) { + try { + if (!CubeQueryContext.DEFAULT_TABLE.equalsIgnoreCase(table) && !srcAlias.equals(table)) { + cubeql.addOptionalDimTable(table, null, true, + esc.getTblAliasToColumns().get(table).toArray(new String[0])); + esc.exprDims.add((Dimension) cubeql.getCubeTableForAlias(table)); + } + } catch (HiveException e) { + throw new SemanticException(e); + } + } + } + exprs.addAll(nestedExpressions); + } - String column = colIdent.getText().toLowerCase(); + private void addAllNestedExpressions(CubeQueryContext cubeql, ExprSpecContext baseEsc, AbstractBaseTable baseTable, + Set<ExprSpecContext> nestedExpressions, Set<String> exprCols) throws SemanticException { + for (String col : exprCols) { + Set<ExprSpecContext> replacedExpressions = new LinkedHashSet<ExprSpecContext>(); + for (ExprSpec es : baseTable.getExpressionByName(col).getExpressionSpecs()) { + ASTNode finalAST = HQLParser.copyAST(baseEsc.getFinalAST()); + replaceColumnInAST(finalAST, col, es.getASTNode()); + ExprSpecContext replacedESC = new ExprSpecContext(baseEsc, es, finalAST, cubeql); + nestedExpressions.add(replacedESC); + replacedExpressions.add(replacedESC); + } + Set<String> remaining = new LinkedHashSet<String>(exprCols); + remaining.remove(col); + for (ExprSpecContext replacedESC : replacedExpressions) { + addAllNestedExpressions(cubeql, replacedESC, baseTable, nestedExpressions, remaining); + } + } + } - ASTNode childExpr = getExprAST(cubeql, tabident.getText().toLowerCase(), column); - if (childExpr != null) { - node.setChild(i, replaceAlias(childExpr, cubeql)); + void addDirectlyAvailable(CandidateTable cTable) { + directlyAvailableIn.add(cTable); + } + void addEvaluable(CubeQueryContext cubeql, CandidateTable cTable, 
ExprSpecContext esc) throws SemanticException { + Set<ExprSpecContext> evalSet = evaluableExpressions.get(cTable); + if (evalSet == null) { + evalSet = new LinkedHashSet<ExprSpecContext>(); + evaluableExpressions.put(cTable, evalSet); + // add optional dimensions involved in expressions + for (String table : esc.getTblAliasToColumns().keySet()) { + try { + if (!CubeQueryContext.DEFAULT_TABLE.equalsIgnoreCase(table) && !srcAlias.equals(table)) { + cubeql.addOptionalDimTable(table, cTable, + false, esc.getTblAliasToColumns().get(table).toArray(new String[0])); + esc.exprDims.add((Dimension) cubeql.getCubeTableForAlias(table)); } + } catch (HiveException e) { + throw new SemanticException(e); } } } - - }); + evalSet.add(esc); + } + Set<ASTNode> getAllASTNodes() { + Set<ASTNode> allAST = new HashSet<ASTNode>(); + for (ExprSpecContext esc : allExprs) { + allAST.add(esc.finalAST); + } + return allAST; + } } - private ASTNode getExprAST(final CubeQueryContext cubeql, String table, String column) throws SemanticException { - if (cubeql.getQueriedTable(table) == null) { - if (!cubeql.addQueriedTable(table)) { - throw new SemanticException(ErrorMsg.NEITHER_CUBE_NOR_DIMENSION); + static class ExprSpecContext implements TrackQueriedColumns { + @Getter + private Set<ExprSpec> exprSpecs = new LinkedHashSet<ExprSpec>(); + @Getter + private ASTNode finalAST; + private Set<Dimension> exprDims = new HashSet<Dimension>(); + // for each expression store alias to columns queried + @Getter + private Map<String, Set<String>> tblAliasToColumns = new HashMap<String, Set<String>>(); + + ExprSpecContext(ExprSpec exprSpec, CubeQueryContext cubeql) throws SemanticException { + // replaces table names in expression with aliases in the query + finalAST = replaceAlias(exprSpec.getASTNode(), cubeql); + exprSpecs.add(exprSpec); + } + public ExprSpecContext(ExprSpecContext nested, ExprSpec current, ASTNode node, + CubeQueryContext cubeql) throws SemanticException { + 
exprSpecs.addAll(nested.exprSpecs); + exprSpecs.add(current); + finalAST = replaceAlias(node, cubeql); + } + public void replaceAliasInAST(CubeQueryContext cubeql, Map<String, String> colToTableAlias) + throws SemanticException { + AliasReplacer.extractTabAliasForCol(colToTableAlias, cubeql, this); + AliasReplacer.replaceAliases(finalAST, 0, colToTableAlias); + } + public void addColumnsQueried(String alias, String column) { + Set<String> cols = tblAliasToColumns.get(alias.toLowerCase()); + if (cols == null) { + cols = new HashSet<String>(); + tblAliasToColumns.put(alias.toLowerCase(), cols); } + cols.add(column); } - if (!(cubeql.getQueriedTable(table) instanceof AbstractBaseTable)) { + + void resolveColumns(CubeQueryContext cubeql) throws SemanticException { + // finds all columns and table aliases in the expression + ColumnResolver.getColsForTree(cubeql, finalAST, this); + } + + Date getStartTime() { + Set<Date> startTimes = new HashSet<Date>(); + for (ExprSpec es : exprSpecs) { + if (es.getStartTime() != null) { + startTimes.add(es.getStartTime()); + } + } + if (!startTimes.isEmpty()) { + return Collections.max(startTimes); + } return null; } - if (((AbstractBaseTable) cubeql.getQueriedTable(table)).getExpressionByName(column) == null) { + + Date getEndTime() { + Set<Date> endTimes = new HashSet<Date>(); + for (ExprSpec es : exprSpecs) { + if (es.getEndTime() != null) { + endTimes.add(es.getEndTime()); + } + } + if (!endTimes.isEmpty()) { + return Collections.min(endTimes); + } return null; } - return ((AbstractBaseTable) cubeql.getQueriedTable(table)).getExpressionByName(column).getAst(); + + public boolean isValidInTimeRange(final TimeRange range) { + return isValidFrom(range.getFromDate()) && isValidTill(range.getToDate()); + } + + public boolean isValidFrom(@NonNull final Date date) { + return (getStartTime() == null) ? 
true : date.equals(getStartTime()) || date.after(getStartTime()); + } + + public boolean isValidTill(@NonNull final Date date) { + return (getEndTime() == null) ? true : date.equals(getEndTime()) || date.before(getEndTime()); + } + + public String toString() { + return HQLParser.getString(finalAST); + } + } + + @AllArgsConstructor + @ToString + private static class PickedExpression { + private String srcAlias; + private ExprSpecContext pickedCtx; } - private ASTNode getExprAST(final CubeQueryContext cubeql, final String column) throws SemanticException { - ExprColumn expr = null; - AbstractBaseTable table = null; - if (cubeql.getCube() != null && !(cubeql.getCube() instanceof DerivedCube)) { - // no expression resolver for derived cubes - if (cubeql.getCube().getExpressionNames().contains(column.toLowerCase())) { - expr = cubeql.getCube().getExpressionByName(column); - table = (AbstractBaseTable) cubeql.getCube(); + static class ExpressionResolverContext { + @Getter + private Map<String, Set<ExpressionContext>> allExprsQueried = new HashMap<String, Set<ExpressionContext>>(); + private Map<String, Set<PickedExpression>> pickedExpressions = new HashMap<String, Set<PickedExpression>>(); + private final CubeQueryContext cubeql; + + ExpressionResolverContext(CubeQueryContext cubeql) { + this.cubeql = cubeql; + } + void addExpressionQueried(ExpressionContext expr) { + String exprCol = expr.getExprCol().getName().toLowerCase(); + Set<ExpressionContext> ecSet = allExprsQueried.get(exprCol); + if (ecSet == null) { + ecSet = new LinkedHashSet<ExpressionContext>(); + allExprsQueried.put(exprCol, ecSet); + } + ecSet.add(expr); + } + + boolean isQueriedExpression(String column) { + return allExprsQueried.containsKey(column); + } + + boolean hasAggregates() { + for (Set<ExpressionContext> ecSet : allExprsQueried.values()) { + for (ExpressionContext ec : ecSet) { + for (ExprSpecContext esc : ec.allExprs) { + if (HQLParser.isAggregateAST(esc.finalAST)) { + return true; + } + } + } 
+ } + return false; + } + + ExpressionContext getExpressionContext(String expr, String alias) { + for (ExpressionContext ec : allExprsQueried.get(expr)) { + if (ec.getSrcAlias().equals(alias)) { + return ec; + } + } + throw new IllegalArgumentException("no expression available for " + expr + " alias:" + alias); + } + + public boolean hasMeasures(String expr, CubeInterface cube) { + String alias = cubeql.getAliasForTableName(cube.getName()); + ExpressionContext ec = getExpressionContext(expr, alias); + boolean hasMeasures = false; + for (ExprSpecContext esc : ec.allExprs) { + if (esc.getTblAliasToColumns().get(alias) != null) { + for (String cubeCol : esc.getTblAliasToColumns().get(alias)) { + if (cube.getMeasureByName(cubeCol) != null) { + hasMeasures = true; + break; + } + } + } } + ec.hasMeasures = hasMeasures; + return hasMeasures; } - if (cubeql.getDimensions() != null) { - for (Dimension dim : cubeql.getDimensions()) { - if (dim.getExpressionNames().contains(column.toLowerCase())) { - if (expr != null) { - throw new SemanticException(ErrorMsg.AMBIGOUS_DIM_COLUMN, table.getName(), dim.getName()); + + //updates all expression specs which are evaluable + public void updateEvaluables(String expr, CandidateTable cTable) + throws SemanticException { + String alias = cubeql.getAliasForTableName(cTable.getBaseTable().getName()); + ExpressionContext ec = getExpressionContext(expr, alias); + if (cTable.getColumns().contains(expr)) { + // expression is directly materialized in candidate table + ec.addDirectlyAvailable(cTable); + } + for (ExprSpecContext esc : ec.allExprs) { + if (esc.getTblAliasToColumns().get(alias) == null) { + ec.addEvaluable(cubeql, cTable, esc); + } else { + Set<String> columns = esc.getTblAliasToColumns().get(alias); + boolean isEvaluable = true; + for (String col : columns) { + if (!cTable.getColumns().contains(col.toLowerCase())) { + if (!cubeql.getDeNormCtx().addRefUsage(cTable, col, cTable.getBaseTable().getName())) { + // check if it is 
available as reference, if not expression is not evaluable
+                isEvaluable = false;
+                break;
+              }
+            }
+          }
+          if (isEvaluable) {
+            ec.addEvaluable(cubeql, cTable, esc);
           }
-          expr = dim.getExpressionByName(column);
-          table = dim;
         }
       }
     }
-    if (expr == null) {
+
+    /**
+     * Checks if the expression is evaluable on the candidate table.
+     * <p>
+     * An expression is evaluable when it is directly available in the candidate table, or when at least one
+     * expression spec has been registered as evaluable for that table.
+     *
+     * @param expr   the expression to look up
+     * @param cTable candidate table to check against
+     * @return true if the expression is directly available or has a non-empty set of evaluable specs
+     */
+    public boolean isEvaluable(String expr, CandidateTable cTable) {
+      ExpressionContext ec = getExpressionContext(expr, cubeql.getAliasForTableName(cTable.getBaseTable().getName()));
+      if (ec.directlyAvailableIn.contains(cTable)) {
+        return true;
+      }
+      if (ec.evaluableExpressions.get(cTable) == null) {
+        return false;
+      }
+      return !ec.evaluableExpressions.get(cTable).isEmpty();
+    }
+
+    /**
+     * Returns true if none of the passed expressions is evaluable on the candidate table.
+     * <p>
+     * An empty set yields true, mirroring {@link #allEvaluable(CandidateTable, Set)}.
+     *
+     * @param exprs  expressions to check
+     * @param cTable candidate table to check against
+     * @return true if every expression is not evaluable, false as soon as one evaluable expression is found
+     */
+    public boolean allNotEvaluable(Set<String> exprs, CandidateTable cTable) {
+      for (String expr : exprs) {
+        // A single evaluable expression disproves "all not evaluable". Returning true on the first
+        // non-evaluable expression would answer "not all evaluable" instead.
+        if (isEvaluable(expr, cTable)) {
+          return false;
+        }
+      }
+      return true;
+    }
+
+    /**
+     * Returns the subset of the passed expressions that are evaluable on the candidate table.
+     *
+     * @param exprs  expressions to check
+     * @param cTable candidate table to check against
+     * @return a new set holding the evaluable expressions; empty if there are none
+     */
+    public Collection<String> coveringExpressions(Set<String> exprs, CandidateTable cTable) {
+      Set<String> coveringSet = new HashSet<String>();
+      for (String expr : exprs) {
+        if (isEvaluable(expr, cTable)) {
+          coveringSet.add(expr);
+        }
+      }
+      return coveringSet;
+    }
+
+    /**
+     * Returns true if all passed expressions are evaluable
+     *
+     * @param cTable candidate table to check against
+     * @param exprs  expressions to check
+     * @return false as soon as one expression is not evaluable, true otherwise (including for an empty set)
+     */
+    public boolean allEvaluable(CandidateTable cTable, Set<String> exprs) {
+      for (String expr : exprs) {
+        if (!isEvaluable(expr, cTable)) {
+          return false;
+        }
+      }
+      return true;
+    }
+
+    /**
+     * Picks an expression spec for the queried expressions of the given candidate tables, replaces the
+     * expression columns in the ASTs with the picked specs and collects the dimensions the picked specs need.
+     * The picked expressions are cleared before returning.
+     *
+     * @param cfact       candidate fact; skipped when null, but it is dereferenced when replaceFact is true
+     * @param dimsToQuery candidate dim table per dimension; may be null or empty
+     * @param replaceFact if true the ASTs of cfact are rewritten, otherwise the ASTs of the query context
+     * @return the exprDims of all picked expression specs
+     * @throws SemanticException propagated from the AST traversal
+     */
+    public Set<Dimension> rewriteExprCtx(CandidateFact cfact, Map<Dimension, CandidateDim> dimsToQuery,
+      boolean replaceFact) throws SemanticException {
+      Set<Dimension> exprDims = new HashSet<Dimension>();
+      if (!allExprsQueried.isEmpty()) {
+        // pick expressions for fact
+        if (cfact != null) {
+          pickExpressionsForTable(cfact);
+        }
+        // pick expressions for dimensions
+        if (dimsToQuery != null && !dimsToQuery.isEmpty()) {
+          for (CandidateDim cdim : dimsToQuery.values()) {
+            pickExpressionsForTable(cdim);
+          }
+        }
+        // Replace picked expressions in all the base trees
+        replacePickedExpressions(cfact, replaceFact);
+        for (Set<PickedExpression> peSet : pickedExpressions.values()) {
+          for (PickedExpression pe : peSet) {
+            exprDims.addAll(pe.pickedCtx.exprDims);
+          }
+        }
+      }
+      pickedExpressions.clear();
+      return exprDims;
+    }
+
+    /**
+     * Replaces picked expressions in the select, where, join, group by and having trees of either the
+     * candidate fact or the query context, and always in the order by tree of the query context.
+     *
+     * @param cfact       candidate fact whose trees are rewritten when replaceFact is true
+     * @param replaceFact selects between the trees of cfact (true) and those of the query context (false)
+     * @throws SemanticException propagated from the AST traversal
+     */
+    private void replacePickedExpressions(CandidateFact cfact, boolean replaceFact)
+      throws SemanticException {
+      if (replaceFact) {
+        replaceAST(cubeql, cfact.getSelectAST());
+        replaceAST(cubeql, cfact.getWhereAST());
+        replaceAST(cubeql, cfact.getJoinTree());
+        replaceAST(cubeql, cfact.getGroupByAST());
+        replaceAST(cubeql, cfact.getHavingAST());
+      } else {
+        replaceAST(cubeql, cubeql.getSelectAST());
+        replaceAST(cubeql, cubeql.getWhereAST());
+        replaceAST(cubeql, cubeql.getJoinTree());
+        replaceAST(cubeql, cubeql.getGroupByAST());
+        replaceAST(cubeql, cubeql.getHavingAST());
+      }
+      replaceAST(cubeql, cubeql.getOrderByAST());
+    }
+
+    /**
+     * Walks the tree and replaces every table-qualified column (a DOT node) that has a picked expression
+     * for its column name and table alias with the alias-replaced final AST of that expression.
+     *
+     * @param cubeql query context handed to replaceAlias
+     * @param node   root of the tree to rewrite; nothing is done when null
+     * @throws SemanticException propagated from the AST traversal
+     */
+    private void replaceAST(final CubeQueryContext cubeql, ASTNode node) throws SemanticException {
+      if (node == null) {
+        return;
+      }
+      // Traverse the tree and resolve expression columns
+      HQLParser.bft(node, new ASTNodeVisitor() {
+        @Override
+        public void visit(TreeNode visited) throws SemanticException {
+          ASTNode node = visited.getNode();
+          int childcount = node.getChildCount();
+          for (int i = 0; i < childcount; i++) {
+            ASTNode current = (ASTNode) node.getChild(i);
+            if (current.getToken().getType() == DOT) {
+              // This is for the case where column name is prefixed by table name
+              // or table alias
+              // For example 'select fact.id, dim2.id ...'
+              // Right child is the column name, left child.ident is table name
+              ASTNode tabident = HQLParser.findNodeByPath(current, TOK_TABLE_OR_COL, Identifier);
+              ASTNode colIdent = (ASTNode) current.getChild(1);
+              String column = colIdent.getText().toLowerCase();
+
+              if (pickedExpressions.containsKey(column)) {
+                PickedExpression expr = getPickedExpression(column, tabident.getText().toLowerCase());
+                if (expr != null) {
+                  node.setChild(i, replaceAlias(expr.pickedCtx.finalAST, cubeql));
+                }
+              }
+            }
+          }
+        }
+      });
+    }
+
+    /**
+     * Looks up the expression picked for the column under the given source alias.
+     *
+     * @param column expression column name, used as key into pickedExpressions
+     * @param alias  source alias the picked expression must match
+     * @return the matching picked expression, or null if there is none
+     */
+    private PickedExpression getPickedExpression(String column, String alias) {
+      Set<PickedExpression> peSet = pickedExpressions.get(column);
+      if (peSet != null && !peSet.isEmpty()) {
+        for (PickedExpression picked : peSet) {
+          if (picked.srcAlias.equals(alias)) {
+            return picked;
+          }
+        }
+      }
       return null;
     }
-    return expr.getAst();
+
+    /**
+     * For every queried expression whose source table is the base table of the candidate table, and which is
+     * not directly available in the candidate table, records the first evaluable expression spec as picked.
+     *
+     * @param cTable candidate table to pick expressions for
+     */
+    private void pickExpressionsForTable(CandidateTable cTable) {
+      for (Map.Entry<String, Set<ExpressionContext>> ecEntry : allExprsQueried.entrySet()) {
+        Set<ExpressionContext> ecSet = ecEntry.getValue();
+        for (ExpressionContext ec : ecSet) {
+          if (ec.getSrcTable().getName().equals(cTable.getBaseTable().getName())) {
+            if (!ec.directlyAvailableIn.contains(cTable)) {
+              if (ec.evaluableExpressions.get(cTable) != null && !ec.evaluableExpressions.get(cTable).isEmpty()) {
+                // pick first evaluable expression
+                Set<PickedExpression> peSet = pickedExpressions.get(ecEntry.getKey());
+                if (peSet == null) {
+                  peSet = new HashSet<PickedExpression>();
+                  pickedExpressions.put(ecEntry.getKey(), peSet);
+                }
+                peSet.add(new PickedExpression(ec.srcAlias, ec.evaluableExpressions.get(cTable).iterator().next()));
+              }
+            }
+          }
+        }
+      }
+    }
+
+    /**
+     * Removes expression specs that cannot be used for the current query: specs referencing a dimension that
+     * has no candidate dim tables, specs that are not valid in a queried time range, and specs using a column
+     * that is unavailable in a queried time range. Every removed spec is also dropped from the per-table
+     * evaluable sets of its expression context.
+     */
+    void pruneExpressions() {
+      for (Set<ExpressionContext> ecSet : allExprsQueried.values()) {
+        for (ExpressionContext ec : ecSet) {
+          Set<ExprSpecContext> removedEsc = new HashSet<ExprSpecContext>();
+          for(Iterator<ExprSpecContext> iterator = ec.getAllExprs().iterator(); iterator.hasNext();) {
+            ExprSpecContext esc = iterator.next();
+            // Guards the later checks: a spec must be removed through the iterator at most once
+            boolean removed = false;
+            // Go over expression dims and remove expression involving dimensions for which candidate tables are
+            // not there
+            for (Dimension exprDim : esc.exprDims) {
+              if (cubeql.getCandidateDims().get(exprDim) == null || cubeql.getCandidateDims().get(exprDim).isEmpty()) {
+                log.info("Removing expression {} as {} it does not have any candidate tables", esc, exprDim);
+                iterator.remove();
+                removedEsc.add(esc);
+                removed = true;
+                break;
+              }
+            }
+            if (removed) {
+              continue;
+            }
+            //remove expressions which are not valid in the timerange queried
+            // If an expression is defined as
+            // ex = a + b // from t1 to t2;
+            // ex = c + d // from t2 to t3
+            // With range queried, invalid expressions will be removed
+            // If range is including more than one expression, queries can be unioned as an improvement at later time.
+            // But for now, they are not eligible expressions
+            for (TimeRange range : cubeql.getTimeRanges()) {
+              if (!esc.isValidInTimeRange(range)) {
+                log.info("Removing expression " + esc + " as it is not valid in timerange queried");
+                iterator.remove();
+                removedEsc.add(esc);
+                removed = true;
+                break;
+              }
+            }
+            if (removed) {
+              continue;
+            }
+            // Go over expressions and remove expression containing unavailable columns in timerange
+            // In the example above,
+            // if ex = a +b ; and a is not available in timerange queried, it will be removed.
+            for (TimeRange range : cubeql.getTimeRanges()) {
+              boolean toRemove = false;
+              for (Map.Entry<String, Set<String>> entry : esc.getTblAliasToColumns().entrySet()) {
+                if (CubeQueryContext.DEFAULT_TABLE.equalsIgnoreCase(entry.getKey())) {
+                  continue;
+                }
+                AbstractBaseTable baseTable = (AbstractBaseTable) cubeql.getCubeTableForAlias(entry.getKey());
+                for (String col : entry.getValue()) {
+                  if (!baseTable.getColumnByName(col).isColumnAvailableInTimeRange(range)) {
+                    toRemove = true;
+                    break;
+                  }
+                }
+                if (toRemove) {
+                  break;
+                }
+              }
+              if (toRemove) {
+                log.info("Removing expression " + esc + " as its columns are unavailable in timerange queried");
+                iterator.remove();
+                removedEsc.add(esc);
+                removed = true;
+                break;
+              }
+            }
+          }
+          for (Set<ExprSpecContext> evalSet : ec.evaluableExpressions.values()) {
+            evalSet.removeAll(removedEsc);
+          }
+        }
+      }
+    }
+  }
+
+  /**
+   * Resolves expression columns in two phases, told apart by whether the query already carries an
+   * expression resolver context.
+   * <p>
+   * Without a context: creates it, registers every queried column that is an ExprColumn, and records the
+   * queried expressions that have measures.
+   * <p>
+   * With a context: prunes unusable expression specs, then prunes each candidate fact and candidate dim
+   * table on which some queried expression of its base table is neither directly available nor evaluable.
+   *
+   * @param cubeql the query context being rewritten
+   * @throws SemanticException declared by the rewriter contract and by the calls made here
+   */
+  @Override
+  public void rewriteContext(CubeQueryContext cubeql) throws SemanticException {
+    ExpressionResolverContext exprCtx = cubeql.getExprCtx();
+    if (exprCtx == null) {
+      exprCtx = new ExpressionResolverContext(cubeql);
+      cubeql.setExprCtx(exprCtx);
+      for (Map.Entry<String, Set<String>> entry : cubeql.getTblAliasToColumns().entrySet()) {
+        String alias = entry.getKey();
+        // skip default alias; compare by value (as pruneExpressions does), not by reference
+        if (CubeQueryContext.DEFAULT_TABLE.equalsIgnoreCase(alias)) {
+          continue;
+        }
+        AbstractCubeTable tbl = cubeql.getCubeTableForAlias(alias);
+        Set<String> columns = entry.getValue();
+        for (String column : columns) {
+          CubeColumn col;
+          if (tbl instanceof CubeInterface) {
+            col = ((CubeInterface) tbl).getColumnByName(column);
+          } else {
+            col = ((Dimension) tbl).getColumnByName(column);
+          }
+          if (col instanceof ExprColumn) {
+            exprCtx.addExpressionQueried(new ExpressionContext(cubeql, (ExprColumn)col, (AbstractBaseTable)tbl, alias));
+          }
+        }
+      }
+      Set<String> exprsWithMeasures = new HashSet<String>();
+      for (String expr : cubeql.getQueriedExprs()) {
+        if (cubeql.getExprCtx().hasMeasures(expr, cubeql.getCube())) {
+          // expression has measures
+          exprsWithMeasures.add(expr);
+        }
+      }
+      cubeql.addQueriedExprsWithMeasures(exprsWithMeasures);
+
+    } else {
+      // prune invalid expressions
+      cubeql.getExprCtx().pruneExpressions();
+      // prune candidate facts without any valid expressions
+      if (cubeql.getCube() != null && !cubeql.getCandidateFacts().isEmpty()) {
+        factLoop:
+        for (Iterator<CandidateFact> i = cubeql.getCandidateFacts().iterator(); i.hasNext();) {
+          CandidateFact cfact = i.next();
+          for (Map.Entry<String, Set<ExpressionContext>> ecEntry : exprCtx.allExprsQueried.entrySet()) {
+            Set<ExpressionContext> ecSet = ecEntry.getValue();
+            for (ExpressionContext ec : ecSet) {
+              if (ec.getSrcTable().getName().equals(cfact.getBaseTable().getName())) {
+                if (!ec.directlyAvailableIn.contains(cfact)
+                  && (ec.evaluableExpressions.get(cfact) == null
+                  || ec.evaluableExpressions.get(cfact).isEmpty())) {
+                  log.info("Not considering fact table:{} as {} is not evaluable", cfact, ec.exprCol.getName());
+                  cubeql.addFactPruningMsgs(cfact.fact,
+                    CandidateTablePruneCause.expressionNotEvaluable(ec.exprCol.getName()));
+                  i.remove();
+                  // The fact is gone: go to the next one. A second remove() for another unevaluable
+                  // expression of the same fact would throw IllegalStateException.
+                  continue factLoop;
+                }
+              }
+            }
+          }
+        }
+      }
+      // prune candidate dims without any valid expressions
+      if (cubeql.getDimensions() != null && !cubeql.getDimensions().isEmpty()) {
+        for (Dimension dim : cubeql.getDimensions()) {
+          dimTableLoop:
+          for (Iterator<CandidateDim> i = cubeql.getCandidateDimTables().get(dim).iterator(); i.hasNext();) {
+            CandidateDim cdim = i.next();
+            for (Map.Entry<String, Set<ExpressionContext>> ecEntry : exprCtx.allExprsQueried.entrySet()) {
+              Set<ExpressionContext> ecSet = ecEntry.getValue();
+              for (ExpressionContext ec : ecSet) {
+                if (ec.getSrcTable().getName().equals(cdim.getBaseTable().getName())) {
+                  if (!ec.directlyAvailableIn.contains(cdim)
+                    && (ec.evaluableExpressions.get(cdim) == null
+                    || ec.evaluableExpressions.get(cdim).isEmpty())) {
+                    log.info("Not considering dim table:{} as {} is not evaluable", cdim, ec.exprCol.getName());
+                    cubeql.addDimPruningMsgs(dim, cdim.dimtable,
+                      CandidateTablePruneCause.expressionNotEvaluable(ec.exprCol.getName()));
+                    i.remove();
+                    // Same as for facts: remove() may be called only once per next().
+                    continue dimTableLoop;
+                  }
+                }
+              }
+            }
+          }
+        }
+      }
+    }
   }

-  private ASTNode replaceAlias(final ASTNode expr, final CubeQueryContext cubeql) throws SemanticException {
+  private static ASTNode replaceAlias(final ASTNode expr, final CubeQueryContext cubeql) throws SemanticException {
     ASTNode finalAST = HQLParser.copyAST(expr);
     HQLParser.bft(finalAST, new ASTNodeVisitor() {
       @Override
@@ -166,4 +729,43 @@ class ExpressionResolver implements ContextRewriter {
     });
     return finalAST;
   }
+
+  private static void replaceColumnInAST(ASTNode expr, final String toReplace, final ASTNode columnAST)
+    throws SemanticException {
+    if (expr == null) {
+      return;
+    }
+    // Traverse the tree and resolve expression columns
+    HQLParser.bft(expr, new ASTNodeVisitor() {
+      @Override
+      public void visit(TreeNode visited) throws SemanticException {
+        ASTNode node = visited.getNode();
+        int childcount = node.getChildCount();
+        for (int i = 0; i < childcount; i++) {
+          ASTNode current = (ASTNode) node.getChild(i);
+          if (current.getToken().getType() == TOK_TABLE_OR_COL && (node != null && node.getToken().getType() != DOT)) {
+            // Take child ident.totext
+            ASTNode ident = (ASTNode) current.getChild(0);
+            String column = ident.getText().toLowerCase();
+            if (toReplace.equals(column)) {
+              node.setChild(i, HQLParser.copyAST(columnAST));
+            }
+          } else if (current.getToken().getType() == DOT) {
+            // This is for the case where column name is prefixed by table name
+            // or table alias
+            // For example 'select fact.id, dim2.id ...'
+            // Right child is the column name, left child.ident is table name
+            ASTNode tabident = HQLParser.findNodeByPath(current, TOK_TABLE_OR_COL, Identifier);
+            ASTNode colIdent = (ASTNode) current.getChild(1);
+
+            String column = colIdent.getText().toLowerCase();
+
+            if (toReplace.equals(column)) {
+              node.setChild(i, HQLParser.copyAST(columnAST));
+            }
+          }
+        }
+      }
+    });
+  }
 }
