http://git-wip-us.apache.org/repos/asf/lens/blob/908530f5/lens-cube/src/main/java/org/apache/lens/cube/metadata/join/JoinPath.java ---------------------------------------------------------------------- diff --git a/lens-cube/src/main/java/org/apache/lens/cube/metadata/join/JoinPath.java b/lens-cube/src/main/java/org/apache/lens/cube/metadata/join/JoinPath.java new file mode 100644 index 0000000..48f04bb --- /dev/null +++ b/lens-cube/src/main/java/org/apache/lens/cube/metadata/join/JoinPath.java @@ -0,0 +1,101 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.lens.cube.metadata.join; + +import java.util.*; + +import org.apache.lens.cube.metadata.AbstractCubeTable; + +/** + * A list of table relationships that can be combined to get a join clause + */ +public class JoinPath { + final List<TableRelationship> edges; + // Store the map of a table against all columns of that table which are in the path + private Map<AbstractCubeTable, List<String>> columnsForTable = new HashMap<>(); + + public JoinPath() { + edges = new ArrayList<>(); + } + + public JoinPath(JoinPath other) { + edges = new ArrayList<>(other.edges); + } + + public void initColumnsForTable() { + if (!columnsForTable.isEmpty()) { + // already initialized + return; + } + for (TableRelationship edge : edges) { + addColumnsForEdge(edge); + } + } + + public void addEdge(TableRelationship edge) { + edges.add(edge); + } + + public boolean isEmpty() { + return edges.isEmpty(); + } + + public List<TableRelationship> getEdges() { + return edges; + } + + private void addColumnsForEdge(TableRelationship edge) { + addColumn(edge.getFromTable(), edge.getFromColumn()); + addColumn(edge.getToTable(), edge.getToColumn()); + } + + private void addColumn(AbstractCubeTable table, String column) { + if (table == null || column == null) { + return; + } + List<String> columns = columnsForTable.get(table); + if (columns == null) { + columns = new ArrayList<>(); + columnsForTable.put(table, columns); + } + columns.add(column); + } + + public List<String> getColumnsForTable(AbstractCubeTable table) { + return columnsForTable.get(table); + } + + public Set<AbstractCubeTable> getAllTables() { + return columnsForTable.keySet(); + } + + public boolean containsColumnOfTable(String column, AbstractCubeTable table) { + for (TableRelationship edge : edges) { + if ((table.equals(edge.getFromTable()) && column.equals(edge.getFromColumn())) + || table.equals(edge.getToTable()) && column.equals(edge.getToColumn())) { + return true; + } + } + return false; + } + + public String toString() { + return edges.toString(); + } +}
http://git-wip-us.apache.org/repos/asf/lens/blob/908530f5/lens-cube/src/main/java/org/apache/lens/cube/metadata/join/TableRelationship.java ---------------------------------------------------------------------- diff --git a/lens-cube/src/main/java/org/apache/lens/cube/metadata/join/TableRelationship.java b/lens-cube/src/main/java/org/apache/lens/cube/metadata/join/TableRelationship.java new file mode 100644 index 0000000..dabb9ef --- /dev/null +++ b/lens-cube/src/main/java/org/apache/lens/cube/metadata/join/TableRelationship.java @@ -0,0 +1,46 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.lens.cube.metadata.join; + +import org.apache.lens.cube.metadata.AbstractCubeTable; + +import lombok.AllArgsConstructor; +import lombok.Data; +import lombok.RequiredArgsConstructor; + +/* + * An edge in the schema graph + */ +@Data +@AllArgsConstructor +@RequiredArgsConstructor +public class TableRelationship { + final String fromColumn; + final AbstractCubeTable fromTable; + final String toColumn; + final AbstractCubeTable toTable; + boolean mapsToMany = false; + + @Override + public String toString() { + return fromTable.getName() + "." + fromColumn + "->" + toTable.getName() + "." + toColumn + + (mapsToMany ? "[n]" : ""); + } + +} http://git-wip-us.apache.org/repos/asf/lens/blob/908530f5/lens-cube/src/main/java/org/apache/lens/cube/parse/AutoJoinContext.java ---------------------------------------------------------------------- diff --git a/lens-cube/src/main/java/org/apache/lens/cube/parse/AutoJoinContext.java b/lens-cube/src/main/java/org/apache/lens/cube/parse/AutoJoinContext.java deleted file mode 100644 index 7f13c6c..0000000 --- a/lens-cube/src/main/java/org/apache/lens/cube/parse/AutoJoinContext.java +++ /dev/null @@ -1,760 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ -package org.apache.lens.cube.parse; - -import java.util.*; - -import org.apache.lens.cube.error.LensCubeErrorCode; -import org.apache.lens.cube.metadata.*; -import org.apache.lens.server.api.error.LensException; - -import org.apache.commons.lang.StringUtils; -import org.apache.hadoop.hive.ql.parse.JoinType; - -import lombok.Getter; -import lombok.Setter; -import lombok.extern.slf4j.Slf4j; - -/** - * Store join chain information resolved by join resolver - */ -@Slf4j -public class AutoJoinContext { - // Map of a joined table to list of all possible paths from that table to - // the target - private final Map<Aliased<Dimension>, List<SchemaGraph.JoinPath>> allPaths; - @Getter - // User supplied partial join conditions - private final Map<AbstractCubeTable, String> partialJoinConditions; - // True if the query contains user supplied partial join conditions - @Getter - private final boolean partialJoinChains; - @Getter - // Map of joined table to the join type (if provided by user) - private final Map<AbstractCubeTable, JoinType> tableJoinTypeMap; - - // True if joins were resolved automatically - private boolean joinsResolved; - // Target table for the auto join resolver - private final AbstractCubeTable autoJoinTarget; - // Configuration string to control join type - private String joinTypeCfg; - - // Map of a joined table to its columns which are part of any of the join - // paths. This is used in candidate table resolver - @Getter - private Map<Dimension, Map<AbstractCubeTable, List<String>>> joinPathFromColumns = - new HashMap<Dimension, Map<AbstractCubeTable, List<String>>>(); - - @Getter - private Map<Dimension, Map<AbstractCubeTable, List<String>>> joinPathToColumns = - new HashMap<Dimension, Map<AbstractCubeTable, List<String>>>(); - - // there can be separate join clause for each fact incase of multi fact queries - @Getter - Map<CandidateFact, JoinClause> factClauses = new HashMap<CandidateFact, JoinClause>(); - @Getter - @Setter - JoinClause minCostClause; - private final boolean flattenBridgeTables; - private final String bridgeTableFieldAggr; - - public AutoJoinContext(Map<Aliased<Dimension>, List<SchemaGraph.JoinPath>> allPaths, - Map<Dimension, CubeQueryContext.OptionalDimCtx> optionalDimensions, - Map<AbstractCubeTable, String> partialJoinConditions, - boolean partialJoinChains, Map<AbstractCubeTable, JoinType> tableJoinTypeMap, - AbstractCubeTable autoJoinTarget, String joinTypeCfg, boolean joinsResolved, - boolean flattenBridgeTables, String bridgeTableFieldAggr) { - this.allPaths = allPaths; - initJoinPathColumns(); - this.partialJoinConditions = partialJoinConditions; - this.partialJoinChains = partialJoinChains; - this.tableJoinTypeMap = tableJoinTypeMap; - this.autoJoinTarget = autoJoinTarget; - this.joinTypeCfg = joinTypeCfg; - this.joinsResolved = joinsResolved; - this.flattenBridgeTables = flattenBridgeTables; - this.bridgeTableFieldAggr = bridgeTableFieldAggr; - log.debug("All join paths:{}", allPaths); - log.debug("Join path from columns:{}", joinPathFromColumns); - log.debug("Join path to columns:{}", joinPathToColumns); - } - - public AbstractCubeTable getAutoJoinTarget() { - return autoJoinTarget; - } - - private JoinClause getJoinClause(CandidateFact fact) { - if (fact == null || !factClauses.containsKey(fact)) { - return minCostClause; - } - return factClauses.get(fact); - } - - // Populate map of tables to their columns which are present in any of the - // join paths - private void initJoinPathColumns() { - for (List<SchemaGraph.JoinPath> paths : allPaths.values()) { - for (int i = 0; i < paths.size(); i++) { - SchemaGraph.JoinPath jp = paths.get(i); - jp.initColumnsForTable(); - } - } - refreshJoinPathColumns(); - } - - public void refreshJoinPathColumns() { - joinPathFromColumns.clear(); - joinPathToColumns.clear(); - for (Map.Entry<Aliased<Dimension>, List<SchemaGraph.JoinPath>> joinPathEntry : allPaths.entrySet()) { - List<SchemaGraph.JoinPath> joinPaths = joinPathEntry.getValue(); - Map<AbstractCubeTable, List<String>> fromColPaths = joinPathFromColumns.get(joinPathEntry.getKey().getObject()); - Map<AbstractCubeTable, List<String>> toColPaths = joinPathToColumns.get(joinPathEntry.getKey().getObject()); - if (fromColPaths == null) { - fromColPaths = new HashMap<AbstractCubeTable, List<String>>(); - joinPathFromColumns.put(joinPathEntry.getKey().getObject(), fromColPaths); - } - - if (toColPaths == null) { - toColPaths = new HashMap<AbstractCubeTable, List<String>>(); - joinPathToColumns.put(joinPathEntry.getKey().getObject(), toColPaths); - } - populateJoinPathCols(joinPaths, fromColPaths, toColPaths); - } - } - - private void populateJoinPathCols(List<SchemaGraph.JoinPath> joinPaths, - Map<AbstractCubeTable, List<String>> fromPathColumns, Map<AbstractCubeTable, List<String>> toPathColumns) { - for (SchemaGraph.JoinPath path : joinPaths) { - for (SchemaGraph.TableRelationship edge : path.getEdges()) { - AbstractCubeTable fromTable = edge.getFromTable(); - String fromColumn = edge.getFromColumn(); - List<String> columnsOfFromTable = fromPathColumns.get(fromTable); - if (columnsOfFromTable == null) { - columnsOfFromTable = new ArrayList<String>(); - fromPathColumns.put(fromTable, columnsOfFromTable); - } - columnsOfFromTable.add(fromColumn); - - // Similarly populate for the 'to' table - AbstractCubeTable toTable = edge.getToTable(); - String toColumn = edge.getToColumn(); - List<String> columnsOfToTable = toPathColumns.get(toTable); - if (columnsOfToTable == null) { - columnsOfToTable = new ArrayList<String>(); - toPathColumns.put(toTable, columnsOfToTable); - } - columnsOfToTable.add(toColumn); - } - } - } - - public void removeJoinedTable(Dimension dim) { - allPaths.remove(Aliased.create(dim)); - joinPathFromColumns.remove(dim); - } - - public Map<AbstractCubeTable, String> getPartialJoinConditions() { - return partialJoinConditions; - } - - public String getFromString(String fromTable, CandidateFact fact, Set<Dimension> qdims, - Map<Dimension, CandidateDim> dimsToQuery, CubeQueryContext cubeql) throws LensException { - String fromString = fromTable; - log.info("All paths dump:{}", cubeql.getAutoJoinCtx().getAllPaths()); - if (qdims == null || qdims.isEmpty()) { - return fromString; - } - // Compute the merged join clause string for the min cost joinclause - String clause = getMergedJoinClause(cubeql, cubeql.getAutoJoinCtx().getJoinClause(fact), dimsToQuery); - - fromString += clause; - return fromString; - } - - // Some refactoring needed to account for multiple join paths - public String getMergedJoinClause(CubeQueryContext cubeql, JoinClause joinClause, - Map<Dimension, CandidateDim> dimsToQuery) { - Set<String> clauses = new LinkedHashSet<String>(); - String joinTypeStr = ""; - JoinType joinType = JoinType.INNER; - - // this flag is set to true if user has specified a partial join chain - if (!partialJoinChains) { - // User has not specified any join conditions. In this case, we rely on - // configuration for the join type - if (StringUtils.isNotBlank(joinTypeCfg)) { - joinType = JoinType.valueOf(joinTypeCfg.toUpperCase()); - joinTypeStr = JoinResolver.getJoinTypeStr(joinType); - } - } - - Iterator<JoinTree> iter = joinClause.getJoinTree().dft(); - boolean hasBridgeTable = false; - boolean initedBridgeClauses = false; - StringBuilder bridgeSelectClause = new StringBuilder(); - StringBuilder bridgeFromClause = new StringBuilder(); - StringBuilder bridgeFilterClause = new StringBuilder(); - StringBuilder bridgeJoinClause = new StringBuilder(); - StringBuilder bridgeGroupbyClause = new StringBuilder(); - - while (iter.hasNext()) { - JoinTree cur = iter.next(); - if (partialJoinChains) { - joinType = cur.getJoinType(); - joinTypeStr = JoinResolver.getJoinTypeStr(joinType); - } - SchemaGraph.TableRelationship rel = cur.parentRelationship; - String toAlias, fromAlias; - fromAlias = cur.parent.getAlias(); - toAlias = cur.getAlias(); - hasBridgeTable = flattenBridgeTables && (hasBridgeTable || rel.isMapsToMany()); - // We have to push user specified filters for the joined tables - String userFilter = null; - // Partition condition on the tables also needs to be pushed depending - // on the join - String storageFilter = null; - - if (JoinType.INNER == joinType || JoinType.LEFTOUTER == joinType || JoinType.LEFTSEMI == joinType) { - // For inner and left joins push filter of right table - userFilter = partialJoinConditions.get(rel.getToTable()); - if (partialJoinConditions.containsKey(rel.getFromTable())) { - if (StringUtils.isNotBlank(userFilter)) { - userFilter += (" AND " + partialJoinConditions.get(rel.getFromTable())); - } else { - userFilter = partialJoinConditions.get(rel.getFromTable()); - } - } - storageFilter = getStorageFilter(dimsToQuery, rel.getToTable(), toAlias); - dimsToQuery.get(rel.getToTable()).setWhereClauseAdded(); - } else if (JoinType.RIGHTOUTER == joinType) { - // For right outer joins, push filters of left table - userFilter = partialJoinConditions.get(rel.getFromTable()); - if (partialJoinConditions.containsKey(rel.getToTable())) { - if (StringUtils.isNotBlank(userFilter)) { - userFilter += (" AND " + partialJoinConditions.get(rel.getToTable())); - } else { - userFilter = partialJoinConditions.get(rel.getToTable()); - } - } - if (rel.getFromTable() instanceof Dimension) { - storageFilter = getStorageFilter(dimsToQuery, rel.getFromTable(), fromAlias); - dimsToQuery.get(rel.getFromTable()).setWhereClauseAdded(); - } - } else if (JoinType.FULLOUTER == joinType) { - // For full outer we need to push filters of both left and right - // tables in the join clause - String leftFilter = null, rightFilter = null; - String leftStorageFilter = null, rightStorgeFilter = null; - - if (StringUtils.isNotBlank(partialJoinConditions.get(rel.getFromTable()))) { - leftFilter = partialJoinConditions.get(rel.getFromTable()) + " and "; - } - - if (rel.getFromTable() instanceof Dimension) { - leftStorageFilter = getStorageFilter(dimsToQuery, rel.getFromTable(), fromAlias); - if (StringUtils.isNotBlank((leftStorageFilter))) { - dimsToQuery.get(rel.getFromTable()).setWhereClauseAdded(); - } - } - - if (StringUtils.isNotBlank(partialJoinConditions.get(rel.getToTable()))) { - rightFilter = partialJoinConditions.get(rel.getToTable()); - } - - rightStorgeFilter = getStorageFilter(dimsToQuery, rel.getToTable(), toAlias); - if (StringUtils.isNotBlank(rightStorgeFilter)) { - if (StringUtils.isNotBlank((leftStorageFilter))) { - leftStorageFilter += " and "; - } - dimsToQuery.get(rel.getToTable()).setWhereClauseAdded(); - } - - userFilter = (leftFilter == null ? "" : leftFilter) + (rightFilter == null ? "" : rightFilter); - storageFilter = - (leftStorageFilter == null ? "" : leftStorageFilter) - + (rightStorgeFilter == null ? "" : rightStorgeFilter); - } - StringBuilder clause = new StringBuilder(); - - // if a bridge table is present in the path - if (hasBridgeTable) { - // if any relation has bridge table, the clause becomes the following : - // join (" select " + joinkey + " aggr over fields from bridge table + from bridgeTable + [where user/storage - // filters] + groupby joinkey) on joincond" - // Or - // " join (select " + joinkey + " aggr over fields from table reached through bridge table + from bridge table - // join <next tables> on join condition + [and user/storage filters] + groupby joinkey) on joincond - if (!initedBridgeClauses) { - // we just found a bridge table in the path we need to initialize the clauses for subquery required for - // aggregating fields of bridge table - // initiliaze select clause with join key - bridgeSelectClause.append(" (select ").append(toAlias).append(".").append(rel.getToColumn()).append(" as ") - .append(rel.getToColumn()); - // group by join key - bridgeGroupbyClause.append(" group by ").append(toAlias).append(".").append(rel.getToColumn()); - // from clause with bridge table - bridgeFromClause.append(" from ").append(dimsToQuery.get(rel.getToTable()).getStorageString(toAlias)); - // we need to initialize filter clause with user filter clause or storgae filter if applicable - if (StringUtils.isNotBlank(userFilter)) { - bridgeFilterClause.append(userFilter); - } - if (StringUtils.isNotBlank(storageFilter)) { - if (StringUtils.isNotBlank(bridgeFilterClause.toString())) { - bridgeFilterClause.append(" and "); - } - bridgeFilterClause.append(storageFilter); - } - // initialize final join clause - bridgeJoinClause.append(" on ").append(fromAlias).append(".") - .append(rel.getFromColumn()).append(" = ").append("%s") - .append(".").append(rel.getToColumn()); - initedBridgeClauses = true; - } else { - // if bridge clauses are already inited, this is a next table getting joined with bridge table - // we will append a simple join clause - bridgeFromClause.append(joinTypeStr).append(" join "); - bridgeFromClause.append(dimsToQuery.get(rel.getToTable()).getStorageString(toAlias)); - bridgeFromClause.append(" on ").append(fromAlias).append(".") - .append(rel.getFromColumn()).append(" = ").append(toAlias) - .append(".").append(rel.getToColumn()); - - if (StringUtils.isNotBlank(userFilter)) { - bridgeFromClause.append(" and ").append(userFilter); - } - if (StringUtils.isNotBlank(storageFilter)) { - bridgeFromClause.append(" and ").append(storageFilter); - } - } - if (cubeql.getTblAliasToColumns().get(toAlias) != null - && !cubeql.getTblAliasToColumns().get(toAlias).isEmpty()) { - // there are fields selected from this table after seeing bridge table in path - // we should make subquery for this selection - clause.append(joinTypeStr).append(" join "); - clause.append(bridgeSelectClause.toString()); - for (String col : cubeql.getTblAliasToColumns().get(toAlias)) { - clause.append(",").append(bridgeTableFieldAggr).append("(").append(toAlias) - .append(".").append(col) - .append(")") - .append(" as ").append(col); - } - String bridgeFrom = bridgeFromClause.toString(); - clause.append(bridgeFrom); - String bridgeFilter = bridgeFilterClause.toString(); - if (StringUtils.isNotBlank(bridgeFilter)) { - if (bridgeFrom.contains(" join ")) { - clause.append(" and "); - } else { - clause.append(" where"); - } - clause.append(bridgeFilter.toString()); - } - clause.append(bridgeGroupbyClause.toString()); - clause.append(") ").append(toAlias); - clause.append(String.format(bridgeJoinClause.toString(), toAlias)); - clauses.add(clause.toString()); - } - if (cur.getSubtrees().isEmpty()) { - // clear bridge flags and builders, as there are no more clauses in this tree. - hasBridgeTable = false; - initedBridgeClauses = false; - bridgeSelectClause.setLength(0); - bridgeFromClause.setLength(0); - bridgeFilterClause.setLength(0); - bridgeJoinClause.setLength(0); - bridgeGroupbyClause.setLength(0); - } - } else { - // Simple join clause is : - // jointype + " join " + destTable + " on " + joincond + [" and" + userfilter] + ["and" + storageFilter] - clause.append(joinTypeStr).append(" join "); - //Add storage table name followed by alias - clause.append(dimsToQuery.get(rel.getToTable()).getStorageString(toAlias)); - clause.append(" on ").append(fromAlias).append(".") - .append(rel.getFromColumn()).append(" = ").append(toAlias) - .append(".").append(rel.getToColumn()); - - if (StringUtils.isNotBlank(userFilter)) { - clause.append(" and ").append(userFilter); - } - if (StringUtils.isNotBlank(storageFilter)) { - clause.append(" and ").append(storageFilter); - } - clauses.add(clause.toString()); - } - } - return StringUtils.join(clauses, ""); - } - - public Set<Dimension> getDimsOnPath(Map<Aliased<Dimension>, List<SchemaGraph.TableRelationship>> joinChain, - Set<Dimension> qdims) { - Set<Dimension> dimsOnPath = new HashSet<Dimension>(); - for (Map.Entry<Aliased<Dimension>, List<SchemaGraph.TableRelationship>> entry : joinChain.entrySet()) { - List<SchemaGraph.TableRelationship> chain = entry.getValue(); - Dimension table = entry.getKey().getObject(); - - // check if join with this dimension is required - if (!qdims.contains(table)) { - continue; - } - - for (int i = chain.size() - 1; i >= 0; i--) { - SchemaGraph.TableRelationship rel = chain.get(i); - dimsOnPath.add((Dimension) rel.getToTable()); - } - } - return dimsOnPath; - } - - private String getStorageFilter(Map<Dimension, CandidateDim> dimsToQuery, AbstractCubeTable table, String alias) { - String whereClause = ""; - if (dimsToQuery != null && dimsToQuery.get(table) != null) { - if (StringUtils.isNotBlank(dimsToQuery.get(table).getWhereClause())) { - whereClause = dimsToQuery.get(table).getWhereClause(); - if (alias != null) { - whereClause = StorageUtil.getWhereClause(whereClause, alias); - } - } - } - return whereClause; - } - - /** - * @return the joinsResolved - */ - public boolean isJoinsResolved() { - return joinsResolved; - } - - // Includes both queried join paths and optional join paths - public Set<String> getAllJoinPathColumnsOfTable(AbstractCubeTable table) { - Set<String> allPaths = new HashSet<String>(); - for (Map<AbstractCubeTable, List<String>> optPaths : joinPathFromColumns.values()) { - if (optPaths.get(table) != null) { - allPaths.addAll(optPaths.get(table)); - } - } - - for (Map<AbstractCubeTable, List<String>> optPaths : joinPathToColumns.values()) { - if (optPaths.get(table) != null) { - allPaths.addAll(optPaths.get(table)); - } - } - - return allPaths; - } - - public void pruneAllPaths(CubeInterface cube, final Set<CandidateFact> cfacts, - final Map<Dimension, CandidateDim> dimsToQuery) { - // Remove join paths which cannot be satisfied by the resolved candidate - // fact and dimension tables - if (cfacts != null) { - // include columns from all picked facts - Set<String> factColumns = new HashSet<String>(); - for (CandidateFact cfact : cfacts) { - factColumns.addAll(cfact.getColumns()); - } - - for (List<SchemaGraph.JoinPath> paths : allPaths.values()) { - for (int i = 0; i < paths.size(); i++) { - SchemaGraph.JoinPath jp = paths.get(i); - List<String> cubeCols = jp.getColumnsForTable((AbstractCubeTable) cube); - if (cubeCols != null && !factColumns.containsAll(cubeCols)) { - // This path requires some columns from the cube which are not - // present in the candidate fact - // Remove this path - log.info("Removing join path:{} as columns :{} dont exist", jp, cubeCols); - paths.remove(i); - i--; - } - } - } - pruneEmptyPaths(allPaths); - } - pruneAllPaths(dimsToQuery); - } - - /** - * Prunes allPaths by removing paths which contain columns that are not present in any candidate dims. - * - * @param candidateDims - */ - public void pruneAllPathsForCandidateDims(Map<Dimension, Set<CandidateDim>> candidateDims) { - Map<Dimension, Set<String>> dimColumns = new HashMap<Dimension, Set<String>>(); - // populate all columns present in candidate dims for each dimension - for (Map.Entry<Dimension, Set<CandidateDim>> entry : candidateDims.entrySet()) { - Dimension dim = entry.getKey(); - Set<String> allColumns = new HashSet<String>(); - for (CandidateDim cdim : entry.getValue()) { - allColumns.addAll(cdim.getColumns()); - } - dimColumns.put(dim, allColumns); - } - for (List<SchemaGraph.JoinPath> paths : allPaths.values()) { - for (int i = 0; i < paths.size(); i++) { - SchemaGraph.JoinPath jp = paths.get(i); - for (AbstractCubeTable refTable : jp.getAllTables()) { - List<String> cols = jp.getColumnsForTable(refTable); - if (refTable instanceof Dimension) { - if (cols != null && (dimColumns.get(refTable) == null || !dimColumns.get(refTable).containsAll(cols))) { - // This path requires some columns from the cube which are not present in any candidate dim - // Remove this path - log.info("Removing join path:{} as columns :{} dont exist", jp, cols); - paths.remove(i); - i--; - break; - } - } - } - } - } - pruneEmptyPaths(allPaths); - } - - private void pruneEmptyPaths(Map<Aliased<Dimension>, List<SchemaGraph.JoinPath>> allPaths) { - Iterator<Map.Entry<Aliased<Dimension>, List<SchemaGraph.JoinPath>>> iter = allPaths.entrySet().iterator(); - while (iter.hasNext()) { - Map.Entry<Aliased<Dimension>, List<SchemaGraph.JoinPath>> entry = iter.next(); - if (entry.getValue().isEmpty()) { - iter.remove(); - } - } - } - - private Map<Aliased<Dimension>, List<SchemaGraph.JoinPath>> pruneFactPaths(CubeInterface cube, - final CandidateFact cfact) { - Map<Aliased<Dimension>, List<SchemaGraph.JoinPath>> prunedPaths - = new HashMap<Aliased<Dimension>, List<SchemaGraph.JoinPath>>(); - // Remove join paths which cannot be satisfied by the candidate fact - for (Map.Entry<Aliased<Dimension>, List<SchemaGraph.JoinPath>> ppaths : allPaths.entrySet()) { - prunedPaths.put(ppaths.getKey(), new ArrayList<SchemaGraph.JoinPath>(ppaths.getValue())); - List<SchemaGraph.JoinPath> paths = prunedPaths.get(ppaths.getKey()); - for (int i = 0; i < paths.size(); i++) { - SchemaGraph.JoinPath jp = paths.get(i); - List<String> cubeCols = jp.getColumnsForTable((AbstractCubeTable) cube); - if (cubeCols != null && !cfact.getColumns().containsAll(cubeCols)) { - // This path requires some columns from the cube which are not - // present in the candidate fact - // Remove this path - log.info("Removing join path:{} as columns :{} dont exist", jp, cubeCols); - paths.remove(i); - i--; - } - } - } - pruneEmptyPaths(prunedPaths); - return prunedPaths; - } - - private void pruneAllPaths(final Map<Dimension, CandidateDim> dimsToQuery) { - // Remove join paths which cannot be satisfied by the resolved dimension - // tables - if (dimsToQuery != null && !dimsToQuery.isEmpty()) { - for (CandidateDim candidateDim : dimsToQuery.values()) { - Set<String> dimCols = candidateDim.dimtable.getAllFieldNames(); - for (List<SchemaGraph.JoinPath> paths : allPaths.values()) { - for (int i = 0; i < paths.size(); i++) { - SchemaGraph.JoinPath jp = paths.get(i); - List<String> candidateDimCols = jp.getColumnsForTable(candidateDim.getBaseTable()); - if (candidateDimCols != null && !dimCols.containsAll(candidateDimCols)) { - // This path requires some columns from the dimension which are - // not present in the candidate dim - // Remove this path - log.info("Removing join path:{} as columns :{} dont exist", jp, candidateDimCols); - paths.remove(i); - i--; - } - } - } - } - pruneEmptyPaths(allPaths); - } - } - - /** - * There can be multiple join paths between a dimension and the target. Set of all possible join clauses is the - * cartesian product of join paths of all dimensions - */ - private Iterator<JoinClause> getJoinClausesForAllPaths(final CandidateFact fact, - final Set<Dimension> qdims, final CubeQueryContext cubeql) { - Map<Aliased<Dimension>, List<SchemaGraph.JoinPath>> allPaths; - // if fact is passed only look at paths possible from fact to dims - if (fact != null) { - allPaths = pruneFactPaths(cubeql.getCube(), fact); - } else { - allPaths = new LinkedHashMap<Aliased<Dimension>, List<SchemaGraph.JoinPath>>(this.allPaths); - } - // prune allPaths with qdims - log.info("pruning allPaths before generating all permutations."); - log.info("allPaths: {}", allPaths); - log.info("qdims: {}", qdims); - pruneAllPathsWithQueriedDims(allPaths, qdims); - - // Number of paths in each path set - final int[] groupSizes = new int[allPaths.values().size()]; - // Total number of elements in the cartesian product - int numSamples = 1; - // All path sets - final List<List<SchemaGraph.JoinPath>> pathSets = new ArrayList<List<SchemaGraph.JoinPath>>(); - // Dimension corresponding to the path sets - final List<Aliased<Dimension>> dimensions = new ArrayList<Aliased<Dimension>>(groupSizes.length); - - int i = 0; - for (Map.Entry<Aliased<Dimension>, List<SchemaGraph.JoinPath>> entry : allPaths.entrySet()) { - dimensions.add(entry.getKey()); - List<SchemaGraph.JoinPath> group = entry.getValue(); - pathSets.add(group); - groupSizes[i] = group.size(); - numSamples *= groupSizes[i]; - i++; - } - - final int[] selection = new int[groupSizes.length]; - final int MAX_SAMPLE_COUNT = numSamples; - - // Return a lazy iterator over all possible join chains - return new Iterator<JoinClause>() { - int sample = 0; - - @Override - public boolean hasNext() { - return sample < MAX_SAMPLE_COUNT; - } - - @Override - public JoinClause next() { - Map<Aliased<Dimension>, List<SchemaGraph.TableRelationship>> chain - = new LinkedHashMap<Aliased<Dimension>, List<SchemaGraph.TableRelationship>>(); - //generate next permutation. - for (int i = groupSizes.length - 1, base = sample; i >= 0; base /= groupSizes[i], i--) { - selection[i] = base % groupSizes[i]; - } - for (int i = 0; i < selection.length; i++) { - int selectedPath = selection[i]; - List<SchemaGraph.TableRelationship> path = pathSets.get(i).get(selectedPath).getEdges(); - chain.put(dimensions.get(i), path); - } - - Set<Dimension> dimsOnPath = getDimsOnPath(chain, qdims); - - sample++; - // Cost of join = number of tables joined in the clause - return new JoinClause(cubeql, chain, dimsOnPath); - } - - @Override - public void remove() { - throw new UnsupportedOperationException("Cannot remove elements!"); - } - }; - } - - /** - * Given allPaths, it will remove entries where key is a non-join chain dimension and not contained in qdims - * - * @param allPaths - * @param qdims - */ - private void pruneAllPathsWithQueriedDims(Map<Aliased<Dimension>, List<SchemaGraph.JoinPath>> allPaths, - Set<Dimension> qdims) { - Iterator<Map.Entry<Aliased<Dimension>, List<SchemaGraph.JoinPath>>> iter = allPaths.entrySet().iterator(); - while (iter.hasNext()) { - Map.Entry<Aliased<Dimension>, List<SchemaGraph.JoinPath>> cur = iter.next(); - if (!qdims.contains(cur.getKey().getObject())) { - log.info("removing from allPaths: {}", cur); - iter.remove(); - } - } - } - - public Set<Dimension> pickOptionalTables(final CandidateFact fact, - Set<Dimension> qdims, CubeQueryContext cubeql) throws LensException { - // Find the min cost join clause and add dimensions in the clause as optional dimensions - Set<Dimension> joiningOptionalTables = new HashSet<Dimension>(); - if (qdims == null) { - return joiningOptionalTables; - } - // find least cost path - Iterator<JoinClause> itr = getJoinClausesForAllPaths(fact, qdims, cubeql); - JoinClause minCostClause = null; - while (itr.hasNext()) { - JoinClause clause = itr.next(); - if (minCostClause == null || minCostClause.getCost() > clause.getCost()) { - minCostClause = clause; - } - } - - if (minCostClause == null) { - throw new LensException(LensCubeErrorCode.NO_JOIN_PATH.getLensErrorInfo(), - qdims.toString(), autoJoinTarget.getName()); - } - - log.info("Fact: {} minCostClause:{}", fact, minCostClause); - if (fact != null) { - cubeql.getAutoJoinCtx().getFactClauses().put(fact, minCostClause); - } else { - cubeql.getAutoJoinCtx().setMinCostClause(minCostClause); - } - for (Dimension dim : minCostClause.getDimsInPath()) { - if (!qdims.contains(dim)) { - joiningOptionalTables.add(dim); - } - } - - minCostClause.initChainColumns(); - // prune candidate dims of joiningOptionalTables wrt joinging columns - for (Dimension dim : joiningOptionalTables) { - for (Iterator<CandidateDim> i = cubeql.getCandidateDimTables().get(dim).iterator(); i.hasNext();) { - CandidateDim cdim = i.next(); - CubeDimensionTable dimtable = cdim.dimtable; - if (!cdim.getColumns().containsAll(minCostClause.chainColumns.get(dim))) { - i.remove(); - log.info("Not considering dimtable:{} as its columns are not part of any join paths. Join columns:{}", - dimtable, minCostClause.chainColumns.get(dim)); - cubeql.addDimPruningMsgs(dim, cdim.dimtable, - CandidateTablePruneCause.noColumnPartOfAJoinPath(minCostClause.chainColumns.get(dim))); - } - } - if (cubeql.getCandidateDimTables().get(dim).size() == 0) { - throw new LensException(LensCubeErrorCode.NO_DIM_HAS_COLUMN.getLensErrorInfo(), dim.getName(), - minCostClause.chainColumns.get(dim).toString()); - } - } - - return joiningOptionalTables; - } - - public Map<Aliased<Dimension>, List<SchemaGraph.JoinPath>> getAllPaths() { - return allPaths; - } - - public boolean isReachableDim(Dimension dim) { - Aliased<Dimension> aliased = Aliased.create(dim); - return isReachableDim(aliased); - } - - public boolean isReachableDim(Dimension dim, String alias) { - Aliased<Dimension> aliased = Aliased.create(dim, alias); - return isReachableDim(aliased); - } - - private boolean isReachableDim(Aliased<Dimension> aliased) { - return allPaths.containsKey(aliased) && !allPaths.get(aliased).isEmpty(); - } -} http://git-wip-us.apache.org/repos/asf/lens/blob/908530f5/lens-cube/src/main/java/org/apache/lens/cube/parse/CandidateDim.java ---------------------------------------------------------------------- diff --git a/lens-cube/src/main/java/org/apache/lens/cube/parse/CandidateDim.java b/lens-cube/src/main/java/org/apache/lens/cube/parse/CandidateDim.java index 64dff16..4dcdbcf 100644 --- a/lens-cube/src/main/java/org/apache/lens/cube/parse/CandidateDim.java +++ b/lens-cube/src/main/java/org/apache/lens/cube/parse/CandidateDim.java @@ -18,9 +18,7 @@ */ package org.apache.lens.cube.parse; -import java.util.Collection; -import java.util.Collections; -import java.util.Set; +import java.util.*; import org.apache.lens.cube.metadata.CubeDimensionTable; import org.apache.lens.cube.metadata.Dimension; @@ -45,15 +43,19 @@ public class CandidateDim implements CandidateTable { @Setter private String whereClause; private boolean dbResolved = false; - private boolean whereClauseAdded = false; + private Map<String, Boolean> whereClauseAdded = new HashMap<>(); private Dimension baseTable; public boolean isWhereClauseAdded() { - return whereClauseAdded; + return !whereClauseAdded.isEmpty(); } - public void setWhereClauseAdded() { - this.whereClauseAdded = true; + public boolean isWhereClauseAdded(String alias) { + return whereClauseAdded.get(alias) == null ? false : whereClauseAdded.get(alias); + } + + public void setWhereClauseAdded(String alias) { + this.whereClauseAdded.put(alias, true); } CandidateDim(CubeDimensionTable dimtable, Dimension dim) { http://git-wip-us.apache.org/repos/asf/lens/blob/908530f5/lens-cube/src/main/java/org/apache/lens/cube/parse/CubeQueryContext.java ---------------------------------------------------------------------- diff --git a/lens-cube/src/main/java/org/apache/lens/cube/parse/CubeQueryContext.java b/lens-cube/src/main/java/org/apache/lens/cube/parse/CubeQueryContext.java index 1fd1d17..06c2a0b 100644 --- a/lens-cube/src/main/java/org/apache/lens/cube/parse/CubeQueryContext.java +++ b/lens-cube/src/main/java/org/apache/lens/cube/parse/CubeQueryContext.java @@ -34,6 +34,8 @@ import org.apache.lens.cube.error.NoCandidateDimAvailableException; import org.apache.lens.cube.error.NoCandidateFactAvailableException; import org.apache.lens.cube.metadata.*; import org.apache.lens.cube.parse.CandidateTablePruneCause.CandidateTablePruneCode; +import org.apache.lens.cube.parse.join.AutoJoinContext; +import org.apache.lens.cube.parse.join.JoinUtils; import org.apache.lens.server.api.error.LensException; import org.apache.commons.lang.StringUtils; @@ -387,7 +389,7 @@ public class CubeQueryContext implements TrackQueriedColumns, QueryAST { // required by a candidate table to get a denormalized field from reference // or required in a join chain @ToString - static class OptionalDimCtx { + public static class OptionalDimCtx { OptionalDimCtx() { } @@ -407,44 +409,40 @@ public class CubeQueryContext implements TrackQueriedColumns, QueryAST { public void addOptionalDimTable(String alias, CandidateTable candidate, boolean isRequiredInJoin, String cubeCol, boolean isRef, String... cols) throws LensException { - addOptionalDimTable(alias, candidate, isRequiredInJoin, cubeCol, true, null, cols); + addOptionalDimTable(alias, candidate, isRequiredInJoin, cubeCol, isRef, null, cols); } private void addOptionalDimTable(String alias, CandidateTable candidate, boolean isRequiredInJoin, String cubeCol, boolean isRef, String tableAlias, String... cols) throws LensException { alias = alias.toLowerCase(); - try { - if (!addQueriedTable(alias, true)) { - throw new SemanticException("Could not add queried table or chain:" + alias); - } - Dimension dim = (Dimension) cubeTbls.get(alias); - OptionalDimCtx optDim = optionalDimensions.get(dim); - if (optDim == null) { - optDim = new OptionalDimCtx(); - optionalDimensions.put(dim, optDim); - } - if (cols != null && candidate != null) { - for (String col : cols) { - optDim.colQueried.add(col); - } - optDim.requiredForCandidates.add(candidate); - } - if (cubeCol != null) { - if (isRef) { - updateRefColDim(cubeCol, dim); - } else { - updateExprColDim(tableAlias, cubeCol, dim); - } - } - if (!optDim.isRequiredInJoinChain) { - optDim.isRequiredInJoinChain = isRequiredInJoin; + if (!addQueriedTable(alias, true)) { + throw new LensException(LensCubeErrorCode.QUERIED_TABLE_NOT_FOUND.getLensErrorInfo(), alias); + } + Dimension dim = (Dimension) cubeTbls.get(alias); + OptionalDimCtx optDim = optionalDimensions.get(dim); + if (optDim == null) { + optDim = new OptionalDimCtx(); + optionalDimensions.put(dim, optDim); + } + if (cols != null && candidate != null) { + for (String col : cols) { + optDim.colQueried.add(col); } - if (log.isDebugEnabled()) { - log.debug("Adding optional dimension:{} optDim:{} {} isRef:{}", dim , optDim, - (cubeCol == null ? "" : " for column:" + cubeCol), isRef); + optDim.requiredForCandidates.add(candidate); + } + if (cubeCol != null) { + if (isRef) { + updateRefColDim(cubeCol, dim); + } else { + updateExprColDim(tableAlias, cubeCol, dim); } - } catch (HiveException e) { - throw new LensException(e); + } + if (!optDim.isRequiredInJoinChain) { + optDim.isRequiredInJoinChain = isRequiredInJoin; + } + if (log.isDebugEnabled()) { + log.debug("Adding optional dimension:{} optDim:{} {} isRef:{}", dim, optDim, + (cubeCol == null ? "" : " for column:" + cubeCol), isRef); } } @@ -684,10 +682,13 @@ public class CubeQueryContext implements TrackQueriedColumns, QueryAST { String fromString; if (getJoinAST() == null) { if (cube != null) { + if (dimensions.size() > 0) { + throw new LensException(LensCubeErrorCode.NO_JOIN_CONDITION_AVAILABLE.getLensErrorInfo()); + } fromString = fact.getStorageString(getAliasForTableName(cube.getName())); } else { if (dimensions.size() != 1) { - throw new LensException(LensCubeErrorCode.NO_JOIN_CONDITION_AVAIABLE.getLensErrorInfo()); + throw new LensException(LensCubeErrorCode.NO_JOIN_CONDITION_AVAILABLE.getLensErrorInfo()); } Dimension dim = dimensions.iterator().next(); fromString = dimsToQuery.get(dim).getStorageString(getAliasForTableName(dim.getName())); @@ -702,7 +703,7 @@ public class CubeQueryContext implements TrackQueriedColumns, QueryAST { private void getQLString(QBJoinTree joinTree, StringBuilder builder, CandidateFact fact, Map<Dimension, CandidateDim> dimsToQuery) throws LensException { - String joiningTable = null; + List<String> joiningTables = new ArrayList<>(); if (joinTree.getBaseSrc()[0] == null) { if (joinTree.getJoinSrc() != null) { getQLString(joinTree.getJoinSrc(), builder, fact, dimsToQuery); @@ -710,12 +711,10 @@ public class CubeQueryContext implements TrackQueriedColumns, QueryAST { } else { // (joinTree.getBaseSrc()[0] != null){ String alias = joinTree.getBaseSrc()[0].toLowerCase(); builder.append(getStorageStringWithAlias(fact, dimsToQuery, alias)); - if (joinTree.getJoinCond()[0].getJoinType().equals(JoinType.RIGHTOUTER)) { - joiningTable = alias; - } + joiningTables.add(alias); } if (joinTree.getJoinCond() != null) { - builder.append(JoinResolver.getJoinTypeStr(joinTree.getJoinCond()[0].getJoinType())); + builder.append(JoinUtils.getJoinTypeStr(joinTree.getJoinCond()[0].getJoinType())); builder.append(" JOIN "); } if (joinTree.getBaseSrc()[1] == null) { @@ -725,22 +724,24 @@ public class CubeQueryContext implements TrackQueriedColumns, QueryAST { } else { // (joinTree.getBaseSrc()[1] != null){ String alias = joinTree.getBaseSrc()[1].toLowerCase(); builder.append(getStorageStringWithAlias(fact, dimsToQuery, alias)); - if (joinTree.getJoinCond()[0].getJoinType().equals(JoinType.LEFTOUTER)) { - joiningTable = alias; - } + joiningTables.add(alias); } String joinCond = joinConds.get(joinTree); if (joinCond != null) { builder.append(" ON "); builder.append(joinCond); - if (joiningTable != null) { - // assuming the joining table to be dimension table - DimOnlyHQLContext.appendWhereClause(builder, getWhereClauseWithAlias(dimsToQuery, joiningTable), true); - dimsToQuery.get(cubeTbls.get(joiningTable)).setWhereClauseAdded(); + // joining tables will contain all tables involved in joins. + // we need to push storage filters for Dimensions into join conditions, thus the following code + // takes care of the same. + for (String joiningTable : joiningTables) { + if (cubeTbls.get(joiningTable) instanceof Dimension) { + DimOnlyHQLContext.appendWhereClause(builder, getWhereClauseWithAlias(dimsToQuery, joiningTable), true); + dimsToQuery.get(cubeTbls.get(joiningTable)).setWhereClauseAdded(joiningTable); + } } } else { - throw new LensException(LensCubeErrorCode.NO_JOIN_CONDITION_AVAIABLE.getLensErrorInfo()); + throw new LensException(LensCubeErrorCode.NO_JOIN_CONDITION_AVAILABLE.getLensErrorInfo()); } } @@ -1062,7 +1063,6 @@ public class CubeQueryContext implements TrackQueriedColumns, QueryAST { return isCubeMeasure(msrname); } - public boolean isAggregateExpr(String expr) { return aggregateExprs.contains(expr == null ? null : expr.toLowerCase()); } http://git-wip-us.apache.org/repos/asf/lens/blob/908530f5/lens-cube/src/main/java/org/apache/lens/cube/parse/DenormalizationResolver.java ---------------------------------------------------------------------- diff --git a/lens-cube/src/main/java/org/apache/lens/cube/parse/DenormalizationResolver.java b/lens-cube/src/main/java/org/apache/lens/cube/parse/DenormalizationResolver.java index 5c8bd84..c83b9ac 100644 --- a/lens-cube/src/main/java/org/apache/lens/cube/parse/DenormalizationResolver.java +++ b/lens-cube/src/main/java/org/apache/lens/cube/parse/DenormalizationResolver.java @@ -25,7 +25,7 @@ import java.util.*; import org.apache.lens.cube.error.LensCubeErrorCode; import org.apache.lens.cube.metadata.*; -import org.apache.lens.cube.metadata.ReferencedDimAtrribute.ChainRefCol; +import org.apache.lens.cube.metadata.ReferencedDimAttribute.ChainRefCol; import org.apache.lens.cube.parse.CandidateTablePruneCause.CandidateTablePruneCode; import org.apache.lens.cube.parse.ExpressionResolver.ExprSpecContext; import org.apache.lens.cube.parse.ExpressionResolver.ExpressionContext; @@ -54,15 +54,13 @@ public class DenormalizationResolver implements ContextRewriter { @ToString public static class ReferencedQueriedColumn { - ReferencedDimAtrribute col; + ReferencedDimAttribute col; AbstractCubeTable srcTable; - transient List<TableReference> references = new ArrayList<>(); transient List<ChainRefCol> chainRefCols = new ArrayList<>(); - ReferencedQueriedColumn(ReferencedDimAtrribute col, AbstractCubeTable srcTable) { + ReferencedQueriedColumn(ReferencedDimAttribute col, AbstractCubeTable srcTable) { this.col = col; this.srcTable = srcTable; - references.addAll(col.getReferences()); chainRefCols.addAll(col.getChainRefColumns()); } } @@ -151,16 +149,9 @@ public class DenormalizationResolver implements ContextRewriter { } refCols.add(refer); // Add to optional tables - if (refer.col.isChainedColumn()) { - for (ChainRefCol refCol : refer.col.getChainRefColumns()) { - cubeql.addOptionalDimTable(refCol.getChainName(), table, false, refer.col.getName(), true, - refCol.getRefColumn()); - } - } else { - for (TableReference reference : refer.col.getReferences()) { - cubeql.addOptionalDimTable(reference.getDestTable(), table, false, refer.col.getName(), true, - reference.getDestColumn()); - } + for (ChainRefCol refCol : refer.col.getChainRefColumns()) { + cubeql.addOptionalDimTable(refCol.getChainName(), table, false, refer.col.getName(), true, + refCol.getRefColumn()); } return true; } @@ -240,42 +231,23 @@ public class DenormalizationResolver implements ContextRewriter { private void pickColumnsForTable(String tbl) throws LensException { if (tableToRefCols.containsKey(tbl)) { for (ReferencedQueriedColumn refered : tableToRefCols.get(tbl)) { - if (!refered.col.isChainedColumn()) { - Iterator<TableReference> iter = refered.references.iterator(); - while (iter.hasNext()) { - // remove unreachable references - TableReference reference = iter.next(); - if (!cubeql.getAutoJoinCtx().isReachableDim( - (Dimension) cubeql.getCubeTableForAlias(reference.getDestTable()))) { - iter.remove(); - } - } - if (refered.references.isEmpty()) { - throw new LensException(LensCubeErrorCode.NO_REF_COL_AVAILABLE.getLensErrorInfo(), refered); - } - PickedReference picked = new PickedReference(refered.references.iterator().next(), - cubeql.getAliasForTableName(refered.srcTable.getName()), tbl); - addPickedReference(refered.col.getName(), picked); - pickedRefs.add(picked); - } else { - Iterator<ChainRefCol> iter = refered.chainRefCols.iterator(); - while (iter.hasNext()) { - // remove unreachable references - ChainRefCol reference = iter.next(); - if (!cubeql.getAutoJoinCtx().isReachableDim( - (Dimension) cubeql.getCubeTableForAlias(reference.getChainName()), reference.getChainName())) { - iter.remove(); - } - } - if (refered.chainRefCols.isEmpty()) { - throw new LensException("No chain reference column available for " + refered); + Iterator<ChainRefCol> iter = refered.chainRefCols.iterator(); + while (iter.hasNext()) { + // remove unreachable references + ChainRefCol reference = iter.next(); + if (!cubeql.getAutoJoinCtx().isReachableDim( + (Dimension) cubeql.getCubeTableForAlias(reference.getChainName()), reference.getChainName())) { + iter.remove(); } - PickedReference picked = - new PickedReference(refered.chainRefCols.iterator().next(), - cubeql.getAliasForTableName(refered.srcTable.getName()), tbl); - addPickedReference(refered.col.getName(), picked); - pickedRefs.add(picked); } + if (refered.chainRefCols.isEmpty()) { + throw new LensException(LensCubeErrorCode.NO_REF_COL_AVAILABLE.getLensErrorInfo(), refered.col.getName()); + } + PickedReference picked = + new PickedReference(refered.chainRefCols.iterator().next(), + cubeql.getAliasForTableName(refered.srcTable.getName()), tbl); + addPickedReference(refered.col.getName(), picked); + pickedRefs.add(picked); } } } @@ -348,9 +320,9 @@ public class DenormalizationResolver implements ContextRewriter { } else { col = ((Dimension) tbl).getColumnByName(column); } - if (col instanceof ReferencedDimAtrribute) { + if (col instanceof ReferencedDimAttribute) { // considering all referenced dimensions to be denormalized columns - denormCtx.addReferencedCol(column, new ReferencedQueriedColumn((ReferencedDimAtrribute) col, tbl)); + denormCtx.addReferencedCol(column, new ReferencedQueriedColumn((ReferencedDimAttribute) col, tbl)); } } } http://git-wip-us.apache.org/repos/asf/lens/blob/908530f5/lens-cube/src/main/java/org/apache/lens/cube/parse/DimHQLContext.java ---------------------------------------------------------------------- diff --git a/lens-cube/src/main/java/org/apache/lens/cube/parse/DimHQLContext.java b/lens-cube/src/main/java/org/apache/lens/cube/parse/DimHQLContext.java index b253b94..318c82a 100644 --- a/lens-cube/src/main/java/org/apache/lens/cube/parse/DimHQLContext.java +++ b/lens-cube/src/main/java/org/apache/lens/cube/parse/DimHQLContext.java @@ -105,9 +105,9 @@ abstract class DimHQLContext extends SimpleHQLContext { boolean added = (originalWhere != null); for (Dimension dim : queriedDims) { CandidateDim cdim = dimsToQuery.get(dim); + String alias = query.getAliasForTableName(dim.getName()); if (!cdim.isWhereClauseAdded() && !StringUtils.isBlank(cdim.getWhereClause())) { - appendWhereClause(whereBuf, StorageUtil.getWhereClause(cdim, query.getAliasForTableName(dim.getName())), - added); + appendWhereClause(whereBuf, StorageUtil.getWhereClause(cdim, alias), added); added = true; } } http://git-wip-us.apache.org/repos/asf/lens/blob/908530f5/lens-cube/src/main/java/org/apache/lens/cube/parse/FieldValidator.java ---------------------------------------------------------------------- diff --git a/lens-cube/src/main/java/org/apache/lens/cube/parse/FieldValidator.java b/lens-cube/src/main/java/org/apache/lens/cube/parse/FieldValidator.java index ab7a6d8..36ee9d4 100644 --- a/lens-cube/src/main/java/org/apache/lens/cube/parse/FieldValidator.java +++ b/lens-cube/src/main/java/org/apache/lens/cube/parse/FieldValidator.java @@ -24,8 +24,8 @@ import org.apache.lens.cube.error.ConflictingFields; import org.apache.lens.cube.error.FieldsCannotBeQueriedTogetherException; import org.apache.lens.cube.metadata.CubeInterface; import org.apache.lens.cube.metadata.DerivedCube; -import org.apache.lens.cube.metadata.ReferencedDimAtrribute; -import org.apache.lens.cube.metadata.ReferencedDimAtrribute.ChainRefCol; +import org.apache.lens.cube.metadata.ReferencedDimAttribute; +import org.apache.lens.cube.metadata.ReferencedDimAttribute.ChainRefCol; import org.apache.lens.cube.parse.ExpressionResolver.ExprSpecContext; import org.apache.lens.server.api.error.LensException; @@ -166,9 +166,8 @@ public class FieldValidator implements ContextRewriter { // If this is a referenced dim attribute leading to a chain, then instead of adding this // column, we add the source columns of the chain. - if (cube.getDimAttributeByName(colName) instanceof ReferencedDimAtrribute - && ((ReferencedDimAtrribute) cube.getDimAttributeByName(colName)).isChainedColumn()) { - ReferencedDimAtrribute rdim = (ReferencedDimAtrribute) cube.getDimAttributeByName(colName); + if (cube.getDimAttributeByName(colName) instanceof ReferencedDimAttribute) { + ReferencedDimAttribute rdim = (ReferencedDimAttribute) cube.getDimAttributeByName(colName); for (ChainRefCol refCol : rdim.getChainRefColumns()) { chainSourceColumns.addAll(cube.getChainByName(refCol.getChainName()).getSourceColumns()); } http://git-wip-us.apache.org/repos/asf/lens/blob/908530f5/lens-cube/src/main/java/org/apache/lens/cube/parse/HQLParser.java ---------------------------------------------------------------------- diff --git a/lens-cube/src/main/java/org/apache/lens/cube/parse/HQLParser.java b/lens-cube/src/main/java/org/apache/lens/cube/parse/HQLParser.java index bfb65c7..b1deb07 100644 --- a/lens-cube/src/main/java/org/apache/lens/cube/parse/HQLParser.java +++ b/lens-cube/src/main/java/org/apache/lens/cube/parse/HQLParser.java @@ -206,7 +206,7 @@ public final class HQLParser { } System.out.print(node.getText() + " [" + tokenMapping.get(node.getToken().getType()) + "]"); - System.out.print(" (l" + level + "c" + child + "p" + node.getCharPositionInLine() +")"); + System.out.print(" (l" + level + "c" + child + "p" + node.getCharPositionInLine() + ")"); if (node.getChildCount() > 0) { System.out.println(" {"); http://git-wip-us.apache.org/repos/asf/lens/blob/908530f5/lens-cube/src/main/java/org/apache/lens/cube/parse/JoinClause.java ---------------------------------------------------------------------- diff --git a/lens-cube/src/main/java/org/apache/lens/cube/parse/JoinClause.java b/lens-cube/src/main/java/org/apache/lens/cube/parse/JoinClause.java deleted file mode 100644 index d9a8249..0000000 --- a/lens-cube/src/main/java/org/apache/lens/cube/parse/JoinClause.java +++ /dev/null @@ -1,144 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ -package org.apache.lens.cube.parse; - -import java.util.*; - -import org.apache.lens.cube.metadata.AbstractCubeTable; -import org.apache.lens.cube.metadata.Dimension; -import org.apache.lens.cube.metadata.SchemaGraph; - -import org.apache.hadoop.hive.ql.parse.JoinType; - -import lombok.Getter; -import lombok.ToString; - -@ToString -public class JoinClause implements Comparable<JoinClause> { - private final int cost; - // all dimensions in path except target - @Getter - private final Set<Dimension> dimsInPath; - private CubeQueryContext cubeql; - private final Map<Aliased<Dimension>, List<SchemaGraph.TableRelationship>> chain; - @Getter - private final JoinTree joinTree; - transient Map<AbstractCubeTable, Set<String>> chainColumns = new HashMap<AbstractCubeTable, Set<String>>(); - - public JoinClause(CubeQueryContext cubeql, Map<Aliased<Dimension>, - List<SchemaGraph.TableRelationship>> chain, Set<Dimension> dimsInPath) { - this.cubeql = cubeql; - this.chain = chain; - this.joinTree = mergeJoinChains(chain); - this.cost = joinTree.getNumEdges(); - this.dimsInPath = dimsInPath; - } - - void initChainColumns() { - for (List<SchemaGraph.TableRelationship> path : chain.values()) { - for (SchemaGraph.TableRelationship edge : path) { - Set<String> fcols = chainColumns.get(edge.getFromTable()); - if (fcols == null) { - fcols = new HashSet<String>(); - chainColumns.put(edge.getFromTable(), fcols); - } - fcols.add(edge.getFromColumn()); - - Set<String> tocols = chainColumns.get(edge.getToTable()); - if (tocols == null) { - tocols = new HashSet<String>(); - chainColumns.put(edge.getToTable(), tocols); - } - tocols.add(edge.getToColumn()); - } - } - } - - public int getCost() { - return cost; - } - - @Override - public int compareTo(JoinClause joinClause) { - return cost - joinClause.getCost(); - } - - /** - * Takes chains and merges them in the form of a tree. If two chains have some common path till some table and - * bifurcate from there, then in the chain, both paths will have the common path but the resultant tree will have - * single path from root(cube) to that table and paths will bifurcate from there. - * <p/> - * For example, citystate = [basecube.cityid=citydim.id], [citydim.stateid=statedim.id] - * cityzip = [basecube.cityid=citydim.id], [citydim.zipcode=zipdim.code] - * <p/> - * Without merging, the behaviour is like this: - * <p/> - * <p/> - * (basecube.cityid=citydim.id) (citydim.stateid=statedim.id) - * _____________________________citydim____________________________________statedim - * | - * basecube------| - * |_____________________________citydim____________________________________zipdim - * - * (basecube.cityid=citydim.id) (citydim.zipcode=zipdim.code) - * - * <p/> - * Merging will result in a tree like following - * <p/> (citydim.stateid=statedim.id) - * <p/> ________________________________ statedim - * (basecube.cityid=citydim.id) | - * basecube-------------------------------citydim---- | - * |________________________________ zipdim - * - * (citydim.zipcode=zipdim.code) - * - * <p/> - * Doing this will reduce the number of joins wherever possible. - * - * @param chain Joins in Linear format. - * @return Joins in Tree format - */ - public JoinTree mergeJoinChains(Map<Aliased<Dimension>, List<SchemaGraph.TableRelationship>> chain) { - Map<String, Integer> aliasUsage = new HashMap<String, Integer>(); - JoinTree root = JoinTree.createRoot(); - for (Map.Entry<Aliased<Dimension>, List<SchemaGraph.TableRelationship>> entry : chain.entrySet()) { - JoinTree current = root; - // Last element in this list is link from cube to first dimension - for (int i = entry.getValue().size() - 1; i >= 0; i--) { - // Adds a child if needed, or returns a child already existing corresponding to the given link. - current = current.addChild(entry.getValue().get(i), cubeql, aliasUsage); - if (cubeql.getAutoJoinCtx().isPartialJoinChains()) { - JoinType joinType = cubeql.getAutoJoinCtx().getTableJoinTypeMap().get(entry.getKey().getObject()); - //This ensures if (sub)paths are same, but join type is not same, merging will not happen. - current.setJoinType(joinType); - } - } - // This is a destination table. Decide alias separately. e.g. chainname - // nullcheck is necessary because dimensions can be destinations too. In that case getAlias() == null - if (entry.getKey().getAlias() != null) { - current.setAlias(entry.getKey().getAlias()); - } - } - if (root.getSubtrees().size() > 0) { - root.setAlias(cubeql.getAliasForTableName( - root.getSubtrees().keySet().iterator().next().getFromTable().getName())); - } - return root; - } -} http://git-wip-us.apache.org/repos/asf/lens/blob/908530f5/lens-cube/src/main/java/org/apache/lens/cube/parse/JoinResolver.java ---------------------------------------------------------------------- diff --git a/lens-cube/src/main/java/org/apache/lens/cube/parse/JoinResolver.java b/lens-cube/src/main/java/org/apache/lens/cube/parse/JoinResolver.java index de3a16e..b861bb6 100644 --- a/lens-cube/src/main/java/org/apache/lens/cube/parse/JoinResolver.java +++ b/lens-cube/src/main/java/org/apache/lens/cube/parse/JoinResolver.java @@ -24,14 +24,16 @@ import java.util.*; import org.apache.lens.cube.error.LensCubeErrorCode; import org.apache.lens.cube.metadata.*; -import org.apache.lens.cube.metadata.SchemaGraph.TableRelationship; -import org.apache.lens.cube.parse.CandidateTablePruneCause.CandidateTablePruneCode; +import org.apache.lens.cube.metadata.join.JoinPath; +import org.apache.lens.cube.parse.join.AutoJoinContext; import org.apache.lens.server.api.error.LensException; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hive.ql.metadata.HiveException; import org.apache.hadoop.hive.ql.parse.*; +import com.google.common.collect.Sets; + import lombok.extern.slf4j.Slf4j; /** @@ -39,41 +41,15 @@ import lombok.extern.slf4j.Slf4j; */ @Slf4j class JoinResolver implements ContextRewriter { - - private Map<AbstractCubeTable, String> partialJoinConditions; private Map<AbstractCubeTable, JoinType> tableJoinTypeMap; - private boolean partialJoinChain; private AbstractCubeTable target; private HashMap<Dimension, List<JoinChain>> dimensionInJoinChain = new HashMap<Dimension, List<JoinChain>>(); public JoinResolver(Configuration conf) { } - static String getJoinTypeStr(JoinType joinType) { - if (joinType == null) { - return ""; - } - switch (joinType) { - case FULLOUTER: - return " full outer"; - case INNER: - return " inner"; - case LEFTOUTER: - return " left outer"; - case LEFTSEMI: - return " left semi"; - case UNIQUE: - return " unique"; - case RIGHTOUTER: - return " right outer"; - default: - return ""; - } - } - @Override public void rewriteContext(CubeQueryContext cubeql) throws LensException { - partialJoinConditions = new HashMap<AbstractCubeTable, String>(); tableJoinTypeMap = new HashMap<AbstractCubeTable, JoinType>(); try { resolveJoins(cubeql); @@ -86,9 +62,25 @@ class JoinResolver implements ContextRewriter { QB cubeQB = cubeql.getQb(); boolean joinResolverDisabled = cubeql.getConf().getBoolean(CubeQueryConfUtil.DISABLE_AUTO_JOINS, CubeQueryConfUtil.DEFAULT_DISABLE_AUTO_JOINS); + + if (!joinResolverDisabled && (!cubeql.getNonChainedDimensions().isEmpty() && cubeql.hasCubeInQuery()) + || ((cubeql.getNonChainedDimensions().size() > 1) && !cubeql.hasCubeInQuery())) { + log.warn("Disabling auto join resolver as there are direct dimensions queried"); + joinResolverDisabled = true; + } if (joinResolverDisabled) { if (cubeql.getJoinAST() != null) { cubeQB.setQbJoinTree(genJoinTree(cubeql.getJoinAST(), cubeql)); + } else { + if (cubeql.hasCubeInQuery()) { + if (!cubeql.getNonChainedDimensions().isEmpty()) { + throw new LensException(LensCubeErrorCode.NO_JOIN_CONDITION_AVAILABLE.getLensErrorInfo()); + } + } else { + if (cubeql.getNonChainedDimensions().size() > 1) { + throw new LensException(LensCubeErrorCode.NO_JOIN_CONDITION_AVAILABLE.getLensErrorInfo()); + } + } } } else { autoResolveJoins(cubeql); @@ -118,37 +110,28 @@ class JoinResolver implements ContextRewriter { * @throws HiveException */ private void autoResolveJoins(CubeQueryContext cubeql) throws LensException, HiveException { - // Check if this query needs a join - - // A join is needed if there is a cube and at least one dimension, or, 0 - // cubes and more than one - // dimensions + if (cubeql.getJoinchains().isEmpty()) { + // Joins not required + log.info("No dimension tables to resolve and no join chains present!"); + return; + } processJoinChains(cubeql); - Set<Dimension> dimensions = cubeql.getNonChainedDimensions(); - // Add dimensions specified in the partial join tree - ASTNode joinClause = cubeql.getQb().getParseInfo().getJoinExpr(); - if (joinClause == null) { + // Find the target + if (cubeql.hasCubeInQuery()) { // Only cube in the query - if (cubeql.hasCubeInQuery()) { - target = (AbstractCubeTable) cubeql.getCube(); - } else { - String targetDimAlias = cubeql.getQb().getTabAliases().iterator().next(); - String targetDimTable = cubeql.getQb().getTabNameForAlias(targetDimAlias); - if (targetDimTable == null) { - log.warn("Null table for alias {}", targetDimAlias); - return; - } - target = cubeql.getMetastoreClient().getDimension(targetDimTable); + target = (AbstractCubeTable) cubeql.getCube(); + } else { + String targetDimAlias = cubeql.getQb().getTabAliases().iterator().next(); + String targetDimTable = cubeql.getQb().getTabNameForAlias(targetDimAlias); + if (targetDimTable == null) { + log.warn("Null table for alias {}", targetDimAlias); + throw new LensException(LensCubeErrorCode.JOIN_TARGET_NOT_CUBE_TABLE.getLensErrorInfo(), targetDimAlias); + } + target = cubeql.getMetastoreClient().getDimension(targetDimTable); + if (target == null) { + log.warn("Can't resolve joins for null target"); + throw new LensException(LensCubeErrorCode.JOIN_TARGET_NOT_CUBE_TABLE.getLensErrorInfo(), targetDimTable); } - } - searchDimensionTables(cubeql.getMetastoreClient(), joinClause); - if (target == null) { - log.warn("Can't resolve joins for null target"); - return; - } - - Set<Dimension> dimTables = new HashSet<Dimension>(dimensions); - for (AbstractCubeTable partiallyJoinedTable : partialJoinConditions.keySet()) { - dimTables.add((Dimension) partiallyJoinedTable); } for (JoinChain chain : cubeql.getJoinchains().values()) { @@ -157,84 +140,14 @@ class JoinResolver implements ContextRewriter { } } - // Remove target - dimTables.remove(target); - if (dimTables.isEmpty() && cubeql.getJoinchains().isEmpty()) { - // Joins not required - log.info("No dimension tables to resolve and no join chains present!"); - return; - } - + Map<Aliased<Dimension>, List<JoinPath>> multipleJoinPaths = new LinkedHashMap<>(); - SchemaGraph graph = cubeql.getMetastoreClient().getSchemaGraph(); - Map<Aliased<Dimension>, List<SchemaGraph.JoinPath>> multipleJoinPaths = - new LinkedHashMap<Aliased<Dimension>, List<SchemaGraph.JoinPath>>(); - - // Resolve join path for each dimension accessed in the query - for (Dimension joinee : dimTables) { - if (dimensionInJoinChain.get(joinee) == null) { - // Find all possible join paths - SchemaGraph.GraphSearch search = new SchemaGraph.GraphSearch(joinee, target, graph); - List<SchemaGraph.JoinPath> joinPaths = search.findAllPathsToTarget(); - if (joinPaths != null && !joinPaths.isEmpty()) { - Aliased<Dimension> aliasedJoinee = Aliased.create(joinee); - multipleJoinPaths.put(aliasedJoinee, new ArrayList<SchemaGraph.JoinPath>(search.findAllPathsToTarget())); - addOptionalTables(cubeql, multipleJoinPaths.get(aliasedJoinee), cubeql.getDimensions().contains(joinee)); - } else { - // No link to cube from this dim, can't proceed with query - if (log.isDebugEnabled()) { - graph.print(); - } - log.warn("No join path between {} and {}", joinee.getName(), target.getName()); - if (cubeql.getDimensions().contains(joinee)) { - throw new LensException(LensCubeErrorCode.NO_JOIN_PATH.getLensErrorInfo(), - joinee.getName(), target.getName()); - } else { - // if joinee is optional dim table, remove those candidate facts - Set<CandidateTable> candidates = cubeql.getOptionalDimensionMap().get(joinee).requiredForCandidates; - for (CandidateTable candidate : candidates) { - if (candidate instanceof CandidateFact) { - if (cubeql.getCandidateFacts().contains(candidate)) { - log.info("Not considering fact:{} as there is no join path to {}", candidate, joinee); - cubeql.getCandidateFacts().remove(candidate); - cubeql.addFactPruningMsgs(((CandidateFact) candidate).fact, new CandidateTablePruneCause( - CandidateTablePruneCode.COLUMN_NOT_FOUND)); - } - } else if (cubeql.getCandidateDimTables().containsKey(((CandidateDim) candidate).getBaseTable())) { - log.info("Not considering dimtable:{} as there is no join path to {}", candidate, joinee); - cubeql.getCandidateDimTables().get(((CandidateDim) candidate).getBaseTable()).remove(candidate); - cubeql.addDimPruningMsgs( - (Dimension) candidate.getBaseTable(), (CubeDimensionTable) candidate.getTable(), - new CandidateTablePruneCause(CandidateTablePruneCode.COLUMN_NOT_FOUND) - ); - } - } - } - } - } else if (dimensionInJoinChain.get(joinee).size() > 1) { - throw new LensException("Table " + joinee.getName() + " has " - +dimensionInJoinChain.get(joinee).size() + " different paths through joinchains " - +"(" + dimensionInJoinChain.get(joinee) + ")" - +" used in query. Couldn't determine which one to use"); - } else { - // the case when dimension is used only once in all joinchains. - if (isJoinchainDestination(cubeql, joinee)) { - throw new LensException("Table " + joinee.getName() + " is getting accessed via two different names: " - + "[" + dimensionInJoinChain.get(joinee).get(0).getName() + ", " + joinee.getName() + "]"); - } - // table is accessed with chain and no chain - if (cubeql.getNonChainedDimensions().contains(joinee)) { - throw new LensException("Table " + joinee.getName() + " is getting accessed via joinchain: " - + dimensionInJoinChain.get(joinee).get(0).getName() + " and no chain at all"); - } - } - } // populate paths from joinchains for (JoinChain chain : cubeql.getJoinchains().values()) { Dimension dimension = cubeql.getMetastoreClient().getDimension(chain.getDestTable()); Aliased<Dimension> aliasedDimension = Aliased.create(dimension, chain.getName()); if (multipleJoinPaths.get(aliasedDimension) == null) { - multipleJoinPaths.put(aliasedDimension, new ArrayList<SchemaGraph.JoinPath>()); + multipleJoinPaths.put(aliasedDimension, new ArrayList<JoinPath>()); } multipleJoinPaths.get(aliasedDimension).addAll( chain.getRelationEdges(cubeql.getMetastoreClient())); @@ -243,98 +156,15 @@ class JoinResolver implements ContextRewriter { CubeQueryConfUtil.DEFAULT_ENABLE_FLATTENING_FOR_BRIDGETABLES); String bridgeTableFieldAggr = cubeql.getConf().get(CubeQueryConfUtil.BRIDGE_TABLE_FIELD_AGGREGATOR, CubeQueryConfUtil.DEFAULT_BRIDGE_TABLE_FIELD_AGGREGATOR); + Set<Dimension> requiredDimensions = Sets.newHashSet(cubeql.getDimensions()); + requiredDimensions.removeAll(cubeql.getOptionalDimensions()); AutoJoinContext joinCtx = - new AutoJoinContext(multipleJoinPaths, cubeql.optionalDimensions, partialJoinConditions, partialJoinChain, + new AutoJoinContext(multipleJoinPaths, requiredDimensions, tableJoinTypeMap, target, cubeql.getConf().get(CubeQueryConfUtil.JOIN_TYPE_KEY), true, flattenBridgeTables, bridgeTableFieldAggr); cubeql.setAutoJoinCtx(joinCtx); } - private boolean isJoinchainDestination(CubeQueryContext cubeql, Dimension dimension) { - for (JoinChain chain : cubeql.getJoinchains().values()) { - if (chain.getDestTable().equalsIgnoreCase(dimension.getName())) { - return true; - } - } - return false; - } - - private void addOptionalTables(CubeQueryContext cubeql, List<SchemaGraph.JoinPath> joinPathList, boolean required) - throws LensException { - for (SchemaGraph.JoinPath joinPath : joinPathList) { - for (TableRelationship rel : joinPath.getEdges()) { - // Add the joined tables to the queries table sets so that they are - // resolved in candidate resolver - cubeql.addOptionalJoinDimTable(rel.getToTable().getName(), required); - } - } - } - - private void setTarget(CubeMetastoreClient metastore, ASTNode node) throws HiveException, LensException { - String targetTableName = HQLParser.getString(HQLParser.findNodeByPath(node, TOK_TABNAME, Identifier)); - if (metastore.isDimension(targetTableName)) { - target = metastore.getDimension(targetTableName); - } else if (metastore.isCube(targetTableName)) { - target = (AbstractCubeTable) metastore.getCube(targetTableName); - } else { - throw new LensException(LensCubeErrorCode.JOIN_TARGET_NOT_CUBE_TABLE.getLensErrorInfo(), targetTableName); - } - } - - private void searchDimensionTables(CubeMetastoreClient metastore, ASTNode node) throws HiveException, LensException { - if (node == null) { - return; - } - // User has specified join conditions partially. We need to store join - // conditions as well as join types - partialJoinChain = true; - if (isJoinToken(node)) { - ASTNode left = (ASTNode) node.getChild(0); - ASTNode right = (ASTNode) node.getChild(1); - // Get table name and - - String tableName = HQLParser.getString(HQLParser.findNodeByPath(right, TOK_TABNAME, Identifier)); - - Dimension dimension = metastore.getDimension(tableName); - String joinCond = ""; - if (node.getChildCount() > 2) { - // User has specified a join condition for filter pushdown. - joinCond = HQLParser.getString((ASTNode) node.getChild(2)); - } - partialJoinConditions.put(dimension, joinCond); - tableJoinTypeMap.put(dimension, getJoinType(node)); - if (isJoinToken(left)) { - searchDimensionTables(metastore, left); - } else { - if (left.getToken().getType() == TOK_TABREF) { - setTarget(metastore, left); - } - } - } else if (node.getToken().getType() == TOK_TABREF) { - setTarget(metastore, node); - } - - } - - private JoinType getJoinType(ASTNode node) { - switch (node.getToken().getType()) { - case TOK_LEFTOUTERJOIN: - return JoinType.LEFTOUTER; - case TOK_LEFTSEMIJOIN: - return JoinType.LEFTSEMI; - case TOK_RIGHTOUTERJOIN: - return JoinType.RIGHTOUTER; - case TOK_FULLOUTERJOIN: - return JoinType.FULLOUTER; - case TOK_JOIN: - return JoinType.INNER; - case TOK_UNIQUEJOIN: - return JoinType.UNIQUE; - default: - return JoinType.INNER; - } - } - // Recursively find out join conditions private QBJoinTree genJoinTree(ASTNode joinParseTree, CubeQueryContext cubeql) throws LensException { QBJoinTree joinTree = new QBJoinTree(); @@ -418,7 +248,7 @@ class JoinResolver implements ContextRewriter { } children[1] = alias; joinTree.setBaseSrc(children); - // remember rhs table for semijoin + // remember rhs table for semi join if (!joinTree.getNoSemiJoin()) { joinTree.addRHSSemijoin(alias); } @@ -431,7 +261,7 @@ class JoinResolver implements ContextRewriter { cubeql.setJoinCond(joinTree, HQLParser.getString(joinCond)); } else { // No join condition specified. this should be an error - throw new LensException(LensCubeErrorCode.NO_JOIN_CONDITION_AVAIABLE.getLensErrorInfo()); + throw new LensException(LensCubeErrorCode.NO_JOIN_CONDITION_AVAILABLE.getLensErrorInfo()); } return joinTree; } http://git-wip-us.apache.org/repos/asf/lens/blob/908530f5/lens-cube/src/main/java/org/apache/lens/cube/parse/JoinTree.java ---------------------------------------------------------------------- diff --git a/lens-cube/src/main/java/org/apache/lens/cube/parse/JoinTree.java b/lens-cube/src/main/java/org/apache/lens/cube/parse/JoinTree.java deleted file mode 100644 index 5a294af..0000000 --- a/lens-cube/src/main/java/org/apache/lens/cube/parse/JoinTree.java +++ /dev/null @@ -1,164 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ -package org.apache.lens.cube.parse; - -import java.util.*; - -import org.apache.lens.cube.metadata.AbstractCubeTable; -import org.apache.lens.cube.metadata.SchemaGraph; - -import org.apache.hadoop.hive.ql.parse.JoinType; - -import lombok.Data; -import lombok.EqualsAndHashCode; -import lombok.ToString; - -@Data -@ToString(exclude = "parent") -@EqualsAndHashCode(exclude = "parent") -public class JoinTree { - //parent of the node - JoinTree parent; - // current table is parentRelationship.destTable; - SchemaGraph.TableRelationship parentRelationship; - // Alias for the join clause - String alias; - private Map<SchemaGraph.TableRelationship, JoinTree> subtrees = - new LinkedHashMap<SchemaGraph.TableRelationship, JoinTree>(); - // Number of nodes from root to this node. depth of root is 0. Unused for now. - private int depthFromRoot; - // join type of the current table. - JoinType joinType; - - public static JoinTree createRoot() { - return new JoinTree(null, null, 0); - } - - public JoinTree(JoinTree parent, SchemaGraph.TableRelationship tableRelationship, - int depthFromRoot) { - this.parent = parent; - this.parentRelationship = tableRelationship; - this.depthFromRoot = depthFromRoot; - } - - public JoinTree addChild(SchemaGraph.TableRelationship tableRelationship, - CubeQueryContext cubeql, Map<String, Integer> aliasUsage) { - if (getSubtrees().get(tableRelationship) == null) { - JoinTree current = new JoinTree(this, tableRelationship, - this.depthFromRoot + 1); - // Set alias. Need to compute only when new node is being created. - // The following code ensures that For intermediate tables, aliases are given - // in the order citydim, citydim_0, citydim_1, ... - // And for destination tables, an alias will be decided from here but might be - // overridden outside this function. - AbstractCubeTable destTable = tableRelationship.getToTable(); - current.setAlias(cubeql.getAliasForTableName(destTable.getName())); - if (aliasUsage.get(current.getAlias()) == null) { - aliasUsage.put(current.getAlias(), 0); - } else { - aliasUsage.put(current.getAlias(), aliasUsage.get(current.getAlias()) + 1); - current.setAlias(current.getAlias() + "_" + (aliasUsage.get(current.getAlias()) - 1)); - } - getSubtrees().put(tableRelationship, current); - } - return getSubtrees().get(tableRelationship); - } - - // Recursive computation of number of edges. - public int getNumEdges() { - int ret = 0; - for (JoinTree tree : getSubtrees().values()) { - ret += 1; - ret += tree.getNumEdges(); - } - return ret; - } - - public boolean isLeaf() { - return getSubtrees().isEmpty(); - } - - // Breadth First Traversal. Unused currently. - public Iterator<JoinTree> bft() { - return new Iterator<JoinTree>() { - List<JoinTree> remaining = new ArrayList<JoinTree>() { - { - addAll(getSubtrees().values()); - } - }; - - @Override - public boolean hasNext() { - return remaining.isEmpty(); - } - - @Override - public JoinTree next() { - JoinTree retval = remaining.remove(0); - remaining.addAll(retval.getSubtrees().values()); - return retval; - } - - @Override - public void remove() { - throw new RuntimeException("Not implemented"); - } - }; - } - - // Depth first traversal of the tree. Used in forming join string. - public Iterator<JoinTree> dft() { - return new Iterator<JoinTree>() { - Stack<JoinTree> joinTreeStack = new Stack<JoinTree>() { - { - addAll(getSubtrees().values()); - } - }; - - @Override - public boolean hasNext() { - return !joinTreeStack.isEmpty(); - } - - @Override - public JoinTree next() { - JoinTree retval = joinTreeStack.pop(); - joinTreeStack.addAll(retval.getSubtrees().values()); - return retval; - } - - @Override - public void remove() { - throw new RuntimeException("Not implemented"); - } - }; - } - - public Set<JoinTree> leaves() { - Set<JoinTree> leaves = new HashSet<JoinTree>(); - Iterator<JoinTree> dft = dft(); - while (dft.hasNext()) { - JoinTree cur = dft.next(); - if (cur.isLeaf()) { - leaves.add(cur); - } - } - return leaves; - } -} http://git-wip-us.apache.org/repos/asf/lens/blob/908530f5/lens-cube/src/main/java/org/apache/lens/cube/parse/StorageUtil.java ---------------------------------------------------------------------- diff --git a/lens-cube/src/main/java/org/apache/lens/cube/parse/StorageUtil.java b/lens-cube/src/main/java/org/apache/lens/cube/parse/StorageUtil.java index 67b3f40..f9636d1 100644 --- a/lens-cube/src/main/java/org/apache/lens/cube/parse/StorageUtil.java +++ b/lens-cube/src/main/java/org/apache/lens/cube/parse/StorageUtil.java @@ -23,6 +23,8 @@ import java.util.*; import org.apache.lens.cube.metadata.FactPartition; import org.apache.lens.cube.metadata.StorageConstants; +import org.apache.commons.lang.StringUtils; + public final class StorageUtil { private StorageUtil() { @@ -153,6 +155,10 @@ public final class StorageUtil { } public static String getWhereClause(CandidateDim dim, String alias) { - return getWhereClause(dim.getWhereClause(), alias); + if (!dim.isWhereClauseAdded(alias) && !StringUtils.isBlank(dim.getWhereClause())) { + return getWhereClause(dim.getWhereClause(), alias); + } else { + return null; + } } }
