http://git-wip-us.apache.org/repos/asf/lens/blob/908530f5/lens-cube/src/main/java/org/apache/lens/cube/parse/TimerangeResolver.java ---------------------------------------------------------------------- diff --git a/lens-cube/src/main/java/org/apache/lens/cube/parse/TimerangeResolver.java b/lens-cube/src/main/java/org/apache/lens/cube/parse/TimerangeResolver.java index 1a83d09..33ec9d9 100644 --- a/lens-cube/src/main/java/org/apache/lens/cube/parse/TimerangeResolver.java +++ b/lens-cube/src/main/java/org/apache/lens/cube/parse/TimerangeResolver.java @@ -28,7 +28,9 @@ import org.apache.lens.cube.error.ColUnAvailableInTimeRange; import org.apache.lens.cube.error.ColUnAvailableInTimeRangeException; import org.apache.lens.cube.error.LensCubeErrorCode; import org.apache.lens.cube.metadata.*; +import org.apache.lens.cube.metadata.join.JoinPath; import org.apache.lens.cube.parse.DenormalizationResolver.ReferencedQueriedColumn; +import org.apache.lens.cube.parse.join.AutoJoinContext; import org.apache.lens.server.api.error.LensException; import org.apache.commons.lang.StringUtils; @@ -197,14 +199,14 @@ class TimerangeResolver implements ContextRewriter { if (!column.isColumnAvailableInTimeRange(range)) { log.info("Timerange queried is not in column life for {}, Removing join paths containing the column", column); // Remove join paths containing this column - Map<Aliased<Dimension>, List<SchemaGraph.JoinPath>> allPaths = joinContext.getAllPaths(); + Map<Aliased<Dimension>, List<JoinPath>> allPaths = joinContext.getAllPaths(); for (Aliased<Dimension> dimension : allPaths.keySet()) { - List<SchemaGraph.JoinPath> joinPaths = allPaths.get(dimension); - Iterator<SchemaGraph.JoinPath> joinPathIterator = joinPaths.iterator(); + List<JoinPath> joinPaths = allPaths.get(dimension); + Iterator<JoinPath> joinPathIterator = joinPaths.iterator(); while (joinPathIterator.hasNext()) { - SchemaGraph.JoinPath path = joinPathIterator.next(); + JoinPath path = joinPathIterator.next(); if (path.containsColumnOfTable(col, (AbstractCubeTable) cubeql.getCube())) { log.info("Removing join path: {} as columns :{} is not available in the range", path, col); joinPathIterator.remove();
http://git-wip-us.apache.org/repos/asf/lens/blob/908530f5/lens-cube/src/main/java/org/apache/lens/cube/parse/join/AutoJoinContext.java ---------------------------------------------------------------------- diff --git a/lens-cube/src/main/java/org/apache/lens/cube/parse/join/AutoJoinContext.java b/lens-cube/src/main/java/org/apache/lens/cube/parse/join/AutoJoinContext.java new file mode 100644 index 0000000..993955a --- /dev/null +++ b/lens-cube/src/main/java/org/apache/lens/cube/parse/join/AutoJoinContext.java @@ -0,0 +1,719 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.lens.cube.parse.join; + +import java.util.*; + +import org.apache.lens.cube.error.LensCubeErrorCode; +import org.apache.lens.cube.metadata.*; +import org.apache.lens.cube.metadata.join.JoinPath; +import org.apache.lens.cube.metadata.join.TableRelationship; +import org.apache.lens.cube.parse.*; +import org.apache.lens.server.api.error.LensException; + +import org.apache.commons.lang.StringUtils; +import org.apache.hadoop.hive.ql.parse.JoinType; + +import lombok.Getter; +import lombok.Setter; +import lombok.extern.slf4j.Slf4j; + +/** + * Store join chain information resolved by join resolver + */ +@Slf4j +public class AutoJoinContext { + // Map of a joined table to list of all possible paths from that table to + // the target + private final Map<Aliased<Dimension>, List<JoinPath>> allPaths; + private Set<Dimension> requiredDimensions; + @Getter + // Map of joined table to the join type (if provided by user) + private final Map<AbstractCubeTable, JoinType> tableJoinTypeMap; + + // True if joins were resolved automatically + private boolean joinsResolved; + // Target table for the auto join resolver + private final AbstractCubeTable autoJoinTarget; + // Configuration string to control join type + private String joinTypeCfg; + + // Map of a joined table to its columns which are part of any of the join + // paths. This is used in candidate table resolver + @Getter + private Map<Dimension, Map<AbstractCubeTable, List<String>>> joinPathFromColumns = new HashMap<>(); + + @Getter + private Map<Dimension, Map<AbstractCubeTable, List<String>>> joinPathToColumns = new HashMap<>(); + + // there can be separate join clause for each fact in-case of multi fact queries + @Getter + Map<CandidateFact, JoinClause> factClauses = new HashMap<>(); + @Getter + @Setter + JoinClause minCostClause; + private final boolean flattenBridgeTables; + private final String bridgeTableFieldAggr; + + public AutoJoinContext(Map<Aliased<Dimension>, List<JoinPath>> allPaths, + Set<Dimension> requiredDimensions, + Map<AbstractCubeTable, JoinType> tableJoinTypeMap, + AbstractCubeTable autoJoinTarget, String joinTypeCfg, boolean joinsResolved, + boolean flattenBridgeTables, String bridgeTableFieldAggr) { + this.allPaths = allPaths; + this.requiredDimensions = requiredDimensions; + initJoinPathColumns(); + this.tableJoinTypeMap = tableJoinTypeMap; + this.autoJoinTarget = autoJoinTarget; + this.joinTypeCfg = joinTypeCfg; + this.joinsResolved = joinsResolved; + this.flattenBridgeTables = flattenBridgeTables; + this.bridgeTableFieldAggr = bridgeTableFieldAggr; + log.debug("All join paths:{}", allPaths); + log.debug("Join path from columns:{}", joinPathFromColumns); + log.debug("Join path to columns:{}", joinPathToColumns); + } + + public AbstractCubeTable getAutoJoinTarget() { + return autoJoinTarget; + } + + private JoinClause getJoinClause(CandidateFact fact) { + if (fact == null || !factClauses.containsKey(fact)) { + return minCostClause; + } + return factClauses.get(fact); + } + + // Populate map of tables to their columns which are present in any of the + // join paths + private void initJoinPathColumns() { + for (List<JoinPath> paths : allPaths.values()) { + for (int i = 0; i < paths.size(); i++) { + JoinPath jp = paths.get(i); + jp.initColumnsForTable(); + } + } + refreshJoinPathColumns(); + } + + public void refreshJoinPathColumns() { + joinPathFromColumns.clear(); + joinPathToColumns.clear(); + for (Map.Entry<Aliased<Dimension>, List<JoinPath>> joinPathEntry : allPaths.entrySet()) { + List<JoinPath> joinPaths = joinPathEntry.getValue(); + Map<AbstractCubeTable, List<String>> fromColPaths = joinPathFromColumns.get(joinPathEntry.getKey().getObject()); + Map<AbstractCubeTable, List<String>> toColPaths = joinPathToColumns.get(joinPathEntry.getKey().getObject()); + if (fromColPaths == null) { + fromColPaths = new HashMap<>(); + joinPathFromColumns.put(joinPathEntry.getKey().getObject(), fromColPaths); + } + + if (toColPaths == null) { + toColPaths = new HashMap<>(); + joinPathToColumns.put(joinPathEntry.getKey().getObject(), toColPaths); + } + populateJoinPathCols(joinPaths, fromColPaths, toColPaths); + } + } + + private void populateJoinPathCols(List<JoinPath> joinPaths, + Map<AbstractCubeTable, List<String>> fromPathColumns, Map<AbstractCubeTable, List<String>> toPathColumns) { + for (JoinPath path : joinPaths) { + for (TableRelationship edge : path.getEdges()) { + AbstractCubeTable fromTable = edge.getFromTable(); + String fromColumn = edge.getFromColumn(); + List<String> columnsOfFromTable = fromPathColumns.get(fromTable); + if (columnsOfFromTable == null) { + columnsOfFromTable = new ArrayList<>(); + fromPathColumns.put(fromTable, columnsOfFromTable); + } + columnsOfFromTable.add(fromColumn); + + // Similarly populate for the 'to' table + AbstractCubeTable toTable = edge.getToTable(); + String toColumn = edge.getToColumn(); + List<String> columnsOfToTable = toPathColumns.get(toTable); + if (columnsOfToTable == null) { + columnsOfToTable = new ArrayList<>(); + toPathColumns.put(toTable, columnsOfToTable); + } + columnsOfToTable.add(toColumn); + } + } + } + + public void removeJoinedTable(Dimension dim) { + allPaths.remove(Aliased.create(dim)); + joinPathFromColumns.remove(dim); + } + + public String getFromString(String fromTable, CandidateFact fact, Set<Dimension> qdims, + Map<Dimension, CandidateDim> dimsToQuery, CubeQueryContext cubeql) throws LensException { + String fromString = fromTable; + log.info("All paths dump:{}", cubeql.getAutoJoinCtx().getAllPaths()); + if (qdims == null || qdims.isEmpty()) { + return fromString; + } + // Compute the merged join clause string for the min cost joinClause + String clause = getMergedJoinClause(cubeql, cubeql.getAutoJoinCtx().getJoinClause(fact), dimsToQuery); + + fromString += clause; + return fromString; + } + + // Some refactoring needed to account for multiple join paths + public String getMergedJoinClause(CubeQueryContext cubeql, JoinClause joinClause, + Map<Dimension, CandidateDim> dimsToQuery) { + Set<String> clauses = new LinkedHashSet<>(); + String joinTypeStr = ""; + JoinType joinType = JoinType.INNER; + + if (StringUtils.isNotBlank(joinTypeCfg)) { + joinType = JoinType.valueOf(joinTypeCfg.toUpperCase()); + joinTypeStr = JoinUtils.getJoinTypeStr(joinType); + } + + Iterator<JoinTree> iter = joinClause.getJoinTree().dft(); + boolean hasBridgeTable = false; + boolean initedBridgeClauses = false; + StringBuilder bridgeSelectClause = new StringBuilder(); + StringBuilder bridgeFromClause = new StringBuilder(); + StringBuilder bridgeFilterClause = new StringBuilder(); + StringBuilder bridgeJoinClause = new StringBuilder(); + StringBuilder bridgeGroupbyClause = new StringBuilder(); + + while (iter.hasNext()) { + JoinTree cur = iter.next(); + TableRelationship rel = cur.parentRelationship; + String toAlias, fromAlias; + fromAlias = cur.parent.getAlias(); + toAlias = cur.getAlias(); + hasBridgeTable = flattenBridgeTables && (hasBridgeTable || rel.isMapsToMany()); + // We have to push user specified filters for the joined tables + String userFilter = null; + // Partition condition on the tables also needs to be pushed depending + // on the join + String storageFilter = null; + + if (JoinType.INNER == joinType || JoinType.LEFTOUTER == joinType || JoinType.LEFTSEMI == joinType) { + // For inner and left joins push filter of right table + storageFilter = getStorageFilter(dimsToQuery, rel.getToTable(), toAlias); + dimsToQuery.get(rel.getToTable()).setWhereClauseAdded(toAlias); + } else if (JoinType.RIGHTOUTER == joinType) { + // For right outer joins, push filters of left table + if (rel.getFromTable() instanceof Dimension) { + storageFilter = getStorageFilter(dimsToQuery, rel.getFromTable(), fromAlias); + dimsToQuery.get(rel.getFromTable()).setWhereClauseAdded(fromAlias); + } + } else if (JoinType.FULLOUTER == joinType) { + // For full outer we need to push filters of both left and right + // tables in the join clause + String leftFilter = null, rightFilter = null; + String leftStorageFilter = null, rightStorgeFilter = null; + + if (rel.getFromTable() instanceof Dimension) { + leftStorageFilter = getStorageFilter(dimsToQuery, rel.getFromTable(), fromAlias); + if (StringUtils.isNotBlank((leftStorageFilter))) { + dimsToQuery.get(rel.getFromTable()).setWhereClauseAdded(fromAlias); + } + } + + rightStorgeFilter = getStorageFilter(dimsToQuery, rel.getToTable(), toAlias); + if (StringUtils.isNotBlank(rightStorgeFilter)) { + if (StringUtils.isNotBlank((leftStorageFilter))) { + leftStorageFilter += " and "; + } + dimsToQuery.get(rel.getToTable()).setWhereClauseAdded(toAlias); + } + + userFilter = (leftFilter == null ? "" : leftFilter) + (rightFilter == null ? "" : rightFilter); + storageFilter = + (leftStorageFilter == null ? "" : leftStorageFilter) + + (rightStorgeFilter == null ? "" : rightStorgeFilter); + } + StringBuilder clause = new StringBuilder(); + + // if a bridge table is present in the path + if (hasBridgeTable) { + // if any relation has bridge table, the clause becomes the following : + // join (" select " + joinkey + " aggr over fields from bridge table + from bridgeTable + [where user/storage + // filters] + groupby joinkey) on joincond" + // Or + // " join (select " + joinkey + " aggr over fields from table reached through bridge table + from bridge table + // join <next tables> on join condition + [and user/storage filters] + groupby joinkey) on joincond + if (!initedBridgeClauses) { + // we just found a bridge table in the path we need to initialize the clauses for subquery required for + // aggregating fields of bridge table + // initiliaze select clause with join key + bridgeSelectClause.append(" (select ").append(toAlias).append(".").append(rel.getToColumn()).append(" as ") + .append(rel.getToColumn()); + // group by join key + bridgeGroupbyClause.append(" group by ").append(toAlias).append(".").append(rel.getToColumn()); + // from clause with bridge table + bridgeFromClause.append(" from ").append(dimsToQuery.get(rel.getToTable()).getStorageString(toAlias)); + // we need to initialize filter clause with user filter clause or storage filter if applicable + if (StringUtils.isNotBlank(userFilter)) { + bridgeFilterClause.append(userFilter); + } + if (StringUtils.isNotBlank(storageFilter)) { + if (StringUtils.isNotBlank(bridgeFilterClause.toString())) { + bridgeFilterClause.append(" and "); + } + bridgeFilterClause.append(storageFilter); + } + // initialize final join clause + bridgeJoinClause.append(" on ").append(fromAlias).append(".") + .append(rel.getFromColumn()).append(" = ").append("%s") + .append(".").append(rel.getToColumn()); + initedBridgeClauses = true; + } else { + // if bridge clauses are already inited, this is a next table getting joined with bridge table + // we will append a simple join clause + bridgeFromClause.append(joinTypeStr).append(" join "); + bridgeFromClause.append(dimsToQuery.get(rel.getToTable()).getStorageString(toAlias)); + bridgeFromClause.append(" on ").append(fromAlias).append(".") + .append(rel.getFromColumn()).append(" = ").append(toAlias) + .append(".").append(rel.getToColumn()); + + if (StringUtils.isNotBlank(userFilter)) { + bridgeFromClause.append(" and ").append(userFilter); + } + if (StringUtils.isNotBlank(storageFilter)) { + bridgeFromClause.append(" and ").append(storageFilter); + } + } + if (cubeql.getTblAliasToColumns().get(toAlias) != null + && !cubeql.getTblAliasToColumns().get(toAlias).isEmpty()) { + // there are fields selected from this table after seeing bridge table in path + // we should make subQuery for this selection + clause.append(joinTypeStr).append(" join "); + clause.append(bridgeSelectClause.toString()); + for (String col : cubeql.getTblAliasToColumns().get(toAlias)) { + clause.append(",").append(bridgeTableFieldAggr).append("(").append(toAlias) + .append(".").append(col) + .append(")") + .append(" as ").append(col); + } + String bridgeFrom = bridgeFromClause.toString(); + clause.append(bridgeFrom); + String bridgeFilter = bridgeFilterClause.toString(); + if (StringUtils.isNotBlank(bridgeFilter)) { + if (bridgeFrom.contains(" join ")) { + clause.append(" and "); + } else { + clause.append(" where"); + } + clause.append(bridgeFilter); + } + clause.append(bridgeGroupbyClause.toString()); + clause.append(") ").append(toAlias); + clause.append(String.format(bridgeJoinClause.toString(), toAlias)); + clauses.add(clause.toString()); + } + if (cur.getSubtrees().isEmpty()) { + // clear bridge flags and builders, as there are no more clauses in this tree. + hasBridgeTable = false; + initedBridgeClauses = false; + bridgeSelectClause.setLength(0); + bridgeFromClause.setLength(0); + bridgeFilterClause.setLength(0); + bridgeJoinClause.setLength(0); + bridgeGroupbyClause.setLength(0); + } + } else { + // Simple join clause is : + // joinType + " join " + destTable + " on " + joinCond + [" and" + userFilter] + ["and" + storageFilter] + clause.append(joinTypeStr).append(" join "); + //Add storage table name followed by alias + clause.append(dimsToQuery.get(rel.getToTable()).getStorageString(toAlias)); + clause.append(" on ").append(fromAlias).append(".") + .append(rel.getFromColumn()).append(" = ").append(toAlias) + .append(".").append(rel.getToColumn()); + + if (StringUtils.isNotBlank(userFilter)) { + clause.append(" and ").append(userFilter); + } + if (StringUtils.isNotBlank(storageFilter)) { + clause.append(" and ").append(storageFilter); + } + clauses.add(clause.toString()); + } + } + return StringUtils.join(clauses, ""); + } + + public Set<Dimension> getDimsOnPath(Map<Aliased<Dimension>, List<TableRelationship>> joinChain, + Set<Dimension> qdims) { + Set<Dimension> dimsOnPath = new HashSet<>(); + for (Map.Entry<Aliased<Dimension>, List<TableRelationship>> entry : joinChain.entrySet()) { + List<TableRelationship> chain = entry.getValue(); + Dimension table = entry.getKey().getObject(); + + // check if join with this dimension is required + if (!qdims.contains(table)) { + continue; + } + + for (int i = chain.size() - 1; i >= 0; i--) { + TableRelationship rel = chain.get(i); + dimsOnPath.add((Dimension) rel.getToTable()); + } + } + return dimsOnPath; + } + + private String getStorageFilter(Map<Dimension, CandidateDim> dimsToQuery, AbstractCubeTable table, String alias) { + String whereClause = ""; + if (dimsToQuery != null && dimsToQuery.get(table) != null) { + if (StringUtils.isNotBlank(dimsToQuery.get(table).getWhereClause())) { + whereClause = dimsToQuery.get(table).getWhereClause(); + if (alias != null) { + whereClause = StorageUtil.getWhereClause(whereClause, alias); + } + } + } + return whereClause; + } + + /** + * @return the joinsResolved + */ + public boolean isJoinsResolved() { + return joinsResolved; + } + + // Includes both queried join paths and optional join paths + public Set<String> getAllJoinPathColumnsOfTable(AbstractCubeTable table) { + Set<String> allPaths = new HashSet<>(); + for (Map<AbstractCubeTable, List<String>> optPaths : joinPathFromColumns.values()) { + if (optPaths.get(table) != null) { + allPaths.addAll(optPaths.get(table)); + } + } + + for (Map<AbstractCubeTable, List<String>> optPaths : joinPathToColumns.values()) { + if (optPaths.get(table) != null) { + allPaths.addAll(optPaths.get(table)); + } + } + + return allPaths; + } + + public void pruneAllPaths(CubeInterface cube, final Set<CandidateFact> cfacts, + final Map<Dimension, CandidateDim> dimsToQuery) throws LensException { + // Remove join paths which cannot be satisfied by the resolved candidate + // fact and dimension tables + if (cfacts != null) { + // include columns from all picked facts + Set<String> factColumns = new HashSet<>(); + for (CandidateFact cFact : cfacts) { + factColumns.addAll(cFact.getColumns()); + } + + for (List<JoinPath> paths : allPaths.values()) { + for (int i = 0; i < paths.size(); i++) { + JoinPath jp = paths.get(i); + List<String> cubeCols = jp.getColumnsForTable((AbstractCubeTable) cube); + if (cubeCols != null && !factColumns.containsAll(cubeCols)) { + // This path requires some columns from the cube which are not + // present in the candidate fact + // Remove this path + log.info("Removing join path:{} as columns :{} dont exist", jp, cubeCols); + paths.remove(i); + i--; + } + } + } + pruneEmptyPaths(allPaths); + } + pruneAllPaths(dimsToQuery); + } + + /** + * Prunes allPaths by removing paths which contain columns that are not present in any candidate dims. + * + * @param candidateDims candidate dimensions + */ + public void pruneAllPathsForCandidateDims(Map<Dimension, Set<CandidateDim>> candidateDims) throws LensException { + Map<Dimension, Set<String>> dimColumns = new HashMap<>(); + // populate all columns present in candidate dims for each dimension + for (Map.Entry<Dimension, Set<CandidateDim>> entry : candidateDims.entrySet()) { + Dimension dim = entry.getKey(); + Set<String> allColumns = new HashSet<>(); + for (CandidateDim cdim : entry.getValue()) { + allColumns.addAll(cdim.getColumns()); + } + dimColumns.put(dim, allColumns); + } + for (List<JoinPath> paths : allPaths.values()) { + for (int i = 0; i < paths.size(); i++) { + JoinPath jp = paths.get(i); + for (AbstractCubeTable refTable : jp.getAllTables()) { + List<String> cols = jp.getColumnsForTable(refTable); + if (refTable instanceof Dimension) { + if (cols != null && (dimColumns.get(refTable) == null || !dimColumns.get(refTable).containsAll(cols))) { + // This path requires some columns from the cube which are not present in any candidate dim + // Remove this path + log.info("Removing join path:{} as columns :{} don't exist", jp, cols); + paths.remove(i); + i--; + break; + } + } + } + } + } + pruneEmptyPaths(allPaths); + } + + private void pruneEmptyPaths(Map<Aliased<Dimension>, List<JoinPath>> allPaths) throws LensException { + Iterator<Map.Entry<Aliased<Dimension>, List<JoinPath>>> iter = allPaths.entrySet().iterator(); + Set<Dimension> noPathDims = new HashSet<>(); + while (iter.hasNext()) { + Map.Entry<Aliased<Dimension>, List<JoinPath>> entry = iter.next(); + if (entry.getValue().isEmpty()) { + noPathDims.add(entry.getKey().getObject()); + iter.remove(); + } + } + noPathDims.retainAll(requiredDimensions); + + if (!noPathDims.isEmpty()) { + throw new LensException(LensCubeErrorCode.NO_JOIN_PATH.getLensErrorInfo(), autoJoinTarget.getName(), + noPathDims.toString()); + } + } + + private Map<Aliased<Dimension>, List<JoinPath>> pruneFactPaths(CubeInterface cube, + final CandidateFact cFact) throws LensException { + Map<Aliased<Dimension>, List<JoinPath>> prunedPaths = new HashMap<>(); + // Remove join paths which cannot be satisfied by the candidate fact + for (Map.Entry<Aliased<Dimension>, List<JoinPath>> ppaths : allPaths.entrySet()) { + prunedPaths.put(ppaths.getKey(), new ArrayList<>(ppaths.getValue())); + List<JoinPath> paths = prunedPaths.get(ppaths.getKey()); + for (int i = 0; i < paths.size(); i++) { + JoinPath jp = paths.get(i); + List<String> cubeCols = jp.getColumnsForTable((AbstractCubeTable) cube); + if (cubeCols != null && !cFact.getColumns().containsAll(cubeCols)) { + // This path requires some columns from the cube which are not + // present in the candidate fact + // Remove this path + log.info("Removing join path:{} as columns :{} don't exist", jp, cubeCols); + paths.remove(i); + i--; + } + } + } + pruneEmptyPaths(prunedPaths); + return prunedPaths; + } + + private void pruneAllPaths(final Map<Dimension, CandidateDim> dimsToQuery) throws LensException { + // Remove join paths which cannot be satisfied by the resolved dimension + // tables + if (dimsToQuery != null && !dimsToQuery.isEmpty()) { + for (CandidateDim candidateDim : dimsToQuery.values()) { + Set<String> dimCols = candidateDim.getTable().getAllFieldNames(); + for (List<JoinPath> paths : allPaths.values()) { + for (int i = 0; i < paths.size(); i++) { + JoinPath jp = paths.get(i); + List<String> candidateDimCols = jp.getColumnsForTable(candidateDim.getBaseTable()); + if (candidateDimCols != null && !dimCols.containsAll(candidateDimCols)) { + // This path requires some columns from the dimension which are + // not present in the candidate dim + // Remove this path + log.info("Removing join path:{} as columns :{} dont exist", jp, candidateDimCols); + paths.remove(i); + i--; + } + } + } + } + pruneEmptyPaths(allPaths); + } + } + + /** + * There can be multiple join paths between a dimension and the target. Set of all possible join clauses is the + * cartesian product of join paths of all dimensions + */ + private Iterator<JoinClause> getJoinClausesForAllPaths(final CandidateFact fact, + final Set<Dimension> qDims, final CubeQueryContext cubeql) throws LensException { + Map<Aliased<Dimension>, List<JoinPath>> allPaths; + // if fact is passed only look at paths possible from fact to dims + if (fact != null) { + allPaths = pruneFactPaths(cubeql.getCube(), fact); + } else { + allPaths = new LinkedHashMap<>(this.allPaths); + } + // prune allPaths with qdims + pruneAllPathsWithQueriedDims(allPaths, qDims); + + // Number of paths in each path set + final int[] groupSizes = new int[allPaths.values().size()]; + // Total number of elements in the cartesian product + int numSamples = 1; + // All path sets + final List<List<JoinPath>> pathSets = new ArrayList<>(); + // Dimension corresponding to the path sets + final List<Aliased<Dimension>> dimensions = new ArrayList<>(groupSizes.length); + + int i = 0; + for (Map.Entry<Aliased<Dimension>, List<JoinPath>> entry : allPaths.entrySet()) { + dimensions.add(entry.getKey()); + List<JoinPath> group = entry.getValue(); + pathSets.add(group); + groupSizes[i] = group.size(); + numSamples *= groupSizes[i]; + i++; + } + + final int[] selection = new int[groupSizes.length]; + final int MAX_SAMPLE_COUNT = numSamples; + + // Return a lazy iterator over all possible join chains + return new Iterator<JoinClause>() { + int sample = 0; + + @Override + public boolean hasNext() { + return sample < MAX_SAMPLE_COUNT; + } + + @Override + public JoinClause next() { + Map<Aliased<Dimension>, List<TableRelationship>> chain = new LinkedHashMap<>(); + //generate next permutation. + for (int i = groupSizes.length - 1, base = sample; i >= 0; base /= groupSizes[i], i--) { + selection[i] = base % groupSizes[i]; + } + for (int i = 0; i < selection.length; i++) { + int selectedPath = selection[i]; + List<TableRelationship> path = pathSets.get(i).get(selectedPath).getEdges(); + chain.put(dimensions.get(i), path); + } + + Set<Dimension> dimsOnPath = getDimsOnPath(chain, qDims); + + sample++; + // Cost of join = number of tables joined in the clause + return new JoinClause(cubeql, chain, dimsOnPath); + } + + @Override + public void remove() { + throw new UnsupportedOperationException("Cannot remove elements!"); + } + }; + } + + /** + * Given allPaths, it will remove entries where key is a non-join chain dimension and not contained in qdims + * + * @param allPaths All join paths + * @param qDims queried dimensions + */ + private void pruneAllPathsWithQueriedDims(Map<Aliased<Dimension>, List<JoinPath>> allPaths, + Set<Dimension> qDims) { + Iterator<Map.Entry<Aliased<Dimension>, List<JoinPath>>> iterator = allPaths.entrySet().iterator(); + while (iterator.hasNext()) { + Map.Entry<Aliased<Dimension>, List<JoinPath>> cur = iterator.next(); + if (!qDims.contains(cur.getKey().getObject())) { + log.info("removing from allPaths: {}", cur); + iterator.remove(); + } + } + } + + public Set<Dimension> pickOptionalTables(final CandidateFact fact, + Set<Dimension> qdims, CubeQueryContext cubeql) throws LensException { + // Find the min cost join clause and add dimensions in the clause as optional dimensions + Set<Dimension> joiningOptionalTables = new HashSet<>(); + if (qdims == null) { + return joiningOptionalTables; + } + // find least cost path + Iterator<JoinClause> itr = getJoinClausesForAllPaths(fact, qdims, cubeql); + JoinClause minCostClause = null; + while (itr.hasNext()) { + JoinClause clause = itr.next(); + if (minCostClause == null || minCostClause.getCost() > clause.getCost()) { + minCostClause = clause; + } + } + + if (minCostClause == null) { + throw new LensException(LensCubeErrorCode.NO_JOIN_PATH.getLensErrorInfo(), + qdims.toString(), autoJoinTarget.getName()); + } + + log.info("Fact: {} minCostClause:{}", fact, minCostClause); + if (fact != null) { + cubeql.getAutoJoinCtx().getFactClauses().put(fact, minCostClause); + } else { + cubeql.getAutoJoinCtx().setMinCostClause(minCostClause); + } + for (Dimension dim : minCostClause.getDimsInPath()) { + if (!qdims.contains(dim)) { + joiningOptionalTables.add(dim); + } + } + + minCostClause.initChainColumns(); + // prune candidate dims of joiningOptionalTables wrt joining columns + for (Dimension dim : joiningOptionalTables) { + for (Iterator<CandidateDim> i = cubeql.getCandidateDimTables().get(dim).iterator(); i.hasNext();) { + CandidateDim cDim = i.next(); + if (!cDim.getColumns().containsAll(minCostClause.chainColumns.get(dim))) { + i.remove(); + log.info("Not considering dimTable:{} as its columns are not part of any join paths. Join columns:{}", + cDim.getTable(), minCostClause.chainColumns.get(dim)); + cubeql.addDimPruningMsgs(dim, cDim.getTable(), + CandidateTablePruneCause.noColumnPartOfAJoinPath(minCostClause.chainColumns.get(dim))); + } + } + if (cubeql.getCandidateDimTables().get(dim).size() == 0) { + throw new LensException(LensCubeErrorCode.NO_DIM_HAS_COLUMN.getLensErrorInfo(), dim.getName(), + minCostClause.chainColumns.get(dim).toString()); + } + } + + return joiningOptionalTables; + } + + public Map<Aliased<Dimension>, List<JoinPath>> getAllPaths() { + return allPaths; + } + + public boolean isReachableDim(Dimension dim) { + Aliased<Dimension> aliased = Aliased.create(dim); + return isReachableDim(aliased); + } + + public boolean isReachableDim(Dimension dim, String alias) { + Aliased<Dimension> aliased = Aliased.create(dim, alias); + return isReachableDim(aliased); + } + + private boolean isReachableDim(Aliased<Dimension> aliased) { + return allPaths.containsKey(aliased) && !allPaths.get(aliased).isEmpty(); + } +} http://git-wip-us.apache.org/repos/asf/lens/blob/908530f5/lens-cube/src/main/java/org/apache/lens/cube/parse/join/JoinClause.java ---------------------------------------------------------------------- diff --git a/lens-cube/src/main/java/org/apache/lens/cube/parse/join/JoinClause.java b/lens-cube/src/main/java/org/apache/lens/cube/parse/join/JoinClause.java new file mode 100644 index 0000000..acc9d5c --- /dev/null +++ b/lens-cube/src/main/java/org/apache/lens/cube/parse/join/JoinClause.java @@ -0,0 +1,139 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.lens.cube.parse.join; + +import java.util.*; + +import org.apache.lens.cube.metadata.AbstractCubeTable; +import org.apache.lens.cube.metadata.Dimension; +import org.apache.lens.cube.metadata.join.TableRelationship; +import org.apache.lens.cube.parse.Aliased; +import org.apache.lens.cube.parse.CubeQueryContext; + +import lombok.Getter; +import lombok.ToString; + +@ToString +public class JoinClause implements Comparable<JoinClause> { + private final int cost; + // all dimensions in path except target + @Getter + private final Set<Dimension> dimsInPath; + private CubeQueryContext cubeql; + private final Map<Aliased<Dimension>, List<TableRelationship>> chain; + @Getter + private final JoinTree joinTree; + transient Map<AbstractCubeTable, Set<String>> chainColumns = new HashMap<>(); + + public JoinClause(CubeQueryContext cubeql, Map<Aliased<Dimension>, + List<TableRelationship>> chain, Set<Dimension> dimsInPath) { + this.cubeql = cubeql; + this.chain = chain; + this.joinTree = mergeJoinChains(chain); + this.cost = joinTree.getNumEdges(); + this.dimsInPath = dimsInPath; + } + + void initChainColumns() { + for (List<TableRelationship> path : chain.values()) { + for (TableRelationship edge : path) { + Set<String> fcols = chainColumns.get(edge.getFromTable()); + if (fcols == null) { + fcols = new HashSet<>(); + chainColumns.put(edge.getFromTable(), fcols); + } + fcols.add(edge.getFromColumn()); + + Set<String> tocols = chainColumns.get(edge.getToTable()); + if (tocols == null) { + tocols = new HashSet<>(); + chainColumns.put(edge.getToTable(), tocols); + } + tocols.add(edge.getToColumn()); + } + } + } + + public int getCost() { + return cost; + } + + @Override + public int compareTo(JoinClause joinClause) { + return cost - joinClause.getCost(); + } + + /** + * Takes chains and merges them in the form of a tree. If two chains have some common path till some table and + * bifurcate from there, then in the chain, both paths will have the common path but the resultant tree will have + * single path from root(cube) to that table and paths will bifurcate from there. + * <p/> + * For example, citystate = [basecube.cityid=citydim.id], [citydim.stateid=statedim.id] + * cityzip = [basecube.cityid=citydim.id], [citydim.zipcode=zipdim.code] + * <p/> + * Without merging, the behaviour is like this: + * <p/> + * <p/> + * (basecube.cityid=citydim.id) (citydim.stateid=statedim.id) + * _____________________________citydim____________________________________statedim + * | + * basecube------| + * |_____________________________citydim____________________________________zipdim + * + * (basecube.cityid=citydim.id) (citydim.zipcode=zipdim.code) + * + * <p/> + * Merging will result in a tree like following + * <p/> (citydim.stateid=statedim.id) + * <p/> ________________________________ statedim + * (basecube.cityid=citydim.id) | + * basecube-------------------------------citydim---- | + * |________________________________ zipdim + * + * (citydim.zipcode=zipdim.code) + * + * <p/> + * Doing this will reduce the number of joins wherever possible. + * + * @param chain Joins in Linear format. + * @return Joins in Tree format + */ + public JoinTree mergeJoinChains(Map<Aliased<Dimension>, List<TableRelationship>> chain) { + Map<String, Integer> aliasUsage = new HashMap<>(); + JoinTree root = JoinTree.createRoot(); + for (Map.Entry<Aliased<Dimension>, List<TableRelationship>> entry : chain.entrySet()) { + JoinTree current = root; + // Last element in this list is link from cube to first dimension + for (int i = entry.getValue().size() - 1; i >= 0; i--) { + // Adds a child if needed, or returns a child already existing corresponding to the given link. + current = current.addChild(entry.getValue().get(i), cubeql, aliasUsage); + } + // This is a destination table. Decide alias separately. e.g. chainname + // nullcheck is necessary because dimensions can be destinations too. In that case getAlias() == null + if (entry.getKey().getAlias() != null) { + current.setAlias(entry.getKey().getAlias()); + } + } + if (root.getSubtrees().size() > 0) { + root.setAlias(cubeql.getAliasForTableName( + root.getSubtrees().keySet().iterator().next().getFromTable().getName())); + } + return root; + } +} http://git-wip-us.apache.org/repos/asf/lens/blob/908530f5/lens-cube/src/main/java/org/apache/lens/cube/parse/join/JoinTree.java ---------------------------------------------------------------------- diff --git a/lens-cube/src/main/java/org/apache/lens/cube/parse/join/JoinTree.java b/lens-cube/src/main/java/org/apache/lens/cube/parse/join/JoinTree.java new file mode 100644 index 0000000..197847c --- /dev/null +++ b/lens-cube/src/main/java/org/apache/lens/cube/parse/join/JoinTree.java @@ -0,0 +1,164 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.lens.cube.parse.join; + +import java.util.*; + +import org.apache.lens.cube.metadata.AbstractCubeTable; +import org.apache.lens.cube.metadata.join.TableRelationship; +import org.apache.lens.cube.parse.CubeQueryContext; + +import org.apache.hadoop.hive.ql.parse.JoinType; + +import lombok.Data; +import lombok.EqualsAndHashCode; +import lombok.ToString; + +@Data +@ToString(exclude = "parent") +@EqualsAndHashCode(exclude = "parent") +public class JoinTree { + //parent of the node + JoinTree parent; + // current table is parentRelationship.destTable; + TableRelationship parentRelationship; + // Alias for the join clause + String alias; + private Map<TableRelationship, JoinTree> subtrees = new LinkedHashMap<>(); + // Number of nodes from root to this node. depth of root is 0. Unused for now. + private int depthFromRoot; + // join type of the current table. + JoinType joinType; + + public static JoinTree createRoot() { + return new JoinTree(null, null, 0); + } + + public JoinTree(JoinTree parent, TableRelationship tableRelationship, + int depthFromRoot) { + this.parent = parent; + this.parentRelationship = tableRelationship; + this.depthFromRoot = depthFromRoot; + } + + public JoinTree addChild(TableRelationship tableRelationship, + CubeQueryContext query, Map<String, Integer> aliasUsage) { + if (getSubtrees().get(tableRelationship) == null) { + JoinTree current = new JoinTree(this, tableRelationship, + this.depthFromRoot + 1); + // Set alias. Need to compute only when new node is being created. + // The following code ensures that For intermediate tables, aliases are given + // in the order cityDim, cityDim_0, cityDim_1, ... + // And for destination tables, an alias will be decided from here but might be + // overridden outside this function. + AbstractCubeTable destTable = tableRelationship.getToTable(); + current.setAlias(query.getAliasForTableName(destTable.getName())); + if (aliasUsage.get(current.getAlias()) == null) { + aliasUsage.put(current.getAlias(), 0); + } else { + aliasUsage.put(current.getAlias(), aliasUsage.get(current.getAlias()) + 1); + current.setAlias(current.getAlias() + "_" + (aliasUsage.get(current.getAlias()) - 1)); + } + getSubtrees().put(tableRelationship, current); + } + return getSubtrees().get(tableRelationship); + } + + // Recursive computation of number of edges. + public int getNumEdges() { + int ret = 0; + for (JoinTree tree : getSubtrees().values()) { + ret += 1; + ret += tree.getNumEdges(); + } + return ret; + } + + public boolean isLeaf() { + return getSubtrees().isEmpty(); + } + + // Breadth First Traversal. Unused currently. + public Iterator<JoinTree> bft() { + return new Iterator<JoinTree>() { + List<JoinTree> remaining = new ArrayList<JoinTree>() { + { + addAll(getSubtrees().values()); + } + }; + + @Override + public boolean hasNext() { + return remaining.isEmpty(); + } + + @Override + public JoinTree next() { + JoinTree retVal = remaining.remove(0); + remaining.addAll(retVal.getSubtrees().values()); + return retVal; + } + + @Override + public void remove() { + throw new RuntimeException("Not implemented"); + } + }; + } + + // Depth first traversal of the tree. Used in forming join string. + public Iterator<JoinTree> dft() { + return new Iterator<JoinTree>() { + Stack<JoinTree> joinTreeStack = new Stack<JoinTree>() { + { + addAll(getSubtrees().values()); + } + }; + + @Override + public boolean hasNext() { + return !joinTreeStack.isEmpty(); + } + + @Override + public JoinTree next() { + JoinTree retVal = joinTreeStack.pop(); + joinTreeStack.addAll(retVal.getSubtrees().values()); + return retVal; + } + + @Override + public void remove() { + throw new RuntimeException("Not implemented"); + } + }; + } + + public Set<JoinTree> leaves() { + Set<JoinTree> leaves = new HashSet<>(); + Iterator<JoinTree> dft = dft(); + while (dft.hasNext()) { + JoinTree cur = dft.next(); + if (cur.isLeaf()) { + leaves.add(cur); + } + } + return leaves; + } +} http://git-wip-us.apache.org/repos/asf/lens/blob/908530f5/lens-cube/src/main/java/org/apache/lens/cube/parse/join/JoinUtils.java ---------------------------------------------------------------------- diff --git a/lens-cube/src/main/java/org/apache/lens/cube/parse/join/JoinUtils.java b/lens-cube/src/main/java/org/apache/lens/cube/parse/join/JoinUtils.java new file mode 100644 index 0000000..4efa67b --- /dev/null +++ b/lens-cube/src/main/java/org/apache/lens/cube/parse/join/JoinUtils.java @@ -0,0 +1,49 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.lens.cube.parse.join; + +import org.apache.hadoop.hive.ql.parse.JoinType; + +public class JoinUtils { + + private JoinUtils() { + } + + public static String getJoinTypeStr(JoinType joinType) { + if (joinType == null) { + return ""; + } + switch (joinType) { + case FULLOUTER: + return " full outer"; + case INNER: + return " inner"; + case LEFTOUTER: + return " left outer"; + case LEFTSEMI: + return " left semi"; + case UNIQUE: + return " unique"; + case RIGHTOUTER: + return " right outer"; + default: + return ""; + } + } +} http://git-wip-us.apache.org/repos/asf/lens/blob/908530f5/lens-cube/src/test/java/org/apache/lens/cube/metadata/TestCubeMetastoreClient.java ---------------------------------------------------------------------- diff --git a/lens-cube/src/test/java/org/apache/lens/cube/metadata/TestCubeMetastoreClient.java b/lens-cube/src/test/java/org/apache/lens/cube/metadata/TestCubeMetastoreClient.java index 1638825..0c4871c 100644 --- a/lens-cube/src/test/java/org/apache/lens/cube/metadata/TestCubeMetastoreClient.java +++ b/lens-cube/src/test/java/org/apache/lens/cube/metadata/TestCubeMetastoreClient.java @@ -30,7 +30,7 @@ import java.util.*; import org.apache.lens.cube.error.LensCubeErrorCode; import org.apache.lens.cube.metadata.ExprColumn.ExprSpec; -import org.apache.lens.cube.metadata.ReferencedDimAtrribute.ChainRefCol; +import org.apache.lens.cube.metadata.ReferencedDimAttribute.ChainRefCol; import org.apache.lens.cube.metadata.timeline.EndsAndHolesPartitionTimeline; import org.apache.lens.cube.metadata.timeline.PartitionTimeline; import org.apache.lens.cube.metadata.timeline.StoreAllPartitionTimeline; @@ -147,7 +147,7 @@ public class TestCubeMetastoreClient { CubeMetastoreClient.close(); } - private static void defineCube(String cubeName, String cubeNameWithProps, String derivedCubeName, + private static void defineCube(final String cubeName, String cubeNameWithProps, String derivedCubeName, String derivedCubeNameWithProps) throws LensException { cubeMeasures = new HashSet<>(); cubeMeasures.add(new ColumnMeasure( @@ -177,21 +177,16 @@ public class TestCubeMetastoreClient { } cubeDimensions = new HashSet<>(); List<CubeDimAttribute> locationHierarchy = new ArrayList<>(); - locationHierarchy.add(new ReferencedDimAtrribute(new FieldSchema("zipcode", "int", "zip"), "Zip refer", - new TableReference("zipdim", "zipcode"))); - locationHierarchy.add(new ReferencedDimAtrribute(new FieldSchema("cityid", "int", "city"), "City refer", - new TableReference("citydim", "id"))); - locationHierarchy.add(new ReferencedDimAtrribute(new FieldSchema("stateid", "int", "state"), "State refer", - new TableReference("statedim", "id"))); - locationHierarchy.add(new ReferencedDimAtrribute(new FieldSchema("countryid", "int", "country"), "Country refer", - new TableReference("countrydim", "id"))); + locationHierarchy.add(new BaseDimAttribute(new FieldSchema("zipcode", "int", "zip"))); + locationHierarchy.add(new BaseDimAttribute(new FieldSchema("cityid", "int", "city"))); + locationHierarchy.add(new BaseDimAttribute(new FieldSchema("stateid", "int", "state"))); + locationHierarchy.add(new BaseDimAttribute(new FieldSchema("countryid", "int", "country"))); List<String> regions = Arrays.asList("APAC", "EMEA", "USA"); locationHierarchy.add(new BaseDimAttribute(new FieldSchema("regionname", "string", "region"), "regionname", null, null, null, null, regions)); cubeDimensions.add(new HierarchicalDimAttribute("location", "location hierarchy", locationHierarchy)); cubeDimensions.add(new BaseDimAttribute(new FieldSchema("dim1", "string", "basedim"))); - cubeDimensions.add(new ReferencedDimAtrribute(new FieldSchema("dim2", "id", "ref dim"), "Dim2 refer", - new TableReference("testdim2", "id"))); + cubeDimensions.add(new BaseDimAttribute(new FieldSchema("dim2", "id", "ref dim"), "Dim2 refer", null, null, null)); Set<CubeDimAttribute> dummyDimAttributes = Sets.newHashSet(); for (int i = 0; i < 5000; i++) { dummyDimAttributes.add(new BaseDimAttribute(new FieldSchema("dummy_dim" + i, "string", "dummy dim " + i), @@ -224,15 +219,14 @@ public class TestCubeMetastoreClient { "SUBSTR EXPR", expr1, expr2)); List<CubeDimAttribute> locationHierarchyWithStartTime = new ArrayList<>(); - locationHierarchyWithStartTime.add(new ReferencedDimAtrribute(new FieldSchema("zipcode2", "int", "zip"), - "Zip refer2", new TableReference("zipdim", "zipcode"), NOW, NOW, - 100.0, true, 1000L)); - locationHierarchyWithStartTime.add(new ReferencedDimAtrribute(new FieldSchema("cityid2", "int", "city"), - "City refer2", new TableReference("citydim", "id"), NOW, null, null)); - locationHierarchyWithStartTime.add(new ReferencedDimAtrribute(new FieldSchema("stateid2", "int", "state"), - "state refer2", new TableReference("statedim", "id"), NOW, null, 100.0)); - locationHierarchyWithStartTime.add(new ReferencedDimAtrribute(new FieldSchema("countryid2", "int", "country"), - "Country refer2", new TableReference("countrydim", "id"), null, null, null)); + locationHierarchyWithStartTime.add(new BaseDimAttribute(new FieldSchema("zipcode2", "int", "zip"), + "Zip refer2", NOW, NOW, 100.0, 1000L)); + locationHierarchyWithStartTime.add(new BaseDimAttribute(new FieldSchema("cityid2", "int", "city"), + "City refer2", NOW, null, null)); + locationHierarchyWithStartTime.add(new BaseDimAttribute(new FieldSchema("stateid2", "int", "state"), + "state refer2", NOW, null, 100.0)); + locationHierarchyWithStartTime.add(new BaseDimAttribute(new FieldSchema("countryid2", "int", "country"), + "Country refer2", null, null, null)); locationHierarchyWithStartTime.add(new BaseDimAttribute(new FieldSchema("regionname2", "string", "region"), "regionname2", null, null, null, null, regions)); @@ -240,19 +234,10 @@ public class TestCubeMetastoreClient { .add(new HierarchicalDimAttribute("location2", "localtion hierarchy2", locationHierarchyWithStartTime)); cubeDimensions.add(new BaseDimAttribute(new FieldSchema("dim1startTime", "string", "basedim"), "Dim With starttime", NOW, null, 100.0)); - cubeDimensions.add(new ReferencedDimAtrribute(new FieldSchema("dim2start", "string", "ref dim"), - "Dim2 with starttime", new TableReference("testdim2", "id"), - NOW, NOW, 100.0)); + cubeDimensions.add(new BaseDimAttribute(new FieldSchema("dim2start", "string", "ref dim"), + "Dim2 with starttime", NOW, NOW, 100.0)); - List<TableReference> multiRefs = new ArrayList<>(); - multiRefs.add(new TableReference("testdim2", "id")); - multiRefs.add(new TableReference("testdim3", "id")); - multiRefs.add(new TableReference("testdim4", "id")); - - cubeDimensions.add(new ReferencedDimAtrribute(new FieldSchema("dim3", "string", "multi ref dim"), "Dim3 refer", - multiRefs)); - cubeDimensions.add(new ReferencedDimAtrribute(new FieldSchema("dim3start", "string", "multi ref dim"), - "Dim3 with starttime", multiRefs, NOW, null, 100.0)); + cubeDimensions.add(new BaseDimAttribute(new FieldSchema("dim3", "string", "multi ref dim"))); cubeDimensions.add(new BaseDimAttribute(new FieldSchema("region", "string", "region dim"), "region", null, null, null, null, regions)); @@ -278,7 +263,81 @@ public class TestCubeMetastoreClient { chain.add(new TableReference("citydim", "id")); cityChain.addPath(chain); joinChains.add(cityChain); - cubeDimensions.add(new ReferencedDimAtrribute(new FieldSchema("zipcityname", "string", "zip city name"), + joinChains.add(new JoinChain("cubeState", "cube-state", "state thru cube") { + { + addPath(new ArrayList<TableReference>() { + { + add(new TableReference(cubeName, "stateid")); + add(new TableReference("statedim", "id")); + } + }); + addPath(new ArrayList<TableReference>() { + { + add(new TableReference(cubeName, "stateid2")); + add(new TableReference("statedim", "id")); + } + }); + } + }); + joinChains.add(new JoinChain("cubeCountry", "cube-country", "country thru cube") { + { + addPath(new ArrayList<TableReference>() { + { + add(new TableReference(cubeName, "countryid")); + add(new TableReference("countrydim", "id")); + } + }); + addPath(new ArrayList<TableReference>() { + { + add(new TableReference(cubeName, "countryid2")); + add(new TableReference("countrydim", "id")); + } + }); + } + }); + joinChains.add(new JoinChain("dim2chain", "cube-dim2", "state thru cube") { + { + addPath(new ArrayList<TableReference>() { + { + add(new TableReference(cubeName, "dim2")); + add(new TableReference("testdim2", "id")); + } + }); + addPath(new ArrayList<TableReference>() { + { + add(new TableReference(cubeName, "dim2start")); + add(new TableReference("testdim2", "id")); + } + }); + addPath(new ArrayList<TableReference>() { + { + add(new TableReference(cubeName, "dim3")); + add(new TableReference("testdim2", "id")); + } + }); + } + }); + joinChains.add(new JoinChain("dim3chain", "cube-dim3", "state thru cube") { + { + addPath(new ArrayList<TableReference>() { + { + add(new TableReference(cubeName, "dim3")); + add(new TableReference("testdim3", "id")); + } + }); + } + }); + joinChains.add(new JoinChain("dim4chain", "cube-dim4", "state thru cube") { + { + addPath(new ArrayList<TableReference>() { + { + add(new TableReference(cubeName, "dim3")); + add(new TableReference("testdim4", "id")); + } + }); + } + }); + cubeDimensions.add(new ReferencedDimAttribute(new FieldSchema("zipcityname", "string", "zip city name"), "Zip city name", "cityFromZip", "name", null, null, null)); cubeMeasures.addAll(dummyMeasure); cubeDimensions.addAll(dummyDimAttributes); @@ -302,45 +361,118 @@ public class TestCubeMetastoreClient { } private static void defineUberDims() throws LensException { + Map<String, String> dimProps = new HashMap<>(); // Define zip dimension zipAttrs.add(new BaseDimAttribute(new FieldSchema("zipcode", "int", "code"))); zipAttrs.add(new BaseDimAttribute(new FieldSchema("f1", "string", "field1"))); zipAttrs.add(new BaseDimAttribute(new FieldSchema("f2", "string", "field1"))); - List<TableReference> stateRefs = Lists.newArrayList(new TableReference("statedim", "id"), - new TableReference("stateWeatherDim", "id")); - zipAttrs.add(new ReferencedDimAtrribute(new FieldSchema("stateid", "int", "state id"), "State refer", stateRefs)); - zipAttrs.add(new ReferencedDimAtrribute(new FieldSchema("cityid", "int", "city id"), "City refer", - new TableReference("citydim", "id"))); - zipAttrs.add(new ReferencedDimAtrribute(new FieldSchema("countryid", "int", "country id"), "Country refer", - new TableReference("countrydim", "id"))); - zipDim = new Dimension("zipdim", zipAttrs); + zipAttrs.add(new BaseDimAttribute(new FieldSchema("stateid", "int", "state id"), "State refer", null, null, null)); + zipAttrs.add(new BaseDimAttribute(new FieldSchema("cityid", "int", "city id"), "City refer", null, null, null)); + zipAttrs.add(new BaseDimAttribute(new FieldSchema("countryid", "int", "country id"), "Country refer", null, null, + null)); + zipAttrs.add(new ReferencedDimAttribute(new FieldSchema("statename", "name", "state name"), "State Name", + "zipstate", "name", null, null, null)); + + Set<JoinChain> joinChains = new HashSet<>(); + joinChains.add(new JoinChain("zipCity", "zip-city", "city thru zip") { + { + addPath(new ArrayList<TableReference>() { + { + add(new TableReference("zipdim", "cityid")); + add(new TableReference("citydim", "id")); + } + }); + } + }); + joinChains.add(new JoinChain("zipState", "zip-state", "state thru zip") { + { + addPath(new ArrayList<TableReference>() { + { + add(new TableReference("zipdim", "stateid")); + add(new TableReference("statedim", "id")); + } + }); + } + }); + joinChains.add(new JoinChain("zipCountry", "zip-country", "country thru zip") { + { + addPath(new ArrayList<TableReference>() { + { + add(new TableReference("zipdim", "countryid")); + add(new TableReference("countrydim", "id")); + } + }); + } + }); + zipDim = new Dimension("zipdim", zipAttrs, null, joinChains, dimProps, 0L); // Define city table + joinChains = new HashSet<>(); + dimProps = new HashMap<>(); cityAttrs.add(new BaseDimAttribute(new FieldSchema("id", "int", "code"))); cityAttrs.add(new BaseDimAttribute(new FieldSchema("name", "string", "city name"))); - cityAttrs.add(new ReferencedDimAtrribute(new FieldSchema("stateid", "int", "state id"), "State refer", - new TableReference("statedim", "id"))); + cityAttrs.add(new BaseDimAttribute(new FieldSchema("stateid", "int", "state id"), "State refer", null, null, null)); + cityAttrs.add(new ReferencedDimAttribute(new FieldSchema("statename", "name", "state name"), "State Name", + "citystate", "name", null, null, null)); dimExpressions.add(new ExprColumn(new FieldSchema("stateAndCountry", "String", "state and country together"), - "State and Country", new ExprSpec("concat(statedim.name, \":\", countrydim.name)", null, null), + "State and Country", new ExprSpec("concat(cityState.name, \":\", cityCountry.name)", null, null), new ExprSpec("state_and_country", null, null))); dimExpressions.add(new ExprColumn(new FieldSchema("CityAddress", "string", "city with state and city and zip"), - "City Address", "concat(citydim.name, \":\", statedim.name, \":\", countrydim.name, \":\", zipcode.code)")); - Map<String, String> dimProps = getHashMap(getDimTimedDimensionKey("citydim"), getDatePartitionKey()); - cityDim = new Dimension("citydim", cityAttrs, dimExpressions, dimProps, 0L); + "City Address", "concat(citydim.name, \":\", cityState.name, \":\", cityCountry.name, \":\", zipcode.code)")); + dimProps.put(MetastoreUtil.getDimTimedDimensionKey("citydim"), TestCubeMetastoreClient.getDatePartitionKey()); + + + joinChains.add(new JoinChain("cityState", "city-state", "state thru city") { + { + addPath(new ArrayList<TableReference>() { + { + add(new TableReference("citydim", "stateid")); + add(new TableReference("statedim", "id")); + } + }); + } + }); + joinChains.add(new JoinChain("cityCountry", "city-state", "country thru city") { + { + addPath(new ArrayList<TableReference>() { + { + add(new TableReference("citydim", "stateid")); + add(new TableReference("statedim", "id")); + add(new TableReference("statedim", "countryid")); + add(new TableReference("countrydim", "id")); + } + }); + } + }); + cityDim = new Dimension("citydim", cityAttrs, dimExpressions, joinChains, dimProps, 0L); // Define state table + joinChains = new HashSet<>(); + dimProps = new HashMap<>(); stateAttrs.add(new BaseDimAttribute(new FieldSchema("id", "int", "state id"), "State ID", null, null, null)); stateAttrs.add(new BaseDimAttribute(new FieldSchema("name", "string", "state name"))); stateAttrs.add(new BaseDimAttribute(new FieldSchema("capital", "string", "state capital"))); - stateAttrs.add(new ReferencedDimAtrribute(new FieldSchema("countryid", "int", "country id"), "Country refer", - new TableReference("countrydim", "id"))); + stateAttrs.add(new BaseDimAttribute(new FieldSchema("countryid", "int", "country id"), "Country refer", null, + null, null)); + stateAttrs.add(new ReferencedDimAttribute(new FieldSchema("countryname", "name", "country name"), "country Name", + "statecountry", "name", null, null, null)); + joinChains.add(new JoinChain("stateCountry", "state country", "country thru state") { + { + addPath(new ArrayList<TableReference>() { + { + add(new TableReference("statedim", "countryid")); + add(new TableReference("countrydim", "id")); + } + }); + } + }); stateDim = new Dimension("statedim", stateAttrs); countryAttrs.add(new BaseDimAttribute(new FieldSchema("id", "int", "country id"))); countryAttrs.add(new BaseDimAttribute(new FieldSchema("name", "string", "country name"))); countryAttrs.add(new BaseDimAttribute(new FieldSchema("capital", "string", "country capital"))); countryAttrs.add(new BaseDimAttribute(new FieldSchema("region", "string", "region name"))); - countryDim = new Dimension("countrydim", stateAttrs); + countryDim = new Dimension("countrydim", countryAttrs); } @@ -380,9 +512,9 @@ public class TestCubeMetastoreClient { assertTrue(client.tableExists(stateDim.getName())); assertTrue(client.tableExists(countryDim.getName())); - validateDim(zipDim, zipAttrs, "zipcode", "stateid"); - validateDim(cityDim, cityAttrs, "id", "stateid"); - validateDim(stateDim, stateAttrs, "id", "countryid"); + validateDim(zipDim, zipAttrs, "zipcode", "statename"); + validateDim(cityDim, cityAttrs, "id", "statename"); + validateDim(stateDim, stateAttrs, "id", "countryname"); validateDim(countryDim, countryAttrs, "id", null); // validate expression in citydim @@ -396,7 +528,7 @@ public class TestCubeMetastoreClient { ExprColumn stateCountryExpr = new ExprColumn(new FieldSchema("stateAndCountry", "String", "state and country together with hiphen as separator"), "State and Country", - "concat(statedim.name, \"-\", countrydim.name)"); + "concat(citystate.name, \"-\", citycountry.name)"); ExprSpec expr1 = new ExprSpec(); expr1.setExpr("concat(countrydim.name, \"-\", countrydim.name)"); stateCountryExpr.addExpression(expr1); @@ -420,13 +552,12 @@ public class TestCubeMetastoreClient { ExprColumn stateAndCountryActual = city.getExpressionByName("stateAndCountry"); assertNotNull(stateAndCountryActual.getExpressions()); assertEquals(2, stateAndCountryActual.getExpressions().size()); - assertTrue(stateAndCountryActual.getExpressions().contains("concat(statedim.name, \"-\", countrydim.name)")); - assertTrue(stateAndCountryActual.getExpressions() - .contains("concat(countrydim.name, \"-\", countrydim.name)")); + assertTrue(stateAndCountryActual.getExpressions().contains("concat(citystate.name, \"-\", citycountry.name)")); + assertTrue(stateAndCountryActual.getExpressions().contains("concat(countrydim.name, \"-\", countrydim.name)")); assertNotNull(city.getExpressionByName("stateAndCountry")); assertEquals(city.getExpressionByName("stateAndCountry").getExpr(), - "concat(statedim.name, \"-\", countrydim.name)"); + "concat(citystate.name, \"-\", citycountry.name)"); stateAndCountryActual.removeExpression("concat(countrydim.name, \"-\", countrydim.name)"); city.alterExpression(stateAndCountryActual); @@ -434,7 +565,6 @@ public class TestCubeMetastoreClient { Dimension cityAltered = client.getDimension(city.getName()); assertEquals(1, cityAltered.getExpressionByName("stateAndCountry").getExpressions().size()); - List<TableReference> chain = new ArrayList<>(); chain.add(new TableReference("zipdim", "cityid")); chain.add(new TableReference("citydim", "id")); @@ -449,12 +579,11 @@ public class TestCubeMetastoreClient { Dimension toAlter = new Dimension(tbl); toAlter.alterAttribute(new BaseDimAttribute(new FieldSchema("newZipDim", "int", "new dim added"), null, null, null, null, 1000L)); - toAlter.alterAttribute(new ReferencedDimAtrribute(new FieldSchema("newRefDim", "int", "new ref-dim added"), - "New city ref", new TableReference("citydim", "id"))); + toAlter.alterAttribute(new ReferencedDimAttribute(new FieldSchema("newRefDim", "int", "new ref-dim added"), + "New city ref", "cubecity", "name", null, null, null)); toAlter.alterAttribute(new BaseDimAttribute(new FieldSchema("f2", "varchar", "modified field"))); - List<TableReference> stateRefs = Lists.newArrayList(new TableReference("statedim", "id")); - toAlter.alterAttribute(new ReferencedDimAtrribute(new FieldSchema("stateid", "int", "state id"), "State refer", - stateRefs)); + toAlter.alterAttribute(new BaseDimAttribute(new FieldSchema("stateid", "int", "state id"), "State refer altered", + null, null, null)); toAlter.removeAttribute("f1"); toAlter.getProperties().put("alter.prop", "altered"); toAlter.alterExpression(new ExprColumn(new FieldSchema("formattedcode", "string", "formatted zipcode"), @@ -464,7 +593,6 @@ public class TestCubeMetastoreClient { client.alterDimension(zipDim.getName(), toAlter); Dimension altered = client.getDimension(zipDim.getName()); - assertEquals(toAlter, altered); assertNotNull(altered.getAttributeByName("newZipDim")); assertNotNull(altered.getAttributeByName("newRefDim")); @@ -481,26 +609,24 @@ public class TestCubeMetastoreClient { assertEquals((((BaseDimAttribute) newzipdim).getNumOfDistinctValues().get()), Long.valueOf(1000)); CubeDimAttribute newrefdim = altered.getAttributeByName("newRefDim"); - assertTrue(newrefdim instanceof ReferencedDimAtrribute); - assertEquals(((ReferencedDimAtrribute) newrefdim).getReferences().size(), 1); - assertEquals(((ReferencedDimAtrribute) newrefdim).getReferences().get(0).getDestTable(), cityDim.getName()); - assertEquals(((ReferencedDimAtrribute) newrefdim).getReferences().get(0).getDestColumn(), "id"); + assertTrue(newrefdim instanceof ReferencedDimAttribute); + assertEquals(((ReferencedDimAttribute) newrefdim).getChainRefColumns().size(), 1); + assertEquals(((ReferencedDimAttribute) newrefdim).getChainRefColumns().get(0).getChainName(), "cubecity"); + assertEquals(((ReferencedDimAttribute) newrefdim).getChainRefColumns().get(0).getRefColumn(), "name"); CubeDimAttribute f2 = altered.getAttributeByName("f2"); assertTrue(f2 instanceof BaseDimAttribute); assertEquals(((BaseDimAttribute) f2).getType(), "varchar"); CubeDimAttribute stateid = altered.getAttributeByName("stateid"); - assertTrue(stateid instanceof ReferencedDimAtrribute); - assertEquals(((ReferencedDimAtrribute) stateid).getReferences().size(), 1); - assertEquals(((ReferencedDimAtrribute) stateid).getReferences().get(0).getDestTable(), stateDim.getName()); - assertEquals(((ReferencedDimAtrribute) stateid).getReferences().get(0).getDestColumn(), "id"); + assertTrue(stateid instanceof BaseDimAttribute); + assertEquals(stateid.getDisplayString(), "State refer altered"); assertEquals(altered.getProperties().get("alter.prop"), "altered"); assertEquals(altered.getChainByName("stateFromZip"), zipState); - assertEquals(altered.getJoinChains().size(), 1); + assertEquals(altered.getJoinChains().size(), 4); JoinChain zipchain = altered.getChainByName("stateFromZip"); assertEquals(zipchain.getDisplayString(), "Zip State"); assertEquals(zipchain.getDescription(), "zip State desc"); @@ -518,14 +644,14 @@ public class TestCubeMetastoreClient { Table dimTbl = client.getHiveTable(udim.getName()); assertTrue(client.isDimension(dimTbl)); Dimension dim = new Dimension(dimTbl); - assertTrue(udim.equals(dim)); + assertTrue(udim.equals(dim), "Equals failed for " + dim.getName()); assertTrue(udim.equals(client.getDimension(udim.getName()))); assertEquals(dim.getAttributes().size(), attrs.size()); assertNotNull(dim.getAttributeByName(basedim)); assertTrue(dim.getAttributeByName(basedim) instanceof BaseDimAttribute); if (referdim != null) { assertNotNull(dim.getAttributeByName(referdim)); - assertTrue(dim.getAttributeByName(referdim) instanceof ReferencedDimAtrribute); + assertTrue(dim.getAttributeByName(referdim) instanceof ReferencedDimAttribute); } assertEquals(udim.getAttributeNames().size() + udim.getExpressionNames().size(), dim.getAllFieldNames().size()); } @@ -575,7 +701,7 @@ public class TestCubeMetastoreClient { assertTrue(cube2.getJoinChainNames().contains("cityfromzip")); assertTrue(cube2.getJoinChainNames().contains("city")); assertFalse(cube2.getJoinChains().isEmpty()); - assertEquals(cube2.getJoinChains().size(), 2); + assertEquals(cube2.getJoinChains().size(), 7); JoinChain zipchain = cube2.getChainByName("cityfromzip"); assertEquals(zipchain.getDisplayString(), "Zip City"); assertEquals(zipchain.getDescription(), "zip city desc"); @@ -598,7 +724,7 @@ public class TestCubeMetastoreClient { assertEquals(citychain.getPaths().get(0).getReferences().get(0).toString(), "testmetastorecube.cityid"); assertEquals(citychain.getPaths().get(0).getReferences().get(1).toString(), "citydim.id"); assertNotNull(cube2.getDimAttributeByName("zipcityname")); - ChainRefCol zipCityChain = ((ReferencedDimAtrribute) cube2.getDimAttributeByName("zipcityname")) + ChainRefCol zipCityChain = ((ReferencedDimAttribute) cube2.getDimAttributeByName("zipcityname")) .getChainRefColumns().get(0); assertEquals(zipCityChain.getChainName(), "cityfromzip"); assertEquals(zipCityChain.getRefColumn(), "name");