Initial changes for Union
Project: http://git-wip-us.apache.org/repos/asf/lens/repo Commit: http://git-wip-us.apache.org/repos/asf/lens/commit/b6f0cc3d Tree: http://git-wip-us.apache.org/repos/asf/lens/tree/b6f0cc3d Diff: http://git-wip-us.apache.org/repos/asf/lens/diff/b6f0cc3d Branch: refs/heads/lens-1381 Commit: b6f0cc3d4d55342e811c55c06ad2c10b62a0feb9 Parents: fe66131 Author: Puneet Gupta,Sushil Mohanty and Lavkesh Lahngir <[email protected]> Authored: Wed Dec 21 16:58:23 2016 +0530 Committer: Puneet <[email protected]> Committed: Wed Dec 21 17:05:41 2016 +0530 ---------------------------------------------------------------------- .../lens/cube/metadata/FactPartition.java | 2 + .../lens/cube/parse/AggregateResolver.java | 21 +- .../org/apache/lens/cube/parse/Candidate.java | 139 ++++ .../parse/CandidateCoveringSetsResolver.java | 259 ++++++ .../apache/lens/cube/parse/CandidateDim.java | 13 +- .../apache/lens/cube/parse/CandidateFact.java | 11 + .../apache/lens/cube/parse/CandidateTable.java | 7 +- .../cube/parse/CandidateTablePruneCause.java | 60 +- .../lens/cube/parse/CandidateTableResolver.java | 238 ++---- .../apache/lens/cube/parse/CandidateUtil.java | 208 +++++ .../lens/cube/parse/CubeQueryContext.java | 63 +- .../lens/cube/parse/CubeQueryRewriter.java | 8 + .../cube/parse/DenormalizationResolver.java | 27 +- .../lens/cube/parse/ExpressionResolver.java | 36 +- .../apache/lens/cube/parse/JoinCandidate.java | 119 +++ .../apache/lens/cube/parse/JoinResolver.java | 14 +- .../lens/cube/parse/LightestFactResolver.java | 28 +- .../cube/parse/MaxCoveringFactResolver.java | 4 + .../org/apache/lens/cube/parse/PruneCauses.java | 6 +- .../lens/cube/parse/QueriedPhraseContext.java | 58 +- .../org/apache/lens/cube/parse/QueryAST.java | 2 + .../lens/cube/parse/StorageCandidate.java | 560 +++++++++++++ .../lens/cube/parse/StorageTableResolver.java | 793 ++++--------------- .../org/apache/lens/cube/parse/StorageUtil.java | 128 ++- .../lens/cube/parse/TimeRangeChecker.java | 23 +- 
.../apache/lens/cube/parse/UnionCandidate.java | 247 ++++++ .../lens/cube/parse/UnionQueryWriter.java | 33 + .../lens/cube/parse/join/AutoJoinContext.java | 12 + .../apache/lens/driver/cube/RewriterPlan.java | 22 +- .../apache/lens/cube/parse/CubeTestSetup.java | 116 +++ .../cube/parse/TestUnionAndJoinCandidates.java | 65 ++ 31 files changed, 2385 insertions(+), 937 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/lens/blob/b6f0cc3d/lens-cube/src/main/java/org/apache/lens/cube/metadata/FactPartition.java ---------------------------------------------------------------------- diff --git a/lens-cube/src/main/java/org/apache/lens/cube/metadata/FactPartition.java b/lens-cube/src/main/java/org/apache/lens/cube/metadata/FactPartition.java index 1694b80..6a8e0c1 100644 --- a/lens-cube/src/main/java/org/apache/lens/cube/metadata/FactPartition.java +++ b/lens-cube/src/main/java/org/apache/lens/cube/metadata/FactPartition.java @@ -39,6 +39,8 @@ public class FactPartition implements Comparable<FactPartition> { private final Set<String> storageTables = new LinkedHashSet<String>(); @Getter private final UpdatePeriod period; + + //TODO union : this is never set . 
Do we need this ?s @Getter @Setter private FactPartition containingPart; http://git-wip-us.apache.org/repos/asf/lens/blob/b6f0cc3d/lens-cube/src/main/java/org/apache/lens/cube/parse/AggregateResolver.java ---------------------------------------------------------------------- diff --git a/lens-cube/src/main/java/org/apache/lens/cube/parse/AggregateResolver.java b/lens-cube/src/main/java/org/apache/lens/cube/parse/AggregateResolver.java index 9658100..79f38da 100644 --- a/lens-cube/src/main/java/org/apache/lens/cube/parse/AggregateResolver.java +++ b/lens-cube/src/main/java/org/apache/lens/cube/parse/AggregateResolver.java @@ -71,21 +71,24 @@ class AggregateResolver implements ContextRewriter { || hasMeasuresNotInDefaultAggregates(cubeql, cubeql.getHavingAST(), null, aggregateResolverDisabled) || hasMeasures(cubeql, cubeql.getWhereAST()) || hasMeasures(cubeql, cubeql.getGroupByAST()) || hasMeasures(cubeql, cubeql.getOrderByAST())) { - Iterator<CandidateFact> factItr = cubeql.getCandidateFacts().iterator(); - while (factItr.hasNext()) { - CandidateFact candidate = factItr.next(); - if (candidate.fact.isAggregated()) { - cubeql.addFactPruningMsgs(candidate.fact, - CandidateTablePruneCause.missingDefaultAggregate()); - factItr.remove(); + //TODO union : Note : Pending : cube segmentation design may change the above assumption and Set<Candidate> can contain and mix of StorageCandidate and UnionSegmentCandidate. 
This step can then ignore UnionSegmentCandidate + Iterator<Candidate> candItr = cubeql.getCandidates().iterator(); + while (candItr.hasNext()) { + Candidate candidate = candItr.next(); + if (candidate instanceof StorageCandidate) { + StorageCandidate sc = (StorageCandidate) candidate; + if (sc.getFact().isAggregated()) { + cubeql.addStoragePruningMsg(sc, CandidateTablePruneCause.missingDefaultAggregate()); + candItr.remove(); + } + } else { + throw new LensException("Not a storage candidate!!"); } } nonDefaultAggregates = true; log.info("Query has non default aggregates, no aggregate resolution will be done"); } - cubeql.pruneCandidateFactSet(CandidateTablePruneCode.MISSING_DEFAULT_AGGREGATE); - if (nonDefaultAggregates || aggregateResolverDisabled) { return; } http://git-wip-us.apache.org/repos/asf/lens/blob/b6f0cc3d/lens-cube/src/main/java/org/apache/lens/cube/parse/Candidate.java ---------------------------------------------------------------------- diff --git a/lens-cube/src/main/java/org/apache/lens/cube/parse/Candidate.java b/lens-cube/src/main/java/org/apache/lens/cube/parse/Candidate.java new file mode 100644 index 0000000..0d0ddb7 --- /dev/null +++ b/lens-cube/src/main/java/org/apache/lens/cube/parse/Candidate.java @@ -0,0 +1,139 @@ +package org.apache.lens.cube.parse; + +import java.util.Collection; +import java.util.Date; +import java.util.Map; +import java.util.Set; + +import org.apache.lens.cube.metadata.Dimension; +import org.apache.lens.cube.metadata.FactPartition; +import org.apache.lens.cube.metadata.TimeRange; +import org.apache.lens.server.api.error.LensException; + +import org.apache.hadoop.hive.ql.parse.ASTNode; + +/** + * This interface represents candidates that are involved in different phases of query rewriting. + * At the lowest level, Candidate is represented by a StorageCandidate that has a fact on a storage + * and other joined dimensions (if any) that are required to answer the query or part of the query. 
+ * At a higher level Candidate can also be a Join or a Union Candidate representing join or union + * between other candidates + * + * Different Re-writers will work on applicable candidates to produce a final candidate which will be used + * for generating the re-written query. + */ +public interface Candidate { + + /** + * Returns String representation of this Candidate + * TODO decide if this method should be moved to QueryAST instead + * + * @return + */ + String toHQL(); + + /** + * Returns Query AST + * + * @return + */ + QueryAST getQueryAst(); + + /** + * Returns all the fact columns + * + * @return + */ + Collection<String> getColumns(); + + /** + * Start Time for this candidate (calculated based on schema) + * + * @return + */ + Date getStartTime(); + + /** + * End Time for this candidate (calculated based on schema) + * + * @return + */ + Date getEndTime(); + + /** + * Returns the cost of this candidate + * + * @return + */ + double getCost(); + + /** + * Alias used for this candidate. + * + * @return + */ + String getAlias(); + + /** + * Returns true if this candidate contains the given candidate + * + * @param candidate + * @return + */ + boolean contains(Candidate candidate); + + /** + * Returns child candidates of this candidate if any. + * Note: StorageCandidate will return null + * @return + */ + Collection<Candidate> getChildren(); + + + /** + * Calculates if this candidate can answer the query for given time range based on actual data registered with + * the underlying candidate storages. This method will also update any internal candidate data structures that are + * required for writing the re-written query and to answer {@link #getParticipatingPartitions()}. + * + * @param timeRange : TimeRange to check completeness for. 
TimeRange consists of start time, end time and the + * partition column + * @param failOnPartialData : fail fast if the candidate can answer the query only partially + * @return true if this Candidate can answer query for the given time range. + */ + boolean evaluateCompleteness(TimeRange timeRange, boolean failOnPartialData) + throws LensException; + + /** + * Returns the set of fact partitions that will participate in this candidate. + * Note: This method can be called only after call to + * {@link #evaluateCompleteness(TimeRange, boolean)} + * + * @return + */ + Set<FactPartition> getParticipatingPartitions(); + + /** + * TODO union: in case of join , one of the candidates should be able to answer the mesaure expression + * TODO union: In case of union, all the candidates should answer the expression + * TODO union : add isExpresionEvaluable() to Candidate + * + * @param expr + * @return + */ + boolean isExpressionEvaluable(ExpressionResolver.ExpressionContext expr); + + // Moved to CandidateUtil boolean isValidForTimeRange(TimeRange timeRange); + // Moved to CandidateUtil boolean isExpressionAnswerable(ASTNode node, CubeQueryContext context) throws LensException; + // NO caller Set<String> getTimePartCols(CubeQueryContext query) throws LensException; + + //TODO add methods to update AST in this candidate in this class of in CandidateUtil. 
+ //void updateFromString(CubeQueryContext query) throws LensException; + + //void updateASTs(CubeQueryContext cubeql) throws LensException; + + //void addToHaving(ASTNode ast) throws LensException; + + //Used Having push down flow + //String addAndGetAliasFromSelect(ASTNode ast, AliasDecider aliasDecider); + +} \ No newline at end of file http://git-wip-us.apache.org/repos/asf/lens/blob/b6f0cc3d/lens-cube/src/main/java/org/apache/lens/cube/parse/CandidateCoveringSetsResolver.java ---------------------------------------------------------------------- diff --git a/lens-cube/src/main/java/org/apache/lens/cube/parse/CandidateCoveringSetsResolver.java b/lens-cube/src/main/java/org/apache/lens/cube/parse/CandidateCoveringSetsResolver.java new file mode 100644 index 0000000..e961427 --- /dev/null +++ b/lens-cube/src/main/java/org/apache/lens/cube/parse/CandidateCoveringSetsResolver.java @@ -0,0 +1,259 @@ +package org.apache.lens.cube.parse; + +import com.google.common.collect.Lists; +import lombok.extern.slf4j.Slf4j; +import org.apache.hadoop.conf.Configuration; +import org.apache.lens.cube.error.LensCubeErrorCode; +import org.apache.lens.cube.metadata.TimeRange; + +import org.apache.lens.server.api.error.LensException; + +import java.util.*; + +@Slf4j +public class CandidateCoveringSetsResolver implements ContextRewriter { + + private List<Candidate> finalCandidates = new ArrayList<>(); + private int unionCandidatealiasCounter = 0; + private int joinCandidatealiasCounter = 0; + + public CandidateCoveringSetsResolver(Configuration conf) { + } + + @Override + public void rewriteContext(CubeQueryContext cubeql) throws LensException { + + Set<QueriedPhraseContext> queriedMsrs = new HashSet<>(); + for (QueriedPhraseContext qur : cubeql.getQueriedPhrases()) { + if (qur.hasMeasures(cubeql)) { + queriedMsrs.add(qur); + } + } + // if no measures are queried, add all StorageCandidates individually as single covering sets + if (queriedMsrs.isEmpty()) { + 
finalCandidates.addAll(cubeql.getCandidates()); + } + + List<Candidate> unionSet = resolveRangeCoveringFactSet(cubeql, cubeql.getTimeRanges(), queriedMsrs); + List<List<Candidate>> measureCoveringSets = resolveJoinCandidates(unionSet, queriedMsrs, cubeql); + updateFinalCandidates(measureCoveringSets); + log.info("Covering candidate sets :{}", finalCandidates); + + String msrString = CandidateUtil.getColumns(queriedMsrs).toString(); + if (finalCandidates.isEmpty()) { + throw new LensException(LensCubeErrorCode.NO_FACT_HAS_COLUMN.getLensErrorInfo(), msrString); + } + // update final candidate sets + cubeql.getCandidates().clear(); + cubeql.getCandidates().addAll(finalCandidates); + // TODO : we might need to prune if we maintian two data structures in CubeQueryContext. + //cubeql.pruneCandidateFactWithCandidateSet(CandidateTablePruneCause.columnNotFound(getColumns(queriedMsrs))); + //if (cubeql.getCandidates().size() == 0) { + // throw new LensException(LensCubeErrorCode.NO_FACT_HAS_COLUMN.getLensErrorInfo(), msrString); + // } + } + + private Candidate createJoinCandidateFromUnionCandidates(List<Candidate> ucs) { + Candidate cand; + if (ucs.size() >= 2) { + Candidate first = ucs.get(0); + Candidate second = ucs.get(1); + cand = new JoinCandidate(first, second, "jc" + joinCandidatealiasCounter++); + for (int i = 2; i < ucs.size(); i++) { + cand = new JoinCandidate(cand, ucs.get(i), "jc" + joinCandidatealiasCounter++); + } + } else { + cand = ucs.get(0); + } + return cand; + } + + private void updateFinalCandidates(List<List<Candidate>> jcs) { + int aliasCounter = 0; + for (Iterator<List<Candidate>> itr = jcs.iterator(); itr.hasNext(); ) { + List<Candidate> jc = itr.next(); + if (jc.size() == 1 && jc.iterator().next().getChildren().size() == 1) { + finalCandidates.add(jc.iterator().next().getChildren().iterator().next()); + } else { + finalCandidates.add(createJoinCandidateFromUnionCandidates(jc)); + } + } + } + + private boolean 
isCandidateCoveringTimeRanges(UnionCandidate uc, List<TimeRange> ranges) { + for (Iterator<TimeRange> itr = ranges.iterator(); itr.hasNext(); ) { + TimeRange range = itr.next(); + if (!CandidateUtil.isTimeRangeCovered(uc.getChildren(), range.getFromDate(), range.getToDate())) { + return false; + } + } + return true; + } + + private void pruneUnionCandidatesNotCoveringAllRanges(List<UnionCandidate> ucs, List<TimeRange> ranges) { + for (Iterator<UnionCandidate> itr = ucs.iterator(); itr.hasNext(); ) { + UnionCandidate uc = itr.next(); + if (!isCandidateCoveringTimeRanges(uc, ranges)) { + itr.remove(); + } + } + } + + private List<Candidate> resolveRangeCoveringFactSet(CubeQueryContext cubeql, List<TimeRange> ranges, + Set<QueriedPhraseContext> queriedMsrs) throws LensException { + // All Candidates + List<Candidate> allCandidates = new ArrayList<Candidate>(cubeql.getCandidates()); + // Partially valid candidates + List<Candidate> allCandidatesPartiallyValid = new ArrayList<>(); + List<Candidate> candidateSet = new ArrayList<>(); + for (Candidate cand : allCandidates) { + // Assuming initial list of candidates populated are StorageCandidate + if (cand instanceof StorageCandidate) { + StorageCandidate sc = (StorageCandidate) cand; + if (CandidateUtil.isValidForTimeRanges(sc, ranges)) { + candidateSet.add(sc); + continue; + } else if (CandidateUtil.isPartiallyValidForTimeRanges(sc, ranges)) { + allCandidatesPartiallyValid.add(CandidateUtil.cloneStorageCandidate(sc)); + } + } else { + throw new LensException("Not a StorageCandidate!!"); + } + } + // Get all covering fact sets + List<UnionCandidate> unionCoveringSet = + getCombinations(new ArrayList<Candidate>(allCandidatesPartiallyValid)); + // Sort the Collection based on no of elements + Collections.sort(unionCoveringSet, new CandidateUtil.UnionCandidateComparator<UnionCandidate>()); + // prune non covering sets + pruneUnionCandidatesNotCoveringAllRanges(unionCoveringSet, ranges); + // prune candidate set which doesn't 
contain any common measure i + pruneUnionCoveringSetWithoutAnyCommonMeasure(unionCoveringSet, queriedMsrs, cubeql); + // prune redundant covering sets + pruneRedundantUnionCoveringSets(unionCoveringSet); + // pruing done in the previous steps, now create union candidates + candidateSet.addAll(unionCoveringSet); + return candidateSet ; + + } + + private boolean isMeasureAnswerablebyUnionCandidate(QueriedPhraseContext msr, Candidate uc, + CubeQueryContext cubeql) throws LensException { + // Candidate is a single StorageCandidate + if (uc.getChildren() == null ) { + if (!msr.isEvaluable(cubeql, (StorageCandidate) uc)) { + return false; + } + } else { + for (Candidate cand : uc.getChildren()) { + if (!msr.isEvaluable(cubeql, (StorageCandidate) cand)) { + return false; + } + } + } + return true; + } + + private void pruneUnionCoveringSetWithoutAnyCommonMeasure(List<UnionCandidate> ucs, + Set<QueriedPhraseContext> queriedMsrs, + CubeQueryContext cubeql) throws LensException { + for (ListIterator<UnionCandidate> itr = ucs.listIterator(); itr.hasNext(); ) { + boolean toRemove = true; + UnionCandidate uc = itr.next(); + for (QueriedPhraseContext msr : queriedMsrs) { + if (isMeasureAnswerablebyUnionCandidate(msr, uc, cubeql)) { + toRemove = false; + break; + } + } + if (toRemove) { + itr.remove(); + } + } + } + + private void pruneRedundantUnionCoveringSets(List<UnionCandidate> candidates) { + for (int i = 0; i < candidates.size(); i++) { + UnionCandidate current = candidates.get(i); + int j = i + 1; + for (ListIterator<UnionCandidate> itr = candidates.listIterator(j); itr.hasNext(); ) { + UnionCandidate next = itr.next(); + if (next.getChildren().containsAll(current.getChildren())) { + itr.remove(); + } + } + } + } + + public List<UnionCandidate> getCombinations(final List<Candidate> candidates) { + int aliasCounter = 0; + List<UnionCandidate> combinations = new LinkedList<UnionCandidate>(); + int size = candidates.size(); + int threshold = Double.valueOf(Math.pow(2, 
size)).intValue() - 1; + + for (int i = 1; i <= threshold; ++i) { + LinkedList<Candidate> individualCombinationList = new LinkedList<Candidate>(); + int count = size - 1; + int clonedI = i; + while (count >= 0) { + if ((clonedI & 1) != 0) { + individualCombinationList.addFirst(candidates.get(count)); + } + clonedI = clonedI >>> 1; + --count; + } + combinations.add(new UnionCandidate(individualCombinationList, "uc" + unionCandidatealiasCounter++ )); + } + return combinations; + } + + private List<List<Candidate>> resolveJoinCandidates(List<Candidate> unionCandidates, + Set<QueriedPhraseContext> msrs, + CubeQueryContext cubeql) throws LensException { + List<List<Candidate>> msrCoveringSets = new ArrayList<>(); + List<Candidate> ucSet = new ArrayList<>(unionCandidates); + boolean evaluable = false; + // Check if a single set can answer all the measures and exprsWithMeasures + for (Iterator<Candidate> i = ucSet.iterator(); i.hasNext(); ) { + Candidate uc = i.next(); + for (QueriedPhraseContext msr : msrs) { + evaluable = isMeasureAnswerablebyUnionCandidate(msr, uc, cubeql) ? true : false; + if (!evaluable) { + break; + } + } + if (evaluable) { + // single set can answer all the measures as an UnionCandidate + List<Candidate> one = new ArrayList<>(); + one.add(uc); + msrCoveringSets.add(one); + i.remove(); + } + } + // Sets that contain all measures or no measures are removed from iteration. 
+ // find other facts + for (Iterator<Candidate> i = ucSet.iterator(); i.hasNext(); ) { + Candidate uc = i.next(); + i.remove(); + // find the remaining measures in other facts + if (i.hasNext()) { + Set<QueriedPhraseContext> remainingMsrs = new HashSet<>(msrs); + Set<QueriedPhraseContext> coveredMsrs = CandidateUtil.coveredMeasures(uc, msrs, cubeql); + remainingMsrs.removeAll(coveredMsrs); + + List<List<Candidate>> coveringSets = resolveJoinCandidates(ucSet, remainingMsrs, cubeql); + if (!coveringSets.isEmpty()) { + for (List<Candidate> candSet : coveringSets) { + candSet.add(uc); + msrCoveringSets.add(candSet); + } + } else { + log.info("Couldnt find any set containing remaining measures:{} {} in {}", remainingMsrs, + ucSet); + } + } + } + log.info("Covering set {} for measures {} with factsPassed {}", msrCoveringSets, msrs, ucSet); + return msrCoveringSets; + } +} \ No newline at end of file http://git-wip-us.apache.org/repos/asf/lens/blob/b6f0cc3d/lens-cube/src/main/java/org/apache/lens/cube/parse/CandidateDim.java ---------------------------------------------------------------------- diff --git a/lens-cube/src/main/java/org/apache/lens/cube/parse/CandidateDim.java b/lens-cube/src/main/java/org/apache/lens/cube/parse/CandidateDim.java index 4dcdbcf..0dde72d 100644 --- a/lens-cube/src/main/java/org/apache/lens/cube/parse/CandidateDim.java +++ b/lens-cube/src/main/java/org/apache/lens/cube/parse/CandidateDim.java @@ -38,7 +38,7 @@ public class CandidateDim implements CandidateTable { final CubeDimensionTable dimtable; @Getter @Setter - private String storageTable; + private String storageName; @Getter @Setter private String whereClause; @@ -73,11 +73,11 @@ public class CandidateDim implements CandidateTable { String database = SessionState.get().getCurrentDatabase(); // Add database name prefix for non default database if (StringUtils.isNotBlank(database) && !"default".equalsIgnoreCase(database)) { - storageTable = database + "." 
+ storageTable; + storageName = database + "." + storageName; } dbResolved = true; } - return storageTable + " " + alias; + return storageName + " " + alias; } @Override @@ -124,12 +124,7 @@ public class CandidateDim implements CandidateTable { } @Override - public Set<String> getStorageTables() { - return Collections.singleton(storageTable); - } - - @Override - public Set<String> getPartsQueried() { + public Set<String> getParticipatingPartitions() { if (StringUtils.isBlank(whereClause)) { return Collections.emptySet(); } http://git-wip-us.apache.org/repos/asf/lens/blob/b6f0cc3d/lens-cube/src/main/java/org/apache/lens/cube/parse/CandidateFact.java ---------------------------------------------------------------------- diff --git a/lens-cube/src/main/java/org/apache/lens/cube/parse/CandidateFact.java b/lens-cube/src/main/java/org/apache/lens/cube/parse/CandidateFact.java index b42262d..18478f8 100644 --- a/lens-cube/src/main/java/org/apache/lens/cube/parse/CandidateFact.java +++ b/lens-cube/src/main/java/org/apache/lens/cube/parse/CandidateFact.java @@ -39,6 +39,7 @@ import com.google.common.collect.Sets; import lombok.Getter; import lombok.Setter; +//TODO union : delete this class and use Candidate and StorageCandidtae /** * Holds context of a candidate fact table. 
*/ @@ -110,6 +111,11 @@ public class CandidateFact implements CandidateTable, QueryAST { return columns; } + @Override + public Set<?> getParticipatingPartitions() { + return null; + } + public boolean isValidForTimeRange(TimeRange timeRange) { return (!timeRange.getFromDate().before(fact.getStartTime())) && (!timeRange.getToDate().after(fact.getEndTime())); } @@ -241,6 +247,11 @@ public class CandidateFact implements CandidateTable, QueryAST { return StringUtils.join(storageTables, ",") + " " + alias; } + @Override + public String getStorageName() { + return null; + } + public void setStorageTables(Set<String> storageTables) { String database = SessionState.get().getCurrentDatabase(); // Add database name prefix for non default database http://git-wip-us.apache.org/repos/asf/lens/blob/b6f0cc3d/lens-cube/src/main/java/org/apache/lens/cube/parse/CandidateTable.java ---------------------------------------------------------------------- diff --git a/lens-cube/src/main/java/org/apache/lens/cube/parse/CandidateTable.java b/lens-cube/src/main/java/org/apache/lens/cube/parse/CandidateTable.java index e001ca4..5863c1c 100644 --- a/lens-cube/src/main/java/org/apache/lens/cube/parse/CandidateTable.java +++ b/lens-cube/src/main/java/org/apache/lens/cube/parse/CandidateTable.java @@ -37,10 +37,10 @@ public interface CandidateTable { String getStorageString(String alias); /** - * Get storage tables corresponding to this candidate + * Get storage table corresponding to this candidate * @return */ - Set<String> getStorageTables(); + String getStorageName(); /** * Get candidate table @@ -73,5 +73,6 @@ public interface CandidateTable { /** * Get partitions queried */ - Set<?> getPartsQueried(); + //TODO union: Name changed + Set<?> getParticipatingPartitions(); } http://git-wip-us.apache.org/repos/asf/lens/blob/b6f0cc3d/lens-cube/src/main/java/org/apache/lens/cube/parse/CandidateTablePruneCause.java ---------------------------------------------------------------------- diff --git 
a/lens-cube/src/main/java/org/apache/lens/cube/parse/CandidateTablePruneCause.java b/lens-cube/src/main/java/org/apache/lens/cube/parse/CandidateTablePruneCause.java index 2ad6e20..41814f0 100644 --- a/lens-cube/src/main/java/org/apache/lens/cube/parse/CandidateTablePruneCause.java +++ b/lens-cube/src/main/java/org/apache/lens/cube/parse/CandidateTablePruneCause.java @@ -38,6 +38,7 @@ import lombok.NoArgsConstructor; @JsonWriteNullProperties(false) @Data @NoArgsConstructor +//TODO union: Since we are working on StoargeCandidates now, we might need some chnages here public class CandidateTablePruneCause { public enum CandidateTablePruneCode { @@ -158,8 +159,25 @@ public class CandidateTablePruneCause { } return new String[]{incompletePartitions.toString()}; } - }; + }, + // Moved from Stoarge causes + INVALID_STORAGE("Invalid Storage"), + // storage table does not exist + STOARGE_TABLE_DOES_NOT_EXIST("Storage table does not exist"), + // storage has no update periods queried + MISSING_UPDATE_PERIODS("Storage has no update periods"), + // no candidate update periods, update period cause will have why each + // update period is not a candidate + NO_CANDIDATE_UPDATE_PERIODS("Storage update periods are not candidate"), + // storage table has no partitions queried + NO_PARTITIONS("Storage table has no partitions"), + // partition column does not exist + PART_COL_DOES_NOT_EXIST("Partition column does not exist"), + // Range is not supported by this storage table + TIME_RANGE_NOT_ANSWERABLE("Range not answerable"), + // storage is not supported by execution engine + UNSUPPORTED_STORAGE("Unsupported Storage"); String errorFormat; @@ -180,6 +198,8 @@ public class CandidateTablePruneCause { } } + //TODO union : Remove this enum. 
All values moved to CandidateTablePruneCode + @Deprecated public enum SkipStorageCode { // invalid storage table INVALID, @@ -210,16 +230,21 @@ public class CandidateTablePruneCause { @JsonWriteNullProperties(false) @Data @NoArgsConstructor + //TODO union:deprecate this sub class + @Deprecated public static class SkipStorageCause { private SkipStorageCode cause; // update period to skip cause private Map<String, SkipUpdatePeriodCode> updatePeriodRejectionCause; + private List<String> nonExistantPartCols; + @Deprecated public SkipStorageCause(SkipStorageCode cause) { this.cause = cause; } + @Deprecated public static SkipStorageCause partColDoesNotExist(String... partCols) { SkipStorageCause ret = new SkipStorageCause(SkipStorageCode.PART_COL_DOES_NOT_EXIST); ret.nonExistantPartCols = new ArrayList<String>(); @@ -229,6 +254,7 @@ public class CandidateTablePruneCause { return ret; } + @Deprecated public static SkipStorageCause noCandidateUpdatePeriod(Map<String, SkipUpdatePeriodCode> causes) { SkipStorageCause ret = new SkipStorageCause(SkipStorageCode.NO_CANDIDATE_PERIODS); ret.updatePeriodRejectionCause = causes; @@ -263,6 +289,11 @@ public class CandidateTablePruneCause { // ranges in which fact is invalid private List<TimeRange> invalidRanges; + private List<String> nonExistantPartCols; + + private Map<String, SkipUpdatePeriodCode> updatePeriodRejectionCause; + + public CandidateTablePruneCause(CandidateTablePruneCode cause) { this.cause = cause; } @@ -338,7 +369,9 @@ public class CandidateTablePruneCause { return cause; } - public static CandidateTablePruneCause noCandidateStorages(Map<String, SkipStorageCause> storageCauses) { + //TDOO union : Remove this method + @Deprecated + public static CandidateTablePruneCause noCandidateStorages(Map<String, SkipStorageCause> storageCauses) { CandidateTablePruneCause cause = new CandidateTablePruneCause(NO_CANDIDATE_STORAGES); cause.setStorageCauses(new HashMap<String, SkipStorageCause>()); for (Map.Entry<String, 
SkipStorageCause> entry : storageCauses.entrySet()) { @@ -354,4 +387,27 @@ public class CandidateTablePruneCause { cause.setColumnsMissingDefaultAggregate(Lists.newArrayList(names)); return cause; } + + /** + * Queried partition columns are not present in this Storage Candidate + * @param missingPartitionColumns + * @return + */ + public static CandidateTablePruneCause partitionColumnsMissing(final List<String> missingPartitionColumns) { + CandidateTablePruneCause cause = new CandidateTablePruneCause(PART_COL_DOES_NOT_EXIST); + cause.nonExistantPartCols = missingPartitionColumns; + return cause; + } + + /** + * All update periods of this Stoarge Candidate are rejected. + * @param updatePeriodRejectionCause + * @return + */ + public static CandidateTablePruneCause updatePeriodsRejected( + final Map<String, SkipUpdatePeriodCode> updatePeriodRejectionCause) { + CandidateTablePruneCause cause = new CandidateTablePruneCause(NO_CANDIDATE_UPDATE_PERIODS); + cause.updatePeriodRejectionCause = updatePeriodRejectionCause; + return cause; + } } http://git-wip-us.apache.org/repos/asf/lens/blob/b6f0cc3d/lens-cube/src/main/java/org/apache/lens/cube/parse/CandidateTableResolver.java ---------------------------------------------------------------------- diff --git a/lens-cube/src/main/java/org/apache/lens/cube/parse/CandidateTableResolver.java b/lens-cube/src/main/java/org/apache/lens/cube/parse/CandidateTableResolver.java index e7fc557..dd098b1 100644 --- a/lens-cube/src/main/java/org/apache/lens/cube/parse/CandidateTableResolver.java +++ b/lens-cube/src/main/java/org/apache/lens/cube/parse/CandidateTableResolver.java @@ -33,7 +33,6 @@ import org.apache.commons.lang.StringUtils; import org.apache.hadoop.conf.Configuration; import com.google.common.collect.Sets; - import lombok.NonNull; import lombok.extern.slf4j.Slf4j; @@ -59,6 +58,7 @@ class CandidateTableResolver implements ContextRewriter { public void rewriteContext(CubeQueryContext cubeql) throws LensException { if 
(checkForQueriedColumns) { log.debug("Dump queried columns:{}", cubeql.getTblAliasToColumns()); + //TODO union : create StoargeCandidate s now in populateCandidateTables populateCandidateTables(cubeql); resolveCandidateFactTables(cubeql); resolveCandidateDimTables(cubeql); @@ -88,6 +88,7 @@ class CandidateTableResolver implements ContextRewriter { } private void populateCandidateTables(CubeQueryContext cubeql) throws LensException { + int aliasCounter = 0; if (cubeql.getCube() != null) { List<CubeFactTable> factTables = cubeql.getMetastoreClient().getAllFacts(cubeql.getCube()); if (factTables.isEmpty()) { @@ -95,10 +96,11 @@ class CandidateTableResolver implements ContextRewriter { cubeql.getCube().getName() + " does not have any facts"); } for (CubeFactTable fact : factTables) { - CandidateFact cfact = new CandidateFact(fact, cubeql.getCube()); - cubeql.getCandidateFacts().add(cfact); + StorageCandidate sc = new StorageCandidate(cubeql.getCube(), fact, + fact.getStorages().iterator().next(), "sc" + aliasCounter++, cubeql); + cubeql.getCandidates().add(sc); } - log.info("Populated candidate facts: {}", cubeql.getCandidateFacts()); + log.info("Populated storage candidates: {}", cubeql.getCandidates()); } if (cubeql.getDimensions().size() != 0) { @@ -158,10 +160,10 @@ class CandidateTableResolver implements ContextRewriter { OptionalDimCtx optdim = cubeql.getOptionalDimensionMap().remove(dim); // remove all the depending candidate table as well for (CandidateTable candidate : optdim.requiredForCandidates) { - if (candidate instanceof CandidateFact) { - log.info("Not considering fact:{} as refered table does not have any valid dimtables", candidate); + if (candidate instanceof StorageCandidate) { + log.info("Not considering storage candidate:{} as refered table does not have any valid dimtables", candidate); cubeql.getCandidateFacts().remove(candidate); - cubeql.addFactPruningMsgs(((CandidateFact) candidate).fact, new CandidateTablePruneCause( + 
cubeql.addStoragePruningMsg(((StorageCandidate) candidate), new CandidateTablePruneCause( CandidateTablePruneCode.INVALID_DENORM_TABLE)); } else { log.info("Not considering dimtable:{} as refered table does not have any valid dimtables", candidate); @@ -176,20 +178,20 @@ class CandidateTableResolver implements ContextRewriter { } } - public static boolean isColumnAvailableInRange(final TimeRange range, Date startTime, Date endTime) { + private static boolean isColumnAvailableInRange(final TimeRange range, Date startTime, Date endTime) { return (isColumnAvailableFrom(range.getFromDate(), startTime) && isColumnAvailableTill(range.getToDate(), endTime)); } - public static boolean isColumnAvailableFrom(@NonNull final Date date, Date startTime) { + private static boolean isColumnAvailableFrom(@NonNull final Date date, Date startTime) { return (startTime == null) ? true : date.equals(startTime) || date.after(startTime); } - public static boolean isColumnAvailableTill(@NonNull final Date date, Date endTime) { + private static boolean isColumnAvailableTill(@NonNull final Date date, Date endTime) { return (endTime == null) ? 
true : date.equals(endTime) || date.before(endTime); } - public static boolean isFactColumnValidForRange(CubeQueryContext cubeql, CandidateTable cfact, String col) { + private static boolean isFactColumnValidForRange(CubeQueryContext cubeql, CandidateTable cfact, String col) { for(TimeRange range : cubeql.getTimeRanges()) { if (!isColumnAvailableInRange(range, getFactColumnStartTime(cfact, col), getFactColumnEndTime(cfact, col))) { return false; @@ -198,7 +200,7 @@ class CandidateTableResolver implements ContextRewriter { return true; } - public static Date getFactColumnStartTime(CandidateTable table, String factCol) { + private static Date getFactColumnStartTime(CandidateTable table, String factCol) { Date startTime = null; if (table instanceof CandidateFact) { for (String key : ((CandidateFact) table).fact.getProperties().keySet()) { @@ -213,7 +215,7 @@ class CandidateTableResolver implements ContextRewriter { return startTime; } - public static Date getFactColumnEndTime(CandidateTable table, String factCol) { + private static Date getFactColumnEndTime(CandidateTable table, String factCol) { Date endTime = null; if (table instanceof CandidateFact) { for (String key : ((CandidateFact) table).fact.getProperties().keySet()) { @@ -232,7 +234,7 @@ class CandidateTableResolver implements ContextRewriter { if (cubeql.getCube() != null) { String str = cubeql.getConf().get(CubeQueryConfUtil.getValidFactTablesKey(cubeql.getCube().getName())); List<String> validFactTables = - StringUtils.isBlank(str) ? null : Arrays.asList(StringUtils.split(str.toLowerCase(), ",")); + StringUtils.isBlank(str) ? null : Arrays.asList(StringUtils.split(str.toLowerCase(), ",")); Set<QueriedPhraseContext> queriedMsrs = new HashSet<>(); Set<QueriedPhraseContext> dimExprs = new HashSet<>(); @@ -243,100 +245,75 @@ class CandidateTableResolver implements ContextRewriter { dimExprs.add(qur); } } - // Remove fact tables based on whether they are valid or not. 
- for (Iterator<CandidateFact> i = cubeql.getCandidateFacts().iterator(); i.hasNext();) { - CandidateFact cfact = i.next(); + // Remove storage candidates based on whether they are valid or not. + for (Iterator<Candidate> i = cubeql.getCandidates().iterator(); i.hasNext(); ) { + Candidate cand = i.next(); + if (cand instanceof StorageCandidate) { + StorageCandidate sc = (StorageCandidate) cand; + if (validFactTables != null) { + if (!validFactTables.contains(sc.getName().toLowerCase())) { + log.info("Not considering storage candidate:{} as it is not a valid candidate", sc); + cubeql.addStoragePruningMsg(sc, new CandidateTablePruneCause(CandidateTablePruneCode.INVALID)); + i.remove(); + continue; + } + } - if (validFactTables != null) { - if (!validFactTables.contains(cfact.getName().toLowerCase())) { - log.info("Not considering fact table:{} as it is not a valid fact", cfact); - cubeql - .addFactPruningMsgs(cfact.fact, new CandidateTablePruneCause(CandidateTablePruneCode.INVALID)); - i.remove(); - continue; + // update expression evaluability for this fact + for (String expr : cubeql.getQueriedExprs()) { + cubeql.getExprCtx().updateEvaluables(expr, sc); } - } - // update expression evaluability for this fact - for (String expr : cubeql.getQueriedExprs()) { - cubeql.getExprCtx().updateEvaluables(expr, cfact); - } + // go over the columns accessed in the query and find out which tables + // can answer the query + // the candidate facts should have all the dimensions queried and + // atleast + // one measure + boolean toRemove = false; + for (QueriedPhraseContext qur : dimExprs) { + if (!qur.isEvaluable(cubeql, sc)) { + log.info("Not considering storage candidate:{} as columns {} are not available", sc, qur.getColumns()); + cubeql.addStoragePruningMsg(sc, CandidateTablePruneCause.columnNotFound(qur.getColumns())); + toRemove = true; + break; + } + } - // go over the columns accessed in the query and find out which tables - // can answer the query - // the candidate 
facts should have all the dimensions queried and - // atleast - // one measure - boolean toRemove = false; - for (QueriedPhraseContext qur : dimExprs) { - if (!qur.isEvaluable(cubeql, cfact)) { - log.info("Not considering fact table:{} as columns {} are not available", cfact, qur.getColumns()); - cubeql.addFactPruningMsgs(cfact.fact, CandidateTablePruneCause.columnNotFound(qur.getColumns())); + // check if the candidate fact has atleast one measure queried + // if expression has measures, they should be considered along with other measures and see if the fact can be + // part of measure covering set + if (!checkForFactColumnExistsAndValidForRange(sc, queriedMsrs, cubeql)) { + Set<String> columns = getColumns(queriedMsrs); + log.info("Not considering storage candidate:{} as columns {} is not available", sc, columns); + cubeql.addStoragePruningMsg(sc, CandidateTablePruneCause.columnNotFound(columns)); toRemove = true; - break; } - } - - // check if the candidate fact has atleast one measure queried - // if expression has measures, they should be considered along with other measures and see if the fact can be - // part of measure covering set - if (!checkForFactColumnExistsAndValidForRange(cfact, queriedMsrs, cubeql)) { - Set<String> columns = getColumns(queriedMsrs); - log.info("Not considering fact table:{} as columns {} is not available", cfact, columns); - cubeql.addFactPruningMsgs(cfact.fact, CandidateTablePruneCause.columnNotFound(columns)); - toRemove = true; - } - // go over join chains and prune facts that dont have any of the columns in each chain - for (JoinChain chain : cubeql.getJoinchains().values()) { - OptionalDimCtx optdim = cubeql.getOptionalDimensionMap().get(Aliased.create((Dimension)cubeql.getCubeTbls() - .get(chain.getName()), chain.getName())); - if (!checkForFactColumnExistsAndValidForRange(cfact, chain.getSourceColumns(), cubeql)) { - // check if chain is optional or not - if (optdim == null) { - log.info("Not considering fact table:{} as 
columns {} are not available", cfact, - chain.getSourceColumns()); - cubeql.addFactPruningMsgs(cfact.fact, CandidateTablePruneCause.columnNotFound(chain.getSourceColumns())); - toRemove = true; - break; + // go over join chains and prune facts that dont have any of the columns in each chain + for (JoinChain chain : cubeql.getJoinchains().values()) { + OptionalDimCtx optdim = cubeql.getOptionalDimensionMap().get(Aliased.create((Dimension) cubeql.getCubeTbls() + .get(chain.getName()), chain.getName())); + if (!checkForFactColumnExistsAndValidForRange(sc, chain.getSourceColumns(), cubeql)) { + // check if chain is optional or not + if (optdim == null) { + log.info("Not considering storage candidate:{} as columns {} are not available", sc, + chain.getSourceColumns()); + cubeql.addStoragePruningMsg(sc, CandidateTablePruneCause.columnNotFound(chain.getSourceColumns())); + toRemove = true; + break; + } } } - } - if (toRemove) { - i.remove(); - } - } - if (cubeql.getCandidateFacts().size() == 0) { - throw new LensException(LensCubeErrorCode.NO_FACT_HAS_COLUMN.getLensErrorInfo(), - getColumns(cubeql.getQueriedPhrases()).toString()); - } - Set<Set<CandidateFact>> cfactset; - if (queriedMsrs.isEmpty()) { - // if no measures are queried, add all facts individually as single covering sets - cfactset = new HashSet<>(); - for (CandidateFact cfact : cubeql.getCandidateFacts()) { - Set<CandidateFact> one = new LinkedHashSet<>(); - one.add(cfact); - cfactset.add(one); - } - cubeql.getCandidateFactSets().addAll(cfactset); - } else { - // Find out candidate fact table sets which contain all the measures - // queried - - List<CandidateFact> cfacts = new ArrayList<>(cubeql.getCandidateFacts()); - cfactset = findCoveringSets(cubeql, cfacts, queriedMsrs); - log.info("Measure covering fact sets :{}", cfactset); - String msrString = getColumns(queriedMsrs).toString(); - if (cfactset.isEmpty()) { - throw new LensException(LensCubeErrorCode.NO_FACT_HAS_COLUMN.getLensErrorInfo(), msrString); + 
if (toRemove) { + i.remove(); + } + } else { + throw new LensException("Not a storage candidate!!"); } - cubeql.getCandidateFactSets().addAll(cfactset); - cubeql.pruneCandidateFactWithCandidateSet(CandidateTablePruneCause.columnNotFound(getColumns(queriedMsrs))); - - if (cubeql.getCandidateFacts().size() == 0) { - throw new LensException(LensCubeErrorCode.NO_FACT_HAS_COLUMN.getLensErrorInfo(), msrString); + if (cubeql.getCandidates().size() == 0) { + throw new LensException(LensCubeErrorCode.NO_FACT_HAS_COLUMN.getLensErrorInfo(), + getColumns(cubeql.getQueriedPhrases()).toString()); } } } @@ -349,51 +326,6 @@ class CandidateTableResolver implements ContextRewriter { } return cols; } - static Set<Set<CandidateFact>> findCoveringSets(CubeQueryContext cubeql, List<CandidateFact> cfactsPassed, - Set<QueriedPhraseContext> msrs) throws LensException { - Set<Set<CandidateFact>> cfactset = new HashSet<>(); - List<CandidateFact> cfacts = new ArrayList<>(cfactsPassed); - for (Iterator<CandidateFact> i = cfacts.iterator(); i.hasNext();) { - CandidateFact cfact = i.next(); - if (!checkForFactColumnExistsAndValidForRange(cfact, msrs, cubeql)) { - // cfact does not contain any of msrs and none of exprsWithMeasures are evaluable. - // ignore the fact - i.remove(); - continue; - } else if (allEvaluable(cfact, msrs, cubeql)) { - // return single set - Set<CandidateFact> one = new LinkedHashSet<>(); - one.add(cfact); - cfactset.add(one); - i.remove(); - } - } - // facts that contain all measures or no measures are removed from iteration. 
- // find other facts - for (Iterator<CandidateFact> i = cfacts.iterator(); i.hasNext();) { - CandidateFact cfact = i.next(); - i.remove(); - // find the remaining measures in other facts - if (i.hasNext()) { - Set<QueriedPhraseContext> remainingMsrs = new HashSet<>(msrs); - Set<QueriedPhraseContext> coveredMsrs = coveredMeasures(cfact, msrs, cubeql); - remainingMsrs.removeAll(coveredMsrs); - - Set<Set<CandidateFact>> coveringSets = findCoveringSets(cubeql, cfacts, remainingMsrs); - if (!coveringSets.isEmpty()) { - for (Set<CandidateFact> set : coveringSets) { - set.add(cfact); - cfactset.add(set); - } - } else { - log.info("Couldnt find any set containing remaining measures:{} {} in {}", remainingMsrs, - cfactsPassed); - } - } - } - log.info("Covering set {} for measures {} with factsPassed {}", cfactset, msrs, cfactsPassed); - return cfactset; - } private void resolveCandidateDimTablesForJoinsAndDenorms(CubeQueryContext cubeql) throws LensException { if (cubeql.getAutoJoinCtx() == null) { @@ -720,7 +652,7 @@ class CandidateTableResolver implements ContextRewriter { // The candidate table contains atleast one column in the colSet and // column can the queried in the range specified - static boolean checkForFactColumnExistsAndValidForRange(CandidateTable table, Collection<String> colSet, + private static boolean checkForFactColumnExistsAndValidForRange(CandidateTable table, Collection<String> colSet, CubeQueryContext cubeql) { if (colSet == null || colSet.isEmpty()) { return true; @@ -733,37 +665,39 @@ class CandidateTableResolver implements ContextRewriter { return false; } - static boolean checkForFactColumnExistsAndValidForRange(CandidateFact table, Collection<QueriedPhraseContext> colSet, - CubeQueryContext cubeql) throws LensException { + + private static boolean checkForFactColumnExistsAndValidForRange(StorageCandidate sc, + Collection<QueriedPhraseContext> colSet, + CubeQueryContext cubeql) throws LensException { if (colSet == null || colSet.isEmpty()) { 
return true; } for (QueriedPhraseContext qur : colSet) { - if (qur.isEvaluable(cubeql, table)) { + if (qur.isEvaluable(cubeql, sc)) { return true; } } return false; } - static boolean allEvaluable(CandidateFact table, Collection<QueriedPhraseContext> colSet, - CubeQueryContext cubeql) throws LensException { + static boolean allEvaluable(StorageCandidate sc, Collection<QueriedPhraseContext> colSet, + CubeQueryContext cubeql) throws LensException { if (colSet == null || colSet.isEmpty()) { return true; } for (QueriedPhraseContext qur : colSet) { - if (!qur.isEvaluable(cubeql, table)) { + if (!qur.isEvaluable(cubeql, sc)) { return false; } } return true; } - static Set<QueriedPhraseContext> coveredMeasures(CandidateFact table, Collection<QueriedPhraseContext> msrs, - CubeQueryContext cubeql) throws LensException { + static Set<QueriedPhraseContext> coveredMeasures(StorageCandidate sc, Collection<QueriedPhraseContext> msrs, + CubeQueryContext cubeql) throws LensException { Set<QueriedPhraseContext> coveringSet = new HashSet<>(); for (QueriedPhraseContext msr : msrs) { - if (msr.isEvaluable(cubeql, table)) { + if (msr.isEvaluable(cubeql, sc)) { coveringSet.add(msr); } } http://git-wip-us.apache.org/repos/asf/lens/blob/b6f0cc3d/lens-cube/src/main/java/org/apache/lens/cube/parse/CandidateUtil.java ---------------------------------------------------------------------- diff --git a/lens-cube/src/main/java/org/apache/lens/cube/parse/CandidateUtil.java b/lens-cube/src/main/java/org/apache/lens/cube/parse/CandidateUtil.java new file mode 100644 index 0000000..dd3b1dd --- /dev/null +++ b/lens-cube/src/main/java/org/apache/lens/cube/parse/CandidateUtil.java @@ -0,0 +1,208 @@ +package org.apache.lens.cube.parse; + +import java.util.*; + +import org.apache.lens.cube.metadata.CubeMetastoreClient; +import org.apache.lens.cube.metadata.MetastoreUtil; +import org.apache.lens.cube.metadata.TimeRange; +import org.apache.lens.server.api.error.LensException; + +import 
org.apache.hadoop.hive.metastore.api.FieldSchema; +import org.apache.hadoop.hive.ql.parse.ASTNode; + +import com.google.common.collect.BoundType; +import com.google.common.collect.Range; +import com.google.common.collect.RangeSet; +import com.google.common.collect.TreeRangeSet; + +/** + * Placeholder for Util methods that will be required for {@link Candidate} + */ +public class CandidateUtil { + + /** + * Is calculated measure expression answerable by the Candidate + * @param exprNode + * @param candidate + * @param context + * @return + * @throws LensException + */ + public static boolean isMeasureExpressionAnswerable(ASTNode exprNode, Candidate candidate, CubeQueryContext context) + throws LensException { + return candidate.getColumns().containsAll(HQLParser.getColsInExpr( + context.getAliasForTableName(context.getCube()), exprNode)); + } + + /** + * Returns true if the Candidate is valid for all the timeranges based on its start and end times. + * @param candidate + * @param timeRanges + * @return + */ + public static boolean isValidForTimeRanges(Candidate candidate, List<TimeRange> timeRanges) { + for (TimeRange timeRange : timeRanges) { + if (!(timeRange.getFromDate().after(candidate.getStartTime()) + && timeRange.getToDate().before(candidate.getEndTime()))) { + return false; + } + } + return true; + } + + public static boolean isPartiallyValidForTimeRanges(Candidate cand, List<TimeRange> timeRanges) { + for (TimeRange timeRange : timeRanges) { + if ((cand.getStartTime().before(timeRange.getFromDate()) && cand.getEndTime().after(timeRange.getFromDate())) + || (cand.getStartTime().before(timeRange.getToDate()) && cand.getEndTime().after(timeRange.getToDate()))) { + return true; + } + } + return false; + } + + /** + * Gets the time partition columns for a storage candidate + * TODO decide is this needs to be supported for all Candidate types. 
+ * + * @param candidate : Storage Candidate + * @param metastoreClient : Cube metastore client + * @return + * @throws LensException + */ + public Set<String> getTimePartitionCols(StorageCandidate candidate, CubeMetastoreClient metastoreClient) + throws LensException { + Set<String> cubeTimeDimensions = candidate.getCube().getTimedDimensions(); + Set<String> timePartDimensions = new HashSet<String>(); + String singleStorageTable = candidate.getStorageName(); + List<FieldSchema> partitionKeys = null; + partitionKeys = metastoreClient.getTable(singleStorageTable).getPartitionKeys(); + for (FieldSchema fs : partitionKeys) { + if (cubeTimeDimensions.contains(CubeQueryContext.getTimeDimOfPartitionColumn(candidate.getCube(), + fs.getName()))) { + timePartDimensions.add(fs.getName()); + } + } + return timePartDimensions; + } + + /** + * Copy Query AST from sourceAst to targetAst + * + * @param sourceAst + * @param targetAst + * @throws LensException + */ + public void copyASTs(QueryAST sourceAst, QueryAST targetAst) throws LensException { + targetAst.setSelectAST(MetastoreUtil.copyAST(sourceAst.getSelectAST())); + targetAst.setWhereAST(MetastoreUtil.copyAST(sourceAst.getWhereAST())); + if (sourceAst.getJoinAST() != null) { + targetAst.setJoinAST(MetastoreUtil.copyAST(sourceAst.getJoinAST())); + } + if (sourceAst.getGroupByAST() != null) { + targetAst.setGroupByAST(MetastoreUtil.copyAST(sourceAst.getGroupByAST())); + } + } + + public static Set<StorageCandidate> getStorageCandidates(final Candidate candidate) { + return getStorageCandidates(new HashSet<Candidate>(1) {{ + add(candidate); + }}); + } + + + public static Set<QueriedPhraseContext> coveredMeasures(Candidate candSet, Collection<QueriedPhraseContext> msrs, + CubeQueryContext cubeql) throws LensException { + Set<QueriedPhraseContext> coveringSet = new HashSet<>(); + for (QueriedPhraseContext msr : msrs) { + if (candSet.getChildren() == null) { + if (msr.isEvaluable(cubeql, (StorageCandidate) candSet)) { + 
coveringSet.add(msr); + } + } else { + for (Candidate cand : candSet.getChildren()) { + if (msr.isEvaluable(cubeql, (StorageCandidate) cand)) { + coveringSet.add(msr); + } + } + } + } + return coveringSet; + } + + /** + * Returns true if the Candidates cover the entire time range. + * @param candidates + * @param startTime + * @param endTime + * @return + */ + public static boolean isTimeRangeCovered(Collection<Candidate> candidates, Date startTime, Date endTime) { + RangeSet<Date> set = TreeRangeSet.create(); + for (Candidate candidate : candidates) { + set.add(Range.range(candidate.getStartTime(), BoundType.CLOSED, candidate.getEndTime(), BoundType.OPEN)); + } + return set.encloses(Range.range(startTime, BoundType.CLOSED, endTime, BoundType.OPEN)); + } + + public static Set<String> getColumns(Collection<QueriedPhraseContext> queriedPhraseContexts) { + Set<String> cols = new HashSet<>(); + for (QueriedPhraseContext qur : queriedPhraseContexts) { + cols.addAll(qur.getColumns()); + } + return cols; + } + + /** + * Filters Candidates that contain the filterCandidate + * + * @param candidates + * @param filterCandidate + * @return pruned Candidates + */ + public static Collection<Candidate> filterCandidates(Collection<Candidate> candidates, Candidate filterCandidate) { + List<Candidate> prunedCandidates = new ArrayList<>(); + Iterator<Candidate> itr = candidates.iterator(); + while (itr.hasNext()) { + if (itr.next().contains(filterCandidate)) { + prunedCandidates.add(itr.next()); + itr.remove(); + } + } + return prunedCandidates; + } + + /** + * Gets all the Storage Candidates that participate in the collection of passed candidates + * @param candidates + * @return + */ + public static Set<StorageCandidate> getStorageCandidates(Collection<Candidate> candidates) { + Set<StorageCandidate> storageCandidateSet = new HashSet<>(); + getStorageCandidates(candidates, storageCandidateSet); + return storageCandidateSet; + } + + private static void 
getStorageCandidates(Collection<Candidate> candidates, + Set<StorageCandidate> storageCandidateSet) { + for (Candidate candidate : candidates) { + if (candidate.getChildren() == null) { + //Expecting this to be a StorageCandidate as it has no children. + storageCandidateSet.add((StorageCandidate)candidate); + } else { + getStorageCandidates(candidate.getChildren(), storageCandidateSet); + } + } + } + + public static StorageCandidate cloneStorageCandidate(StorageCandidate sc) { + return new StorageCandidate(sc.getCube(), sc.getFact(), sc.getStorageName(), sc.getAlias(), sc.getCubeql()); + } + + public static class UnionCandidateComparator<T> implements Comparator<UnionCandidate> { + + @Override + public int compare(UnionCandidate o1, UnionCandidate o2) { + return Integer.valueOf(o1.getChildren().size() - o2.getChildren().size()); + } + } +} http://git-wip-us.apache.org/repos/asf/lens/blob/b6f0cc3d/lens-cube/src/main/java/org/apache/lens/cube/parse/CubeQueryContext.java ---------------------------------------------------------------------- diff --git a/lens-cube/src/main/java/org/apache/lens/cube/parse/CubeQueryContext.java b/lens-cube/src/main/java/org/apache/lens/cube/parse/CubeQueryContext.java index e83ae76..58fc5b1 100644 --- a/lens-cube/src/main/java/org/apache/lens/cube/parse/CubeQueryContext.java +++ b/lens-cube/src/main/java/org/apache/lens/cube/parse/CubeQueryContext.java @@ -102,9 +102,19 @@ public class CubeQueryContext extends TracksQueriedColumns implements QueryAST { // Mapping of a qualified column name to its table alias private final Map<String, String> colToTableAlias = new HashMap<>(); + //TODO union: remove candidateFactSets and use @Getter private final Set<Set<CandidateFact>> candidateFactSets = new HashSet<>(); + /** + * This is the set of working Candidates that gets updated during different phases of + * query resolution. 
Each {@link ContextRewriter} may add/remove/update Candiadtes in + * this working set and from the final set of Candidates single {@link #pickedCandidate} + * is chosen. + */ + @Getter + private final Set<Candidate> candidates = new HashSet<>(); + @Getter // would be added through join chains and de-normalized resolver protected Map<Aliased<Dimension>, OptionalDimCtx> optionalDimensionMap = new HashMap<>(); @@ -177,9 +187,12 @@ public class CubeQueryContext extends TracksQueriedColumns implements QueryAST { @Getter @Setter private DenormalizationResolver.DenormalizationContext deNormCtx; + //TODO union : deprecate factPruningMsgs + @Getter + @Deprecated + private PruneCauses<CubeFactTable> factPruningMsgs = new PruneCauses<>(); @Getter - private PruneCauses<CubeFactTable> factPruningMsgs = - new PruneCauses<CubeFactTable>(); + private PruneCauses<StorageCandidate> storagePruningMsgs = new PruneCauses<>(); @Getter private Map<Dimension, PruneCauses<CubeDimensionTable>> dimPruningMsgs = new HashMap<Dimension, PruneCauses<CubeDimensionTable>>(); @@ -480,9 +493,36 @@ public class CubeQueryContext extends TracksQueriedColumns implements QueryAST { return candidateDims; } + /** + * TODO union : deprecate this method and use + * {@link #addFactPruningMsg(CubeInterface, CubeFactTable, CandidateTablePruneCause)} + * or + * {@link #addStoragePruningMsg(StorageCandidate, CandidateTablePruneCause)} + * */ + @Deprecated public void addFactPruningMsgs(CubeFactTable fact, CandidateTablePruneCause factPruningMsg) { + throw new IllegalStateException("This method is deprecate"); + } + + //TODO union : not required as all the pruning happening at StorageCandidate + /* + public void addFactPruningMsg(CubeInterface cube, CubeFactTable fact, CandidateTablePruneCause factPruningMsg) { log.info("Pruning fact {} with cause: {}", fact, factPruningMsg); - factPruningMsgs.addPruningMsg(fact, factPruningMsg); + for (String storageName : fact.getStorages()) { + addStoragePruningMsg(new 
StorageCandidate(cube, fact, storageName), factPruningMsg); + } + } +*/ + public void addCandidatePruningMsg(Candidate cand, CandidateTablePruneCause factPruningMsg) { + Set<StorageCandidate> scs = CandidateUtil.getStorageCandidates(cand); + for (StorageCandidate sc : scs) { + addStoragePruningMsg(sc, factPruningMsg); + } + } + + public void addStoragePruningMsg(StorageCandidate sc, CandidateTablePruneCause factPruningMsg) { + log.info("Pruning Storage {} with cause: {}", sc, factPruningMsg); + storagePruningMsgs.addPruningMsg(sc, factPruningMsg); } public void addDimPruningMsgs(Dimension dim, CubeDimensionTable dimtable, CandidateTablePruneCause msg) { @@ -675,6 +715,11 @@ public class CubeQueryContext extends TracksQueriedColumns implements QueryAST { return qb.getParseInfo().getJoinExpr(); } + @Override + public void setJoinAST(ASTNode node) { + //NO-OP + } + public String getOrderByString() { if (orderByAST != null) { return HQLParser.getString(orderByAST); @@ -769,6 +814,7 @@ public class CubeQueryContext extends TracksQueriedColumns implements QueryAST { } } + // TODO union : Reevaluate this method. void setNonexistingParts(Map<String, Set<String>> nonExistingParts) throws LensException { if (!nonExistingParts.isEmpty()) { ByteArrayOutputStream out = null; @@ -873,8 +919,14 @@ public class CubeQueryContext extends TracksQueriedColumns implements QueryAST { } private HQLContextInterface hqlContext; + + //TODO union : Delete this and use pickedCandidate @Getter private Collection<CandidateFact> pickedFacts; + + @Getter + //TODO union : This will be the final Candidate . 
private Candidate pickedCandidate + private Candidate pickedCandidate; @Getter private Collection<CandidateDim> pickedDimTables; @@ -1211,6 +1263,8 @@ public class CubeQueryContext extends TracksQueriedColumns implements QueryAST { * * @param pruneCause */ + //TODO union : deprecated + @Deprecated public void pruneCandidateFactSet(CandidateTablePruneCode pruneCause) { // remove candidate fact sets that have missing facts for (Iterator<Set<CandidateFact>> i = candidateFactSets.iterator(); i.hasNext();) { @@ -1237,6 +1291,8 @@ public class CubeQueryContext extends TracksQueriedColumns implements QueryAST { pruneCandidateFactWithCandidateSet(new CandidateTablePruneCause(pruneCause)); } + //TODO union : deprecated + @Deprecated public void pruneCandidateFactWithCandidateSet(CandidateTablePruneCause pruneCause) { // remove candidate facts that are not part of any covering set Set<CandidateFact> allCoveringFacts = new HashSet<CandidateFact>(); @@ -1253,6 +1309,7 @@ public class CubeQueryContext extends TracksQueriedColumns implements QueryAST { } } + public void addQueriedTimeDimensionCols(final String timeDimColName) { checkArgument(StringUtils.isNotBlank(timeDimColName)); http://git-wip-us.apache.org/repos/asf/lens/blob/b6f0cc3d/lens-cube/src/main/java/org/apache/lens/cube/parse/CubeQueryRewriter.java ---------------------------------------------------------------------- diff --git a/lens-cube/src/main/java/org/apache/lens/cube/parse/CubeQueryRewriter.java b/lens-cube/src/main/java/org/apache/lens/cube/parse/CubeQueryRewriter.java index b612173..3ff6070 100644 --- a/lens-cube/src/main/java/org/apache/lens/cube/parse/CubeQueryRewriter.java +++ b/lens-cube/src/main/java/org/apache/lens/cube/parse/CubeQueryRewriter.java @@ -150,6 +150,10 @@ public class CubeQueryRewriter { // Resolve candidate fact tables and dimension tables for columns queried rewriters.add(candidateTblResolver); // Resolve aggregations and generate base select tree + rewriters.add(new 
CandidateCoveringSetsResolver(conf)); + + //TODO union: Add CoveringSetResolver which creates UnionCandidates and JoinCandidates. Some code form candidateTblResolver(phase 2) to be moved to CoveringSetResolver + //TODO union: AggregateResolver,GroupbyResolver,FieldValidator before CoveringSetResolver rewriters.add(new AggregateResolver()); rewriters.add(new GroupbyResolver(conf)); rewriters.add(new FieldValidator()); @@ -159,12 +163,15 @@ public class CubeQueryRewriter { rewriters.add(new TimeRangeChecker(conf)); // Resolve candidate fact tables and dimension tables for columns included // in join and denorm resolvers + //TODO union : this should be CoveringSetResolver now rewriters.add(candidateTblResolver); // Phase 1: resolve fact tables. + //TODO union: This phase 1 of storageTableResolver should happen before CoveringSetResolver rewriters.add(storageTableResolver); if (lightFactFirst) { // Prune candidate tables for which denorm column references do not exist + //TODO union: phase 2 of denormResolver needs to be moved before CoveringSetResolver rewriters.add(denormResolver); // Prune candidate facts without any valid expressions rewriters.add(exprResolver); @@ -176,6 +183,7 @@ public class CubeQueryRewriter { // Phase 3: resolve dimension tables and partitions. rewriters.add(storageTableResolver); // Prune candidate tables for which denorm column references do not exist + //TODO union: phase 2 of denormResolver needs to be moved before CoveringSetResolver.. 
check if this makes sense rewriters.add(denormResolver); // Prune candidate facts without any valid expressions rewriters.add(exprResolver); http://git-wip-us.apache.org/repos/asf/lens/blob/b6f0cc3d/lens-cube/src/main/java/org/apache/lens/cube/parse/DenormalizationResolver.java ---------------------------------------------------------------------- diff --git a/lens-cube/src/main/java/org/apache/lens/cube/parse/DenormalizationResolver.java b/lens-cube/src/main/java/org/apache/lens/cube/parse/DenormalizationResolver.java index 40ed387..d8f1ab4 100644 --- a/lens-cube/src/main/java/org/apache/lens/cube/parse/DenormalizationResolver.java +++ b/lens-cube/src/main/java/org/apache/lens/cube/parse/DenormalizationResolver.java @@ -345,20 +345,27 @@ public class DenormalizationResolver implements ContextRewriter { // In the second iteration of denorm resolver // candidate tables which require denorm fields and the refernces are no // more valid will be pruned - if (cubeql.getCube() != null && !cubeql.getCandidateFacts().isEmpty()) { - for (Iterator<CandidateFact> i = cubeql.getCandidateFacts().iterator(); i.hasNext();) { - CandidateFact cfact = i.next(); - if (denormCtx.tableToRefCols.containsKey(cfact.getName())) { - for (ReferencedQueriedColumn refcol : denormCtx.tableToRefCols.get(cfact.getName())) { - if (denormCtx.getReferencedCols().get(refcol.col.getName()).isEmpty()) { - log.info("Not considering fact table:{} as column {} is not available", cfact, refcol.col); - cubeql.addFactPruningMsgs(cfact.fact, CandidateTablePruneCause.columnNotFound(refcol.col.getName())); - i.remove(); + if (cubeql.getCube() != null && !cubeql.getCandidates().isEmpty()) { + for (Iterator<Candidate> i = cubeql.getCandidates().iterator(); i.hasNext();) { + Candidate cand = i.next(); + //TODO union : is this happening in pahse 1 or 2 ? + //TODO Union : If phase 2, the below code will not work. 
Move to phase1 in that case + if (cand instanceof StorageCandidate) { + StorageCandidate sc = (StorageCandidate) cand; + if (denormCtx.tableToRefCols.containsKey(sc.getFact().getName())) { + for (ReferencedQueriedColumn refcol : denormCtx.tableToRefCols.get(sc.getFact().getName())) { + if (denormCtx.getReferencedCols().get(refcol.col.getName()).isEmpty()) { + log.info("Not considering storage candidate :{} as column {} is not available", sc, refcol.col); + cubeql.addStoragePruningMsg(sc, CandidateTablePruneCause.columnNotFound(refcol.col.getName())); + i.remove(); + } } } + } else { + throw new LensException("Not a storage candidate!!"); } } - if (cubeql.getCandidateFacts().size() == 0) { + if (cubeql.getCandidates().size() == 0) { throw new LensException(LensCubeErrorCode.NO_FACT_HAS_COLUMN.getLensErrorInfo(), cubeql.getColumnsQueriedForTable(cubeql.getCube().getName()).toString()); } http://git-wip-us.apache.org/repos/asf/lens/blob/b6f0cc3d/lens-cube/src/main/java/org/apache/lens/cube/parse/ExpressionResolver.java ---------------------------------------------------------------------- diff --git a/lens-cube/src/main/java/org/apache/lens/cube/parse/ExpressionResolver.java b/lens-cube/src/main/java/org/apache/lens/cube/parse/ExpressionResolver.java index 60dacdb..1b8c560 100644 --- a/lens-cube/src/main/java/org/apache/lens/cube/parse/ExpressionResolver.java +++ b/lens-cube/src/main/java/org/apache/lens/cube/parse/ExpressionResolver.java @@ -647,42 +647,38 @@ class ExpressionResolver implements ContextRewriter { // prune invalid expressions cubeql.getExprCtx().pruneExpressions(); // prune candidate facts without any valid expressions - if (cubeql.getCube() != null && !cubeql.getCandidateFacts().isEmpty()) { + if (cubeql.getCube() != null && !cubeql.getCandidates().isEmpty()) { for (Map.Entry<String, Set<ExpressionContext>> ecEntry : exprCtx.allExprsQueried.entrySet()) { String expr = ecEntry.getKey(); Set<ExpressionContext> ecSet = ecEntry.getValue(); for 
(ExpressionContext ec : ecSet) { if (ec.getSrcTable().getName().equals(cubeql.getCube().getName())) { if (cubeql.getQueriedExprsWithMeasures().contains(expr)) { - for (Iterator<Set<CandidateFact>> sItr = cubeql.getCandidateFactSets().iterator(); sItr.hasNext();) { - Set<CandidateFact> factSet = sItr.next(); - boolean evaluableInSet = false; - for (CandidateFact cfact : factSet) { - if (ec.isEvaluable(cfact)) { - evaluableInSet = true; - } - } - if (!evaluableInSet) { - log.info("Not considering fact table set:{} as {} is not evaluable", factSet, ec.exprCol.getName()); + for (Iterator<Candidate> sItr = cubeql.getCandidates().iterator(); sItr.hasNext(); ) { + Candidate cand = sItr.next(); + if (!cand.isExpressionEvaluable(ec)) { + log.info("Not considering Candidate :{} as {} is not evaluable", cand, ec.exprCol.getName()); sItr.remove(); } } } else { - for (Iterator<CandidateFact> i = cubeql.getCandidateFacts().iterator(); i.hasNext();) { - CandidateFact cfact = i.next(); - if (!ec.isEvaluable(cfact)) { - log.info("Not considering fact table:{} as {} is not evaluable", cfact, ec.exprCol.getName()); - cubeql.addFactPruningMsgs(cfact.fact, - CandidateTablePruneCause.expressionNotEvaluable(ec.exprCol.getName())); - i.remove(); + // prune dimension only expressions + Set<StorageCandidate> storageCandidates = CandidateUtil.getStorageCandidates(cubeql.getCandidates()); + for (StorageCandidate sc : storageCandidates) { + if (!sc.isExpressionEvaluable(ec)) { + Collection<Candidate> prunedCandidates = + CandidateUtil.filterCandidates(cubeql.getCandidates(), sc); + log.info("Not considering candidate(s) :{} as expr :{} in storage :{} is not evaluable", + prunedCandidates, ec.exprCol.getName(), sc); + cubeql.addStoragePruningMsg(sc, + CandidateTablePruneCause.expressionNotEvaluable(ec.exprCol.getName())); } } - } } } } - cubeql.pruneCandidateFactWithCandidateSet(CandidateTablePruneCode.EXPRESSION_NOT_EVALUABLE); } + } // prune candidate dims without any valid expressions if 
(cubeql.getDimensions() != null && !cubeql.getDimensions().isEmpty()) { for (Dimension dim : cubeql.getDimensions()) { http://git-wip-us.apache.org/repos/asf/lens/blob/b6f0cc3d/lens-cube/src/main/java/org/apache/lens/cube/parse/JoinCandidate.java ---------------------------------------------------------------------- diff --git a/lens-cube/src/main/java/org/apache/lens/cube/parse/JoinCandidate.java b/lens-cube/src/main/java/org/apache/lens/cube/parse/JoinCandidate.java new file mode 100644 index 0000000..7781ba6 --- /dev/null +++ b/lens-cube/src/main/java/org/apache/lens/cube/parse/JoinCandidate.java @@ -0,0 +1,119 @@ +package org.apache.lens.cube.parse; + +import java.util.ArrayList; +import java.util.Collection; +import java.util.Date; +import java.util.Set; + +import org.apache.lens.cube.metadata.FactPartition; +import org.apache.lens.cube.metadata.TimeRange; +import org.apache.lens.server.api.error.LensException; + +import lombok.Getter; + +/** + * Represents a join of two candidates + */ +public class JoinCandidate implements Candidate { + + /** + * Child candidates that will participate in the join + */ + private Candidate childCandidate1; + private Candidate childCandidate2; + private String toStr; + @Getter + private String alias; + + public JoinCandidate(Candidate childCandidate1, Candidate childCandidate2, String alias) { + this.childCandidate1 = childCandidate1; + this.childCandidate2 = childCandidate2; + this.alias = alias; + } + + private String getJoinCondition() { + return null; + } + + @Override + public String toHQL() { + return null; + } + + @Override + public QueryAST getQueryAst() { + return null; + } + + @Override + public Collection<String> getColumns() { + return null; + } + + @Override + public Date getStartTime() { + return childCandidate1.getStartTime().after(childCandidate2.getStartTime()) + ? 
childCandidate1.getStartTime() + : childCandidate2.getStartTime(); + } + + @Override + public Date getEndTime() { + return childCandidate1.getEndTime().before(childCandidate2.getEndTime()) + ? childCandidate1.getEndTime() + : childCandidate2.getEndTime(); + } + + @Override + public double getCost() { + return childCandidate1.getCost() + childCandidate2.getCost(); + } + + @Override + public boolean contains(Candidate candidate) { + if (this.equals(candidate)) { + return true; + } else + return childCandidate1.contains(candidate) || childCandidate2.contains(candidate); + } + + @Override + public Collection<Candidate> getChildren() { + return new ArrayList() {{ + add(childCandidate1); + add(childCandidate2); + }}; + } + + /** + * @param timeRange + * @return + */ + @Override + public boolean evaluateCompleteness(TimeRange timeRange, boolean failOnPartialData) throws LensException { + return this.childCandidate1.evaluateCompleteness(timeRange, failOnPartialData) && this.childCandidate2 + .evaluateCompleteness(timeRange, failOnPartialData); + } + + @Override + public Set<FactPartition> getParticipatingPartitions() { + return null; + } + + @Override + public boolean isExpressionEvaluable(ExpressionResolver.ExpressionContext expr) { + return childCandidate1.isExpressionEvaluable(expr) || childCandidate2.isExpressionEvaluable(expr); + } + + @Override + public String toString() { + if (this.toStr == null) { + this.toStr = getToString(); + } + return this.toStr; + } + + private String getToString() { + return this.toStr = "JOIN[" + childCandidate1.toString() + ", " + childCandidate2.toString() + "]"; + } +} \ No newline at end of file http://git-wip-us.apache.org/repos/asf/lens/blob/b6f0cc3d/lens-cube/src/main/java/org/apache/lens/cube/parse/JoinResolver.java ---------------------------------------------------------------------- diff --git a/lens-cube/src/main/java/org/apache/lens/cube/parse/JoinResolver.java 
b/lens-cube/src/main/java/org/apache/lens/cube/parse/JoinResolver.java index 7b865bf..0370964 100644 --- a/lens-cube/src/main/java/org/apache/lens/cube/parse/JoinResolver.java +++ b/lens-cube/src/main/java/org/apache/lens/cube/parse/JoinResolver.java @@ -43,7 +43,10 @@ import lombok.extern.slf4j.Slf4j; class JoinResolver implements ContextRewriter { private Map<AbstractCubeTable, JoinType> tableJoinTypeMap; private AbstractCubeTable target; - private HashMap<Dimension, List<JoinChain>> dimensionInJoinChain = new HashMap<Dimension, List<JoinChain>>(); + /** + * Dimension as key and all the participating join chains for this dimension as value. + */ + private HashMap<Dimension, List<JoinChain>> dimensionToJoinChainsMap = new HashMap<Dimension, List<JoinChain>>(); public JoinResolver(Configuration conf) { } @@ -95,10 +98,10 @@ class JoinResolver implements ContextRewriter { dims.add(chain.getDestTable()); for (String dim : dims) { Dimension dimension = cubeql.getMetastoreClient().getDimension(dim); - if (dimensionInJoinChain.get(dimension) == null) { - dimensionInJoinChain.put(dimension, new ArrayList<JoinChain>()); + if (dimensionToJoinChainsMap.get(dimension) == null) { + dimensionToJoinChainsMap.put(dimension, new ArrayList<JoinChain>()); } - dimensionInJoinChain.get(dimension).add(chain); + dimensionToJoinChainsMap.get(dimension).add(chain); } } } @@ -143,7 +146,7 @@ class JoinResolver implements ContextRewriter { Map<Aliased<Dimension>, List<JoinPath>> multipleJoinPaths = new LinkedHashMap<>(); - // populate paths from joinchains + // populate paths from joinchains. For a destination Dimension get all the join paths that lead to it. 
for (JoinChain chain : cubeql.getJoinchains().values()) { Dimension dimension = cubeql.getMetastoreClient().getDimension(chain.getDestTable()); Aliased<Dimension> aliasedDimension = Aliased.create(dimension, chain.getName()); @@ -153,6 +156,7 @@ class JoinResolver implements ContextRewriter { multipleJoinPaths.get(aliasedDimension).addAll( chain.getRelationEdges(cubeql.getMetastoreClient())); } + boolean flattenBridgeTables = cubeql.getConf().getBoolean(CubeQueryConfUtil.ENABLE_FLATTENING_FOR_BRIDGETABLES, CubeQueryConfUtil.DEFAULT_ENABLE_FLATTENING_FOR_BRIDGETABLES); String bridgeTableFieldAggr = cubeql.getConf().get(CubeQueryConfUtil.BRIDGE_TABLE_FIELD_AGGREGATOR, http://git-wip-us.apache.org/repos/asf/lens/blob/b6f0cc3d/lens-cube/src/main/java/org/apache/lens/cube/parse/LightestFactResolver.java ---------------------------------------------------------------------- diff --git a/lens-cube/src/main/java/org/apache/lens/cube/parse/LightestFactResolver.java b/lens-cube/src/main/java/org/apache/lens/cube/parse/LightestFactResolver.java index 97accbb..077c0d2 100644 --- a/lens-cube/src/main/java/org/apache/lens/cube/parse/LightestFactResolver.java +++ b/lens-cube/src/main/java/org/apache/lens/cube/parse/LightestFactResolver.java @@ -38,32 +38,24 @@ public class LightestFactResolver implements ContextRewriter { @Override public void rewriteContext(CubeQueryContext cubeql) throws LensException { - if (cubeql.getCube() != null && !cubeql.getCandidateFactSets().isEmpty()) { - Map<Set<CandidateFact>, Double> factWeightMap = new HashMap<Set<CandidateFact>, Double>(); + if (cubeql.getCube() != null && !cubeql.getCandidates().isEmpty()) { + Map<Candidate, Double> factWeightMap = new HashMap<Candidate, Double>(); - for (Set<CandidateFact> facts : cubeql.getCandidateFactSets()) { - factWeightMap.put(facts, getWeight(facts)); + for (Candidate cand : cubeql.getCandidates()) { + factWeightMap.put(cand, cand.getCost()); } double minWeight = Collections.min(factWeightMap.values()); 
- for (Iterator<Set<CandidateFact>> i = cubeql.getCandidateFactSets().iterator(); i.hasNext();) { - Set<CandidateFact> facts = i.next(); - if (factWeightMap.get(facts) > minWeight) { - log.info("Not considering facts:{} from candidate fact tables as it has more fact weight:{} minimum:{}", - facts, factWeightMap.get(facts), minWeight); + for (Iterator<Candidate> i = cubeql.getCandidates().iterator(); i.hasNext();) { + Candidate cand = i.next(); + if (factWeightMap.get(cand) > minWeight) { + log.info("Not considering candidate:{} from final candidates as it has more fact weight:{} minimum:{}", + cand, factWeightMap.get(cand), minWeight); + cubeql.addCandidatePruningMsg(cand, new CandidateTablePruneCause(CandidateTablePruneCode.MORE_WEIGHT)); i.remove(); } } - cubeql.pruneCandidateFactWithCandidateSet(CandidateTablePruneCode.MORE_WEIGHT); } } - - private Double getWeight(Set<CandidateFact> set) { - Double weight = 0.0; - for (CandidateFact f : set) { - weight += f.fact.weight(); - } - return weight; - } } http://git-wip-us.apache.org/repos/asf/lens/blob/b6f0cc3d/lens-cube/src/main/java/org/apache/lens/cube/parse/MaxCoveringFactResolver.java ---------------------------------------------------------------------- diff --git a/lens-cube/src/main/java/org/apache/lens/cube/parse/MaxCoveringFactResolver.java b/lens-cube/src/main/java/org/apache/lens/cube/parse/MaxCoveringFactResolver.java index 45824fe..57c9c44 100644 --- a/lens-cube/src/main/java/org/apache/lens/cube/parse/MaxCoveringFactResolver.java +++ b/lens-cube/src/main/java/org/apache/lens/cube/parse/MaxCoveringFactResolver.java @@ -67,6 +67,8 @@ class MaxCoveringFactResolver implements ContextRewriter { // For each part column, which candidate fact sets are covering how much amount. // Later, we'll maximize coverage for each queried part column. 
Map<String, Map<Set<CandidateFact>, Long>> partCountsPerPartCol = Maps.newHashMap(); + //TODO union: max covering set will be calculated based on List<Candidate> + //TODO union: Each candidate will provide Set<FactPartition> using {@link Candidate#getParticipatingPartitions} for (Set<CandidateFact> facts : cubeql.getCandidateFactSets()) { for (Map.Entry<String, Long> entry : getTimeCoveredForEachPartCol(facts).entrySet()) { if (!partCountsPerPartCol.containsKey(entry.getKey())) { @@ -114,6 +116,7 @@ class MaxCoveringFactResolver implements ContextRewriter { } // We prune those candidate fact set, whose dataCompletenessFactor is less than maxDataCompletenessFactor + //TODO union : This needs to work on List<Candidate> Iterator<Set<CandidateFact>> iter = cubeql.getCandidateFactSets().iterator(); while (iter.hasNext()) { Set<CandidateFact> facts = iter.next(); @@ -127,6 +130,7 @@ class MaxCoveringFactResolver implements ContextRewriter { cubeql.pruneCandidateFactWithCandidateSet(CandidateTablePruneCause.incompletePartitions(null)); } + //TODO union : This needs to work on Candidate private float computeDataCompletenessFactor(Set<CandidateFact> facts) { float completenessFactor = 0f; int numPartition = 0; http://git-wip-us.apache.org/repos/asf/lens/blob/b6f0cc3d/lens-cube/src/main/java/org/apache/lens/cube/parse/PruneCauses.java ---------------------------------------------------------------------- diff --git a/lens-cube/src/main/java/org/apache/lens/cube/parse/PruneCauses.java b/lens-cube/src/main/java/org/apache/lens/cube/parse/PruneCauses.java index 9b5a52f..c17e5bf 100644 --- a/lens-cube/src/main/java/org/apache/lens/cube/parse/PruneCauses.java +++ b/lens-cube/src/main/java/org/apache/lens/cube/parse/PruneCauses.java @@ -36,7 +36,7 @@ import lombok.Data; import lombok.Getter; import lombok.NoArgsConstructor; -public class PruneCauses<T extends AbstractCubeTable> extends HashMap<T, List<CandidateTablePruneCause>> { +public class PruneCauses<T> extends HashMap<T, 
List<CandidateTablePruneCause>> { @Getter(lazy = true) private final HashMap<CandidateTablePruneCause, List<T>> reversed = reverse(); @Getter(lazy = true) @@ -66,7 +66,7 @@ public class PruneCauses<T extends AbstractCubeTable> extends HashMap<T, List<Ca get(table).add(msg); } - public HashMap<CandidateTablePruneCause, List<T>> reverse() { + private HashMap<CandidateTablePruneCause, List<T>> reverse() { HashMap<CandidateTablePruneCause, List<T>> result = new HashMap<CandidateTablePruneCause, List<T>>(); for (T key : keySet()) { for (CandidateTablePruneCause value : get(key)) { @@ -103,7 +103,7 @@ public class PruneCauses<T extends AbstractCubeTable> extends HashMap<T, List<Ca Map<CandidateTablePruneCause, String> maxCauseMap = Maps.newHashMap(); for (Map.Entry<CandidateTablePruneCause, List<T>> entry: getReversed().entrySet()) { if (entry.getKey().getCause().equals(maxCause)) { - maxCauseMap.put(entry.getKey(), StringUtils.join(entry.getValue(), ",")); + maxCauseMap.put(entry.getKey(), StringUtils.join(entry.getValue(), ",")); } } return maxCause.getBriefError(maxCauseMap.keySet());
